Merge "Revert "Revert "ARM: VIXL32: Use VIXL backend by default."""
diff --git a/Android.mk b/Android.mk
index f3ab3c1..f8c5378 100644
--- a/Android.mk
+++ b/Android.mk
@@ -42,7 +42,7 @@
 
 .PHONY: clean-oat-host
 clean-oat-host:
-	find $(OUT_DIR) -name "*.oat" -o -name "*.odex" -o -name "*.art" | xargs rm -f
+	find $(OUT_DIR) -name "*.oat" -o -name "*.odex" -o -name "*.art" -o -name '*.vdex' | xargs rm -f
 ifneq ($(TMPDIR),)
 	rm -rf $(TMPDIR)/$(USER)/test-*/dalvik-cache/*
 	rm -rf $(TMPDIR)/android-data/dalvik-cache/*
@@ -388,6 +388,7 @@
 # libstdc++ is needed when building for ART_TARGET_LINUX.
 ART_TARGET_SHARED_LIBRARY_BENCHMARK := $(TARGET_OUT_SHARED_LIBRARIES)/libartbenchmark.so
 build-art-target-golem: dex2oat dalvikvm patchoat linker libstdc++ \
+                        $(TARGET_OUT_EXECUTABLES)/art \
                         $(TARGET_OUT)/etc/public.libraries.txt \
                         $(ART_TARGET_DEX_DEPENDENCIES) \
                         $(ART_TARGET_SHARED_LIBRARY_DEPENDENCIES) \
diff --git a/build/Android.bp b/build/Android.bp
index b1553c7..6c9f1d4 100644
--- a/build/Android.bp
+++ b/build/Android.bp
@@ -146,6 +146,7 @@
         "external/valgrind",
         "external/vixl/src",
         "external/zlib",
+        "libnativehelper/platform_include"
     ],
 
     tidy_checks: [
diff --git a/build/Android.common_test.mk b/build/Android.common_test.mk
index 1591e34..27ec8b3 100644
--- a/build/Android.common_test.mk
+++ b/build/Android.common_test.mk
@@ -54,11 +54,11 @@
 ART_TEST_QUIET ?= true
 
 # Do you want interpreter tests run?
-ART_TEST_INTERPRETER ?= $(ART_TEST_FULL)
-ART_TEST_INTERPRETER_ACCESS_CHECKS ?= $(ART_TEST_FULL)
+ART_TEST_INTERPRETER ?= true
+ART_TEST_INTERPRETER_ACCESS_CHECKS ?= true
 
 # Do you want JIT tests run?
-ART_TEST_JIT ?= $(ART_TEST_FULL)
+ART_TEST_JIT ?= true
 
 # Do you want optimizing compiler tests run?
 ART_TEST_OPTIMIZING ?= true
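
Note: `?=` only assigns when the variable is not already set, so the new `true`
defaults above remain overridable per run. A hypothetical invocation:

    $ ART_TEST_JIT=false make test-art-host   # still skips the JIT test variants
    $ make test-art-host                      # JIT tests now run by default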
diff --git a/build/Android.cpplint.mk b/build/Android.cpplint.mk
index d09f290..f924a85 100644
--- a/build/Android.cpplint.mk
+++ b/build/Android.cpplint.mk
@@ -21,7 +21,7 @@
 ART_CPPLINT_FLAGS := --quiet --root=$(ANDROID_BUILD_TOP)
 ART_CPPLINT_INGORED := \
     runtime/elf.h \
-    runtime/openjdkjvmti/jvmti.h
+    runtime/openjdkjvmti/include/jvmti.h
 
 # This:
 #  1) Gets a list of all .h & .cc files in the art directory.
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index b661e00..c27f8db 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -75,8 +75,11 @@
 	$(call dexpreopt-remove-classes.dex,$@)
 
 ART_TEST_GTEST_VerifierDeps_SRC := $(abspath $(wildcard $(LOCAL_PATH)/VerifierDeps/*.smali))
+ART_TEST_GTEST_VerifierDepsMulti_SRC := $(abspath $(wildcard $(LOCAL_PATH)/VerifierDepsMulti/*.smali))
 ART_TEST_HOST_GTEST_VerifierDeps_DEX := $(dir $(ART_TEST_HOST_GTEST_Main_DEX))$(subst Main,VerifierDeps,$(basename $(notdir $(ART_TEST_HOST_GTEST_Main_DEX))))$(suffix $(ART_TEST_HOST_GTEST_Main_DEX))
 ART_TEST_TARGET_GTEST_VerifierDeps_DEX := $(dir $(ART_TEST_TARGET_GTEST_Main_DEX))$(subst Main,VerifierDeps,$(basename $(notdir $(ART_TEST_TARGET_GTEST_Main_DEX))))$(suffix $(ART_TEST_TARGET_GTEST_Main_DEX))
+ART_TEST_HOST_GTEST_VerifierDepsMulti_DEX := $(dir $(ART_TEST_HOST_GTEST_Main_DEX))$(subst Main,VerifierDepsMulti,$(basename $(notdir $(ART_TEST_HOST_GTEST_Main_DEX))))$(suffix $(ART_TEST_HOST_GTEST_Main_DEX))
+ART_TEST_TARGET_GTEST_VerifierDepsMulti_DEX := $(dir $(ART_TEST_TARGET_GTEST_Main_DEX))$(subst Main,VerifierDepsMulti,$(basename $(notdir $(ART_TEST_TARGET_GTEST_Main_DEX))))$(suffix $(ART_TEST_TARGET_GTEST_Main_DEX))
 
 $(ART_TEST_HOST_GTEST_VerifierDeps_DEX): $(ART_TEST_GTEST_VerifierDeps_SRC) $(HOST_OUT_EXECUTABLES)/smali
 	 $(HOST_OUT_EXECUTABLES)/smali --output=$@ $(filter %.smali,$^)
@@ -84,6 +87,12 @@
 $(ART_TEST_TARGET_GTEST_VerifierDeps_DEX): $(ART_TEST_GTEST_VerifierDeps_SRC) $(HOST_OUT_EXECUTABLES)/smali
 	 $(HOST_OUT_EXECUTABLES)/smali --output=$@ $(filter %.smali,$^)
 
+$(ART_TEST_HOST_GTEST_VerifierDepsMulti_DEX): $(ART_TEST_GTEST_VerifierDepsMulti_SRC) $(HOST_OUT_EXECUTABLES)/smali
+	 $(HOST_OUT_EXECUTABLES)/smali --output=$@ $(filter %.smali,$^)
+
+$(ART_TEST_TARGET_GTEST_VerifierDepsMulti_DEX): $(ART_TEST_GTEST_VerifierDepsMulti_SRC) $(HOST_OUT_EXECUTABLES)/smali
+	 $(HOST_OUT_EXECUTABLES)/smali --output=$@ $(filter %.smali,$^)
+
 # Dex file dependencies for each gtest.
 ART_GTEST_dex2oat_environment_tests_DEX_DEPS := Main MainStripped MultiDex MultiDexModifiedSecondary Nested
 
@@ -115,7 +124,7 @@
 ART_GTEST_transaction_test_DEX_DEPS := Transaction
 ART_GTEST_type_lookup_table_test_DEX_DEPS := Lookup
 ART_GTEST_unstarted_runtime_test_DEX_DEPS := Nested
-ART_GTEST_verifier_deps_test_DEX_DEPS := VerifierDeps MultiDex
+ART_GTEST_verifier_deps_test_DEX_DEPS := VerifierDeps VerifierDepsMulti MultiDex
 ART_GTEST_dex_to_dex_decompiler_test_DEX_DEPS := VerifierDeps DexToDexDecompiler
 
 # The elf writer test has dependencies on core.oat.
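
For reference, the new VerifierDepsMulti dex paths above are derived exactly
like the VerifierDeps ones. A sketch with a hypothetical Main dex location:

    ART_TEST_HOST_GTEST_Main_DEX := out/host/test/art-gtest-Main.dex
    # $(notdir ...)                        -> art-gtest-Main.dex
    # $(basename ...)                      -> art-gtest-Main
    # $(subst Main,VerifierDepsMulti,...)  -> art-gtest-VerifierDepsMulti
    # $(dir ...) + ... + $(suffix ...)     -> out/host/test/art-gtest-VerifierDepsMulti.dex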
diff --git a/build/Android.oat.mk b/build/Android.oat.mk
index f53740e..c733feb 100644
--- a/build/Android.oat.mk
+++ b/build/Android.oat.mk
@@ -109,7 +109,7 @@
 	  --oat-location=$$(PRIVATE_CORE_OAT_NAME) --image=$$(PRIVATE_CORE_IMG_NAME) \
 	  --base=$$(LIBART_IMG_HOST_BASE_ADDRESS) --instruction-set=$$($(2)ART_HOST_ARCH) \
 	  $$(LOCAL_$(2)DEX2OAT_HOST_INSTRUCTION_SET_FEATURES_OPTION) \
-	  --host --android-root=$$(HOST_OUT) --include-patch-information \
+	  --host --android-root=$$(HOST_OUT) \
 	  --generate-debug-info --generate-build-id --compile-pic \
 	  $$(PRIVATE_CORE_MULTI_PARAM) $$(PRIVATE_CORE_COMPILE_OPTIONS)
 
@@ -212,7 +212,7 @@
 	  --base=$$(LIBART_IMG_TARGET_BASE_ADDRESS) --instruction-set=$$($(2)TARGET_ARCH) \
 	  --instruction-set-variant=$$($(2)DEX2OAT_TARGET_CPU_VARIANT) \
 	  --instruction-set-features=$$($(2)DEX2OAT_TARGET_INSTRUCTION_SET_FEATURES) \
-	  --android-root=$$(PRODUCT_OUT)/system --include-patch-information \
+	  --android-root=$$(PRODUCT_OUT)/system \
 	  --generate-debug-info --generate-build-id --compile-pic \
 	  $$(PRIVATE_CORE_COMPILE_OPTIONS) || (rm $$(PRIVATE_CORE_OAT_NAME); exit 1)
 
diff --git a/compiler/Android.bp b/compiler/Android.bp
index f6a4db4..d57f301 100644
--- a/compiler/Android.bp
+++ b/compiler/Android.bp
@@ -52,6 +52,7 @@
         "optimizing/cha_guard_optimization.cc",
         "optimizing/code_generator.cc",
         "optimizing/code_generator_utils.cc",
+        "optimizing/code_sinking.cc",
         "optimizing/constant_folding.cc",
         "optimizing/dead_code_elimination.cc",
         "optimizing/escape.cc",
@@ -111,6 +112,7 @@
                 "optimizing/instruction_simplifier_shared.cc",
                 "optimizing/intrinsics_arm.cc",
                 "optimizing/intrinsics_arm_vixl.cc",
+                "optimizing/nodes_shared.cc",
                 "utils/arm/assembler_arm.cc",
                 "utils/arm/assembler_arm_vixl.cc",
                 "utils/arm/assembler_thumb2.cc",
@@ -127,7 +129,6 @@
                 "optimizing/scheduler_arm64.cc",
                 "optimizing/instruction_simplifier_arm64.cc",
                 "optimizing/intrinsics_arm64.cc",
-                "optimizing/nodes_arm64.cc",
                 "utils/arm64/assembler_arm64.cc",
                 "utils/arm64/jni_macro_assembler_arm64.cc",
                 "utils/arm64/managed_register_arm64.cc",
@@ -350,6 +351,7 @@
         "optimizing/pretty_printer_test.cc",
         "optimizing/reference_type_propagation_test.cc",
         "optimizing/side_effects_test.cc",
+        "optimizing/ssa_liveness_analysis_test.cc",
         "optimizing/ssa_test.cc",
         "optimizing/stack_map_test.cc",
         "optimizing/suspend_check_test.cc",
@@ -416,6 +418,7 @@
 
     shared_libs: [
         "libartd-compiler",
+        "libartd-simulator",
         "libvixld-arm",
         "libvixld-arm64",
 
diff --git a/compiler/common_compiler_test.cc b/compiler/common_compiler_test.cc
index d89cdba..9a45379 100644
--- a/compiler/common_compiler_test.cc
+++ b/compiler/common_compiler_test.cc
@@ -52,10 +52,10 @@
         compiler_driver_->GetCompiledMethod(MethodReference(&dex_file,
                                                             method->GetDexMethodIndex()));
   }
-  if (compiled_method != nullptr) {
+  // If the code size is 0, the method was skipped due to profile-guided compilation.
+  if (compiled_method != nullptr && compiled_method->GetQuickCode().size() != 0u) {
     ArrayRef<const uint8_t> code = compiled_method->GetQuickCode();
     uint32_t code_size = code.size();
-    CHECK_NE(0u, code_size);
     ArrayRef<const uint8_t> vmap_table = compiled_method->GetVmapTable();
     uint32_t vmap_table_offset = vmap_table.empty() ? 0u
         : sizeof(OatQuickMethodHeader) + vmap_table.size();
diff --git a/compiler/compiled_method.h b/compiler/compiled_method.h
index e2a0942..00e2d62 100644
--- a/compiler/compiled_method.h
+++ b/compiler/compiled_method.h
@@ -170,7 +170,6 @@
   // choose to squeeze the Type into fewer than 8 bits, we'll have to declare
   // patch_type_ as an uintN_t and do explicit static_cast<>s.
   enum class Type : uint8_t {
-    kRecordPosition,   // Just record patch position for patchoat.
     kMethod,
     kCall,
     kCallRelative,     // NOTE: Actual patching is instruction_set-dependent.
@@ -183,10 +182,6 @@
     kDexCacheArray,    // NOTE: Actual patching is instruction_set-dependent.
   };
 
-  static LinkerPatch RecordPosition(size_t literal_offset) {
-    return LinkerPatch(literal_offset, Type::kRecordPosition, /* target_dex_file */ nullptr);
-  }
-
   static LinkerPatch MethodPatch(size_t literal_offset,
                                  const DexFile* target_dex_file,
                                  uint32_t target_method_idx) {
diff --git a/compiler/dex/dex_to_dex_decompiler.cc b/compiler/dex/dex_to_dex_decompiler.cc
index bfd485d..85d5784 100644
--- a/compiler/dex/dex_to_dex_decompiler.cc
+++ b/compiler/dex/dex_to_dex_decompiler.cc
@@ -20,7 +20,7 @@
 #include "base/mutex.h"
 #include "dex_file-inl.h"
 #include "dex_instruction-inl.h"
-#include "optimizing/bytecode_utils.h"
+#include "bytecode_utils.h"
 
 namespace art {
 namespace optimizer {
@@ -185,7 +185,7 @@
   }
 
   if (quickened_info_ptr_ != quickened_info_end_) {
-    LOG(ERROR) << "Failed to use all values in quickening info."
+    LOG(FATAL) << "Failed to use all values in quickening info."
                << " Actual: " << std::hex << quickened_info_ptr_
                << " Expected: " << quickened_info_end_;
     return false;
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index 52ffa55..9950987 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -480,7 +480,9 @@
       DCHECK(!it.HasNext());
     }
   }
-  DCHECK_EQ(quickening_info_ptr, quickening_info_end) << "Failed to use all quickening info";
+  if (quickening_info_ptr != quickening_info_end) {
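+    // Unlike the DCHECK_EQ this replaces, the check aborts in release builds too.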
+    LOG(FATAL) << "Failed to use all quickening info";
+  }
 }
 
 void CompilerDriver::CompileAll(jobject class_loader,
@@ -535,9 +537,8 @@
   if (klass->IsVerified()) {
     // Class is verified so we can enable DEX-to-DEX compilation for performance.
     return max_level;
-  } else if (klass->IsCompileTimeVerified()) {
+  } else if (klass->ShouldVerifyAtRuntime()) {
     // Class verification has soft-failed. Anyway, ensure at least correctness.
-    DCHECK_EQ(klass->GetStatus(), mirror::Class::kStatusRetryVerificationAtRuntime);
     return optimizer::DexToDexCompilationLevel::kRequired;
   } else {
     // Class verification has failed: do not run DEX-to-DEX compilation.
@@ -964,7 +965,7 @@
       if (cls == nullptr) {
         soa.Self()->ClearException();
       } else if (&cls->GetDexFile() == dex_file) {
-        DCHECK(cls->IsErroneous() || cls->IsVerified() || cls->IsCompileTimeVerified())
+        DCHECK(cls->IsErroneous() || cls->IsVerified() || cls->ShouldVerifyAtRuntime())
             << cls->PrettyClass()
             << " " << cls->GetStatus();
       }
@@ -1054,11 +1055,16 @@
 }
 
 bool CompilerDriver::ShouldCompileBasedOnProfile(const MethodReference& method_ref) const {
+  // Profile compilation info may be null if no profile is passed.
   if (!CompilerFilter::DependsOnProfile(compiler_options_->GetCompilerFilter())) {
     // Use the compiler filter instead of the presence of profile_compilation_info_ since
     // we may want to have full speed compilation along with profile based layout optimizations.
     return true;
   }
+  // If we are using a profile filter but do not have a profile compilation info, compile nothing.
+  if (profile_compilation_info_ == nullptr) {
+    return false;
+  }
   bool result = profile_compilation_info_->ContainsMethod(method_ref);
 
   if (kDebugProfileGuidedCompilation) {
@@ -2155,6 +2161,14 @@
         LOG(ERROR) << "Verification failed on class " << PrettyDescriptor(descriptor)
                    << " because: " << error_msg;
         manager_->GetCompiler()->SetHadHardVerifierFailure();
+      } else {
+        // Force a soft failure for the VerifierDeps. This is a sanity measure, as
+        // the vdex file already records that the class hasn't been resolved. It avoids
+        // trying to do future verification optimizations when processing the vdex file.
+        DCHECK(failure_kind == verifier::MethodVerifier::kSoftFailure ||
+               failure_kind == verifier::MethodVerifier::kNoFailure)
+            << failure_kind;
+        failure_kind = verifier::MethodVerifier::kSoftFailure;
       }
     } else if (!SkipClass(jclass_loader, dex_file, klass.Get())) {
       CHECK(klass->IsResolved()) << klass->PrettyClass();
@@ -2167,7 +2181,7 @@
         manager_->GetCompiler()->SetHadHardVerifierFailure();
       }
 
-      CHECK(klass->IsCompileTimeVerified() || klass->IsErroneous())
+      CHECK(klass->ShouldVerifyAtRuntime() || klass->IsVerified() || klass->IsErroneous())
           << klass->PrettyDescriptor() << ": state=" << klass->GetStatus();
 
       // It is *very* problematic if there are verification errors in the boot classpath. For example,
@@ -2181,6 +2195,13 @@
           DCHECK(klass->IsVerified()) << "Boot classpath class " << klass->PrettyClass()
               << " failed to fully verify: state= " << klass->GetStatus();
         }
+        if (klass->IsVerified()) {
+          DCHECK_EQ(failure_kind, verifier::MethodVerifier::kNoFailure);
+        } else if (klass->ShouldVerifyAtRuntime()) {
+          DCHECK_EQ(failure_kind, verifier::MethodVerifier::kSoftFailure);
+        } else {
+          DCHECK_EQ(failure_kind, verifier::MethodVerifier::kHardFailure);
+        }
       }
     } else {
       // Make the skip a soft failure, essentially being considered as verify at runtime.
@@ -2278,7 +2299,7 @@
  public:
   explicit InitializeClassVisitor(const ParallelCompilationManager* manager) : manager_(manager) {}
 
-  virtual void Visit(size_t class_def_index) REQUIRES(!Locks::mutator_lock_) OVERRIDE {
+  void Visit(size_t class_def_index) REQUIRES(!Locks::mutator_lock_) OVERRIDE {
     ATRACE_CALL();
     jobject jclass_loader = manager_->GetClassLoader();
     const DexFile& dex_file = *manager_->GetDexFile();
@@ -2338,23 +2359,32 @@
               // mode which prevents the GC from visiting objects modified during the transaction.
               // Ensure GC is not run so don't access freed objects when aborting transaction.
 
-              ScopedAssertNoThreadSuspension ants("Transaction end");
-              runtime->ExitTransactionMode();
+              {
+                ScopedAssertNoThreadSuspension ants("Transaction end");
+                runtime->ExitTransactionMode();
+
+                if (!success) {
+                  CHECK(soa.Self()->IsExceptionPending());
+                  mirror::Throwable* exception = soa.Self()->GetException();
+                  VLOG(compiler) << "Initialization of " << descriptor << " aborted because of "
+                      << exception->Dump();
+                  std::ostream* file_log = manager_->GetCompiler()->
+                      GetCompilerOptions().GetInitFailureOutput();
+                  if (file_log != nullptr) {
+                    *file_log << descriptor << "\n";
+                    *file_log << exception->Dump() << "\n";
+                  }
+                  soa.Self()->ClearException();
+                  transaction.Rollback();
+                  CHECK_EQ(old_status, klass->GetStatus()) << "Previous class status not restored";
+                }
+              }
 
               if (!success) {
-                CHECK(soa.Self()->IsExceptionPending());
-                mirror::Throwable* exception = soa.Self()->GetException();
-                VLOG(compiler) << "Initialization of " << descriptor << " aborted because of "
-                    << exception->Dump();
-                std::ostream* file_log = manager_->GetCompiler()->
-                    GetCompilerOptions().GetInitFailureOutput();
-                if (file_log != nullptr) {
-                  *file_log << descriptor << "\n";
-                  *file_log << exception->Dump() << "\n";
-                }
-                soa.Self()->ClearException();
-                transaction.Rollback();
-                CHECK_EQ(old_status, klass->GetStatus()) << "Previous class status not restored";
+                // On failure, still intern the strings of static fields and those seen in
+                // <clinit>, as these will be created in the zygote. This is separated from
+                // the transaction code just above because we will allocate strings and so
+                // must be allowed to suspend.
+                InternStrings(klass, class_loader);
               }
             }
           }
@@ -2370,6 +2400,57 @@
   }
 
  private:
+  void InternStrings(Handle<mirror::Class> klass, Handle<mirror::ClassLoader> class_loader)
+      REQUIRES_SHARED(Locks::mutator_lock_) {
+    DCHECK(manager_->GetCompiler()->GetCompilerOptions().IsBootImage());
+    DCHECK(klass->IsVerified());
+    DCHECK(!klass->IsInitialized());
+
+    StackHandleScope<1> hs(Thread::Current());
+    Handle<mirror::DexCache> h_dex_cache = hs.NewHandle(klass->GetDexCache());
+    const DexFile* dex_file = manager_->GetDexFile();
+    const DexFile::ClassDef* class_def = klass->GetClassDef();
+    ClassLinker* class_linker = manager_->GetClassLinker();
+
+    // Check encoded final field values for strings and intern.
+    annotations::RuntimeEncodedStaticFieldValueIterator value_it(*dex_file,
+                                                                 &h_dex_cache,
+                                                                 &class_loader,
+                                                                 manager_->GetClassLinker(),
+                                                                 *class_def);
+    for ( ; value_it.HasNext(); value_it.Next()) {
+      if (value_it.GetValueType() == annotations::RuntimeEncodedStaticFieldValueIterator::kString) {
+        // Resolve the string. This will intern the string.
+        art::ObjPtr<mirror::String> resolved = class_linker->ResolveString(
+            *dex_file, dex::StringIndex(value_it.GetJavaValue().i), h_dex_cache);
+        CHECK(resolved != nullptr);
+      }
+    }
+
+    // Intern strings seen in <clinit>.
+    ArtMethod* clinit = klass->FindClassInitializer(class_linker->GetImagePointerSize());
+    if (clinit != nullptr) {
+      const DexFile::CodeItem* code_item = clinit->GetCodeItem();
+      DCHECK(code_item != nullptr);
+      const Instruction* inst = Instruction::At(code_item->insns_);
+
+      const uint32_t insns_size = code_item->insns_size_in_code_units_;
+      for (uint32_t dex_pc = 0; dex_pc < insns_size;) {
+        if (inst->Opcode() == Instruction::CONST_STRING) {
+          ObjPtr<mirror::String> s = class_linker->ResolveString(
+              *dex_file, dex::StringIndex(inst->VRegB_21c()), h_dex_cache);
+          CHECK(s != nullptr);
+        } else if (inst->Opcode() == Instruction::CONST_STRING_JUMBO) {
+          ObjPtr<mirror::String> s = class_linker->ResolveString(
+              *dex_file, dex::StringIndex(inst->VRegB_31c()), h_dex_cache);
+          CHECK(s != nullptr);
+        }
+        dex_pc += inst->SizeInCodeUnits();
+        inst = inst->Next();
+      }
+    }
+  }
+
   const ParallelCompilationManager* const manager_;
 };
 
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index 1e5c43d..cbde587 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -355,6 +355,10 @@
     return current_dex_to_dex_methods_;
   }
 
+  const ProfileCompilationInfo* GetProfileCompilationInfo() const {
+    return profile_compilation_info_;
+  }
+
  private:
   // Can `referrer_class` access the resolved `member`?
   // Dispatch call to mirror::Class::CanAccessResolvedField or
diff --git a/compiler/driver/compiler_driver_test.cc b/compiler/driver/compiler_driver_test.cc
index 97954f3..35aa1ee 100644
--- a/compiler/driver/compiler_driver_test.cc
+++ b/compiler/driver/compiler_driver_test.cc
@@ -133,9 +133,10 @@
         << " " << dex.GetMethodDeclaringClassDescriptor(dex.GetMethodId(i)) << " "
         << dex.GetMethodName(dex.GetMethodId(i));
   }
-  EXPECT_EQ(dex.NumFieldIds(), dex_cache->NumResolvedFields());
+  EXPECT_TRUE(dex_cache->StaticArtFieldSize() == dex_cache->NumResolvedFields()
+      || dex.NumFieldIds() == dex_cache->NumResolvedFields());
   for (size_t i = 0; i < dex_cache->NumResolvedFields(); i++) {
-    ArtField* field = cl->GetResolvedField(i, dex_cache);
+    ArtField* field = dex_cache->GetResolvedField(i, cl->GetImagePointerSize());
     EXPECT_TRUE(field != nullptr) << "field_idx=" << i
                                << " " << dex.GetFieldDeclaringClassDescriptor(dex.GetFieldId(i))
                                << " " << dex.GetFieldName(dex.GetFieldId(i));
@@ -240,9 +241,8 @@
 
     ProfileCompilationInfo info;
     for (const std::unique_ptr<const DexFile>& dex_file : dex_files) {
-      std::string key = ProfileCompilationInfo::GetProfileDexFileKey(dex_file->GetLocation());
-      profile_info_.AddMethodIndex(key, dex_file->GetLocationChecksum(), 1);
-      profile_info_.AddMethodIndex(key, dex_file->GetLocationChecksum(), 2);
+      profile_info_.AddMethodIndex(dex_file->GetLocation(), dex_file->GetLocationChecksum(), 1);
+      profile_info_.AddMethodIndex(dex_file->GetLocation(), dex_file->GetLocationChecksum(), 2);
     }
     return &profile_info_;
   }
diff --git a/compiler/driver/compiler_options.cc b/compiler/driver/compiler_options.cc
index c222f90..34ad1c5 100644
--- a/compiler/driver/compiler_options.cc
+++ b/compiler/driver/compiler_options.cc
@@ -32,7 +32,6 @@
       no_inline_from_(nullptr),
       boot_image_(false),
       app_image_(false),
-      include_patch_information_(kDefaultIncludePatchInformation),
       top_k_profile_threshold_(kDefaultTopKProfileThreshold),
       debuggable_(false),
       generate_debug_info_(kDefaultGenerateDebugInfo),
@@ -66,7 +65,6 @@
                                  size_t inline_depth_limit,
                                  size_t inline_max_code_units,
                                  const std::vector<const DexFile*>* no_inline_from,
-                                 bool include_patch_information,
                                  double top_k_profile_threshold,
                                  bool debuggable,
                                  bool generate_debug_info,
@@ -93,7 +91,6 @@
       no_inline_from_(no_inline_from),
       boot_image_(false),
       app_image_(false),
-      include_patch_information_(include_patch_information),
       top_k_profile_threshold_(top_k_profile_threshold),
       debuggable_(debuggable),
       generate_debug_info_(generate_debug_info),
@@ -206,10 +203,6 @@
     debuggable_ = true;
   } else if (option.starts_with("--top-k-profile-threshold=")) {
     ParseDouble(option.data(), '=', 0.0, 100.0, &top_k_profile_threshold_, Usage);
-  } else if (option == "--include-patch-information") {
-    include_patch_information_ = true;
-  } else if (option == "--no-include-patch-information") {
-    include_patch_information_ = false;
   } else if (option == "--abort-on-hard-verifier-error") {
     abort_on_hard_verifier_failure_ = true;
   } else if (option.starts_with("--dump-init-failures=")) {
diff --git a/compiler/driver/compiler_options.h b/compiler/driver/compiler_options.h
index 6894cd5..2e3e55f 100644
--- a/compiler/driver/compiler_options.h
+++ b/compiler/driver/compiler_options.h
@@ -46,7 +46,6 @@
   static constexpr double kDefaultTopKProfileThreshold = 90.0;
   static const bool kDefaultGenerateDebugInfo = false;
   static const bool kDefaultGenerateMiniDebugInfo = false;
-  static const bool kDefaultIncludePatchInformation = false;
   static const size_t kDefaultInlineDepthLimit = 3;
   static const size_t kDefaultInlineMaxCodeUnits = 32;
   static constexpr size_t kUnsetInlineDepthLimit = -1;
@@ -68,7 +67,6 @@
                   size_t inline_depth_limit,
                   size_t inline_max_code_units,
                   const std::vector<const DexFile*>* no_inline_from,
-                  bool include_patch_information,
                   double top_k_profile_threshold,
                   bool debuggable,
                   bool generate_debug_info,
@@ -213,10 +211,6 @@
     return implicit_suspend_checks_;
   }
 
-  bool GetIncludePatchInformation() const {
-    return include_patch_information_;
-  }
-
   bool IsBootImage() const {
     return boot_image_;
   }
@@ -305,7 +299,6 @@
 
   bool boot_image_;
   bool app_image_;
-  bool include_patch_information_;
   // When using a profile file only the top K% of the profiled samples will be compiled.
   double top_k_profile_threshold_;
   bool debuggable_;
diff --git a/compiler/elf_writer.h b/compiler/elf_writer.h
index d55f745..7baae52 100644
--- a/compiler/elf_writer.h
+++ b/compiler/elf_writer.h
@@ -63,7 +63,6 @@
   virtual void EndText(OutputStream* text) = 0;
   virtual void WriteDynamicSection() = 0;
   virtual void WriteDebugInfo(const ArrayRef<const debug::MethodDebugInfo>& method_infos) = 0;
-  virtual void WritePatchLocations(const ArrayRef<const uintptr_t>& patch_locations) = 0;
   virtual bool End() = 0;
 
   // Get the ELF writer's stream. This stream can be used for writing data directly
diff --git a/compiler/elf_writer_quick.cc b/compiler/elf_writer_quick.cc
index 0d6575c..28c35e9 100644
--- a/compiler/elf_writer_quick.cc
+++ b/compiler/elf_writer_quick.cc
@@ -105,7 +105,6 @@
   void EndText(OutputStream* text) OVERRIDE;
   void WriteDynamicSection() OVERRIDE;
   void WriteDebugInfo(const ArrayRef<const debug::MethodDebugInfo>& method_infos) OVERRIDE;
-  void WritePatchLocations(const ArrayRef<const uintptr_t>& patch_locations) OVERRIDE;
   bool End() OVERRIDE;
 
   virtual OutputStream* GetStream() OVERRIDE;
@@ -268,17 +267,6 @@
 }
 
 template <typename ElfTypes>
-void ElfWriterQuick<ElfTypes>::WritePatchLocations(
-    const ArrayRef<const uintptr_t>& patch_locations) {
-  // Add relocation section for .text.
-  if (compiler_options_->GetIncludePatchInformation()) {
-    // Note that ElfWriter::Fixup will be called regardless and therefore
-    // we need to include oat_patches for debug sections unconditionally.
-    builder_->WritePatches(".text.oat_patches", patch_locations);
-  }
-}
-
-template <typename ElfTypes>
 bool ElfWriterQuick<ElfTypes>::End() {
   builder_->End();
   if (compiler_options_->GetGenerateBuildId()) {
diff --git a/compiler/image_test.cc b/compiler/image_test.cc
index b0225a3..89e8a67 100644
--- a/compiler/image_test.cc
+++ b/compiler/image_test.cc
@@ -318,7 +318,6 @@
 
         elf_writer->WriteDynamicSection();
         elf_writer->WriteDebugInfo(oat_writer->GetMethodDebugInfo());
-        elf_writer->WritePatchLocations(oat_writer->GetAbsolutePatchLocations());
 
         bool success = elf_writer->End();
         ASSERT_TRUE(success);
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index 117d113..aa73456 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -238,10 +238,11 @@
       case ImageHeader::kStorageModeLZ4: {
         const size_t compressed_max_size = LZ4_compressBound(image_data_size);
         compressed_data.reset(new char[compressed_max_size]);
-        data_size = LZ4_compress(
+        data_size = LZ4_compress_default(
             reinterpret_cast<char*>(image_info.image_->Begin()) + sizeof(ImageHeader),
             &compressed_data[0],
-            image_data_size);
+            image_data_size,
+            compressed_max_size);
 
         break;
       }
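
The hunk above replaces the deprecated LZ4_compress() with
LZ4_compress_default(), which takes the destination capacity and therefore
cannot overrun the output buffer. A minimal stand-alone sketch of that API
(illustrative, not ART code):

    #include <lz4.h>
    #include <memory>

    // Returns the compressed size in bytes, or 0 on failure.
    int Compress(const char* src, int src_size, std::unique_ptr<char[]>* out) {
      const int max_size = LZ4_compressBound(src_size);  // worst-case output size
      out->reset(new char[max_size]);
      return LZ4_compress_default(src, out->get(), src_size, max_size);
    }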
@@ -967,11 +968,12 @@
             << Class::PrettyClass(declaring_class) << " not in class linker table";
       }
     }
-    ArtField** resolved_fields = dex_cache->GetResolvedFields();
+    mirror::FieldDexCacheType* resolved_fields = dex_cache->GetResolvedFields();
     for (size_t i = 0; i < dex_cache->NumResolvedFields(); i++) {
-      ArtField* field = mirror::DexCache::GetElementPtrSize(resolved_fields, i, target_ptr_size_);
+      auto pair = mirror::DexCache::GetNativePairPtrSize(resolved_fields, i, target_ptr_size_);
+      ArtField* field = pair.object;
       if (field != nullptr && !KeepClass(field->GetDeclaringClass().Ptr())) {
-        dex_cache->SetResolvedField(i, nullptr, target_ptr_size_);
+        dex_cache->ClearResolvedField(pair.index, target_ptr_size_);
       }
     }
     // Clean the dex field. It might have been populated during the initialization phase, but
@@ -1576,10 +1578,8 @@
     }
     // Calculate the size of the class table.
     ReaderMutexLock mu(self, *Locks::classlinker_classes_lock_);
-    CHECK_EQ(class_loaders_.size(), compile_app_image_ ? 1u : 0u);
-    mirror::ClassLoader* class_loader = compile_app_image_ ? *class_loaders_.begin() : nullptr;
-    DCHECK_EQ(image_info.class_table_->NumZygoteClasses(class_loader), 0u);
-    if (image_info.class_table_->NumNonZygoteClasses(class_loader) != 0u) {
+    DCHECK_EQ(image_info.class_table_->NumReferencedZygoteClasses(), 0u);
+    if (image_info.class_table_->NumReferencedNonZygoteClasses() != 0u) {
       image_info.class_table_bytes_ += image_info.class_table_->WriteToMemory(nullptr);
     }
   }
@@ -1595,7 +1595,7 @@
           break;
         }
         case kBinDexCacheArray:
-          bin_offset = RoundUp(bin_offset, DexCacheArraysLayout::Alignment());
+          bin_offset = RoundUp(bin_offset, DexCacheArraysLayout::Alignment(target_ptr_size_));
           break;
         case kBinImTable:
         case kBinIMTConflictTable: {
@@ -1924,9 +1924,8 @@
     // above comment for intern tables.
     ClassTable temp_class_table;
     temp_class_table.ReadFromMemory(class_table_memory_ptr);
-    ObjPtr<mirror::ClassLoader> class_loader = GetClassLoader();
-    CHECK_EQ(temp_class_table.NumZygoteClasses(class_loader),
-             table->NumNonZygoteClasses(class_loader) + table->NumZygoteClasses(class_loader));
+    CHECK_EQ(temp_class_table.NumReferencedZygoteClasses(),
+             table->NumReferencedNonZygoteClasses() + table->NumReferencedZygoteClasses());
     UnbufferedRootVisitor visitor(&root_visitor, RootInfo(kRootUnknown));
     temp_class_table.VisitRoots(visitor);
   }
@@ -2235,16 +2234,17 @@
       mirror::DexCache::SetElementPtrSize(copy_methods, i, copy, target_ptr_size_);
     }
   }
-  ArtField** orig_fields = orig_dex_cache->GetResolvedFields();
+  mirror::FieldDexCacheType* orig_fields = orig_dex_cache->GetResolvedFields();
   if (orig_fields != nullptr) {
     copy_dex_cache->SetFieldPtrWithSize<false>(mirror::DexCache::ResolvedFieldsOffset(),
                                                NativeLocationInImage(orig_fields),
                                                PointerSize::k64);
-    ArtField** copy_fields = NativeCopyLocation(orig_fields, orig_dex_cache);
+    mirror::FieldDexCacheType* copy_fields = NativeCopyLocation(orig_fields, orig_dex_cache);
     for (size_t i = 0, num = orig_dex_cache->NumResolvedFields(); i != num; ++i) {
-      ArtField* orig = mirror::DexCache::GetElementPtrSize(orig_fields, i, target_ptr_size_);
-      ArtField* copy = NativeLocationInImage(orig);
-      mirror::DexCache::SetElementPtrSize(copy_fields, i, copy, target_ptr_size_);
+      mirror::FieldDexCachePair orig =
+          mirror::DexCache::GetNativePairPtrSize(orig_fields, i, target_ptr_size_);
+      mirror::FieldDexCachePair copy(NativeLocationInImage(orig.object), orig.index);
+      mirror::DexCache::SetNativePairPtrSize(copy_fields, i, copy, target_ptr_size_);
     }
   }
   mirror::MethodTypeDexCacheType* orig_method_types = orig_dex_cache->GetResolvedMethodTypes();
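
The FieldDexCachePair/GetNativePairPtrSize changes above reflect that dex
caches now store index-tagged pairs rather than bare ArtField pointers, so a
cache smaller than NumFieldIds() can tell which dex index a slot holds. A
rough sketch of the idea (assumed shape, not the actual ART declaration):

    template <typename T>
    struct NativePair {
      T* object;     // resolved entry, or nullptr
      size_t index;  // dex index this slot currently caches
    };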
diff --git a/compiler/intrinsics_list.h b/compiler/intrinsics_list.h
index 9bd25d8..63c23cb 100644
--- a/compiler/intrinsics_list.h
+++ b/compiler/intrinsics_list.h
@@ -24,6 +24,10 @@
 // Note: adding a new intrinsic requires an art image version change,
 // as the modifiers flag for some ArtMethods will need to be changed.
 
+// Note: j.l.Integer.valueOf says kNoThrow even though it could throw an OOME.
+// kNoThrow should be renamed to kNoVisibleThrow, as it is OK to GVN Integer.valueOf
+// (kNoSideEffects), and it is also OK to remove it if it is unused.
+
 #define INTRINSICS_LIST(V) \
   V(DoubleDoubleToRawLongBits, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow, "Ljava/lang/Double;", "doubleToRawLongBits", "(D)J") \
   V(DoubleDoubleToLongBits, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow, "Ljava/lang/Double;", "doubleToLongBits", "(D)J") \
@@ -149,7 +153,8 @@
   V(UnsafeLoadFence, kVirtual, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Lsun/misc/Unsafe;", "loadFence", "()V") \
   V(UnsafeStoreFence, kVirtual, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Lsun/misc/Unsafe;", "storeFence", "()V") \
   V(UnsafeFullFence, kVirtual, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Lsun/misc/Unsafe;", "fullFence", "()V") \
-  V(ReferenceGetReferent, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Ljava/lang/ref/Reference;", "getReferent", "()Ljava/lang/Object;")
+  V(ReferenceGetReferent, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Ljava/lang/ref/Reference;", "getReferent", "()Ljava/lang/Object;") \
+  V(IntegerValueOf, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow, "Ljava/lang/Integer;", "valueOf", "(I)Ljava/lang/Integer;")
 
 #endif  // ART_COMPILER_INTRINSICS_LIST_H_
 #undef ART_COMPILER_INTRINSICS_LIST_H_   // #define is only for lint.
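
INTRINSICS_LIST(V) is an X-macro: each consumer supplies its own V and the
list expands once per intrinsic. A minimal hypothetical consumer that just
counts the entries:

    #define COUNT_INTRINSIC(...) + 1
    static constexpr int kNumIntrinsics = 0 INTRINSICS_LIST(COUNT_INTRINSIC);
    #undef COUNT_INTRINSIC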
diff --git a/compiler/jit/jit_compiler.cc b/compiler/jit/jit_compiler.cc
index cbd831a..3ae7974 100644
--- a/compiler/jit/jit_compiler.cc
+++ b/compiler/jit/jit_compiler.cc
@@ -100,7 +100,6 @@
       CompilerOptions::kDefaultInlineDepthLimit,
       CompilerOptions::kDefaultInlineMaxCodeUnits,
       /* no_inline_from */ nullptr,
-      /* include_patch_information */ false,
       CompilerOptions::kDefaultTopKProfileThreshold,
       Runtime::Current()->IsJavaDebuggable(),
       CompilerOptions::kDefaultGenerateDebugInfo,
diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc
index 66111f6..97b1374 100644
--- a/compiler/oat_test.cc
+++ b/compiler/oat_test.cc
@@ -250,7 +250,6 @@
 
     elf_writer->WriteDynamicSection();
     elf_writer->WriteDebugInfo(oat_writer.GetMethodDebugInfo());
-    elf_writer->WritePatchLocations(oat_writer.GetAbsolutePatchLocations());
 
     if (!elf_writer->End()) {
       return false;
@@ -265,6 +264,7 @@
 
   void TestDexFileInput(bool verify, bool low_4gb, bool use_profile);
   void TestZipFileInput(bool verify);
+  void TestZipFileInputWithEmptyDex();
 
   std::unique_ptr<const InstructionSetFeatures> insn_features_;
   std::unique_ptr<QuickCompilerCallbacks> callbacks_;
@@ -821,6 +821,28 @@
   TestZipFileInput(true);
 }
 
+void OatTest::TestZipFileInputWithEmptyDex() {
+  ScratchFile zip_file;
+  ZipBuilder zip_builder(zip_file.GetFile());
+  bool success = zip_builder.AddFile("classes.dex", nullptr, 0);
+  ASSERT_TRUE(success);
+  success = zip_builder.Finish();
+  ASSERT_TRUE(success) << strerror(errno);
+
+  SafeMap<std::string, std::string> key_value_store;
+  key_value_store.Put(OatHeader::kImageLocationKey, "test.art");
+  std::vector<const char*> input_filenames { zip_file.GetFilename().c_str() };  // NOLINT [readability/braces] [4]
+  ScratchFile oat_file, vdex_file(oat_file, ".vdex");
+  std::unique_ptr<ProfileCompilationInfo> profile_compilation_info(new ProfileCompilationInfo());
+  success = WriteElf(vdex_file.GetFile(), oat_file.GetFile(), input_filenames,
+                     key_value_store, /*verify*/false, profile_compilation_info.get());
+  ASSERT_FALSE(success);
+}
+
+TEST_F(OatTest, ZipFileInputWithEmptyDex) {
+  TestZipFileInputWithEmptyDex();
+}
+
 TEST_F(OatTest, UpdateChecksum) {
   InstructionSet insn_set = kX86;
   std::string error_msg;
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index 0ea1125..afcdf5e 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -104,6 +104,13 @@
 // Defines the location of the raw dex file to write.
 class OatWriter::DexFileSource {
  public:
+  enum Type {
+    kNone,
+    kZipEntry,
+    kRawFile,
+    kRawData,
+  };
+
   explicit DexFileSource(ZipEntry* zip_entry)
       : type_(kZipEntry), source_(zip_entry) {
     DCHECK(source_ != nullptr);
@@ -119,6 +126,7 @@
     DCHECK(source_ != nullptr);
   }
 
+  Type GetType() const { return type_; }
   bool IsZipEntry() const { return type_ == kZipEntry; }
   bool IsRawFile() const { return type_ == kRawFile; }
   bool IsRawData() const { return type_ == kRawData; }
@@ -147,13 +155,6 @@
   }
 
  private:
-  enum Type {
-    kNone,
-    kZipEntry,
-    kRawFile,
-    kRawData,
-  };
-
   Type type_;
   const void* source_;
 };
@@ -1224,7 +1225,7 @@
                 break;
               }
               default: {
-                DCHECK_EQ(patch.GetType(), LinkerPatch::Type::kRecordPosition);
+                DCHECK(false) << "Unexpected linker patch type: " << patch.GetType();
                 break;
               }
             }
@@ -2259,6 +2260,10 @@
     ZipEntry* zip_entry = oat_dex_file->source_.GetZipEntry();
     std::unique_ptr<MemMap> mem_map(
         zip_entry->ExtractToMemMap(location.c_str(), "classes.dex", &error_msg));
+    if (mem_map == nullptr) {
+      LOG(ERROR) << "Failed to extract dex file to mem map for layout: " << error_msg;
+      return false;
+    }
     dex_file = DexFile::Open(location,
                              zip_entry->GetCrc32(),
                              std::move(mem_map),
@@ -2266,7 +2271,8 @@
                              /* verify_checksum */ true,
                              &error_msg);
   } else {
-    DCHECK(oat_dex_file->source_.IsRawFile());
+    CHECK(oat_dex_file->source_.IsRawFile())
+        << static_cast<size_t>(oat_dex_file->source_.GetType());
     File* raw_file = oat_dex_file->source_.GetRawFile();
     dex_file = DexFile::OpenDex(raw_file->Fd(), location, /* verify_checksum */ true, &error_msg);
   }
diff --git a/compiler/oat_writer.h b/compiler/oat_writer.h
index db84166..5113714 100644
--- a/compiler/oat_writer.h
+++ b/compiler/oat_writer.h
@@ -225,10 +225,6 @@
     return oat_data_offset_;
   }
 
-  ArrayRef<const uintptr_t> GetAbsolutePatchLocations() const {
-    return ArrayRef<const uintptr_t>(absolute_patch_locations_);
-  }
-
   ~OatWriter();
 
   void AddMethodDebugInfos(const std::vector<debug::MethodDebugInfo>& infos) {
diff --git a/compiler/optimizing/bounds_check_elimination_test.cc b/compiler/optimizing/bounds_check_elimination_test.cc
index 5d58207..cb6e14b 100644
--- a/compiler/optimizing/bounds_check_elimination_test.cc
+++ b/compiler/optimizing/bounds_check_elimination_test.cc
@@ -43,7 +43,7 @@
   void RunBCE() {
     graph_->BuildDominatorTree();
 
-    InstructionSimplifier(graph_).Run();
+    InstructionSimplifier(graph_, /* codegen */ nullptr).Run();
 
     SideEffectsAnalysis side_effects(graph_);
     side_effects.Run();
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 8dd423f..424b850 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -861,8 +861,11 @@
 bool CodeGenerator::HasStackMapAtCurrentPc() {
   uint32_t pc = GetAssembler()->CodeSize();
   size_t count = stack_map_stream_.GetNumberOfStackMaps();
+  if (count == 0) {
+    return false;
+  }
   CodeOffset native_pc_offset = stack_map_stream_.GetStackMap(count - 1).native_pc_code_offset;
-  return (count > 0) && (native_pc_offset.Uint32Value(GetInstructionSet()) == pc);
+  return (native_pc_offset.Uint32Value(GetInstructionSet()) == pc);
 }
 
 void CodeGenerator::MaybeRecordNativeDebugInfo(HInstruction* instruction,
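
The HasStackMapAtCurrentPc() hunk above fixes an order-of-evaluation bug:
GetStackMap(count - 1) was executed as its own statement, so the later
`count > 0` test could not guard it. The hazard in isolation (illustrative,
not ART code):

    size_t count = maps.size();
    auto last = maps[count - 1];            // out of bounds when count == 0
    return (count > 0) && (last.pc == pc);  // guard arrives too late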
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 759a951..e34f116 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -19,6 +19,7 @@
 #include "arch/arm/instruction_set_features_arm.h"
 #include "art_method.h"
 #include "code_generator_utils.h"
+#include "common_arm.h"
 #include "compiled_method.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "gc/accounting/card_table.h"
@@ -635,56 +636,25 @@
   DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathARM);
 };
 
-// Slow path marking an object reference `ref` during a read
-// barrier. The field `obj.field` in the object `obj` holding this
-// reference does not get updated by this slow path after marking (see
-// ReadBarrierMarkAndUpdateFieldSlowPathARM below for that).
+// Abstract base class for read barrier slow paths marking a reference
+// `ref`.
 //
-// This means that after the execution of this slow path, `ref` will
-// always be up-to-date, but `obj.field` may not; i.e., after the
-// flip, `ref` will be a to-space reference, but `obj.field` will
-// probably still be a from-space reference (unless it gets updated by
-// another thread, or if another thread installed another object
-// reference (different from `ref`) in `obj.field`).
-//
-// If `entrypoint` is a valid location it is assumed to already be
-// holding the entrypoint. The case where the entrypoint is passed in
-// is for the GcRoot read barrier.
-class ReadBarrierMarkSlowPathARM : public SlowPathCodeARM {
- public:
-  ReadBarrierMarkSlowPathARM(HInstruction* instruction,
-                             Location ref,
-                             Location entrypoint = Location::NoLocation())
+// Argument `entrypoint` must be a register location holding the read
+// barrier marking runtime entry point to be invoked.
+class ReadBarrierMarkSlowPathBaseARM : public SlowPathCodeARM {
+ protected:
+  ReadBarrierMarkSlowPathBaseARM(HInstruction* instruction, Location ref, Location entrypoint)
       : SlowPathCodeARM(instruction), ref_(ref), entrypoint_(entrypoint) {
     DCHECK(kEmitCompilerReadBarrier);
   }
 
-  const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathARM"; }
+  const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathBaseARM"; }
 
-  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
-    LocationSummary* locations = instruction_->GetLocations();
+  // Generate assembly code calling the read barrier marking runtime
+  // entry point (ReadBarrierMarkRegX).
+  void GenerateReadBarrierMarkRuntimeCall(CodeGenerator* codegen) {
     Register ref_reg = ref_.AsRegister<Register>();
-    DCHECK(locations->CanCall());
-    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
-    DCHECK(instruction_->IsInstanceFieldGet() ||
-           instruction_->IsStaticFieldGet() ||
-           instruction_->IsArrayGet() ||
-           instruction_->IsArraySet() ||
-           instruction_->IsLoadClass() ||
-           instruction_->IsLoadString() ||
-           instruction_->IsInstanceOf() ||
-           instruction_->IsCheckCast() ||
-           (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) ||
-           (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified()))
-        << "Unexpected instruction in read barrier marking slow path: "
-        << instruction_->DebugName();
-    // The read barrier instrumentation of object ArrayGet
-    // instructions does not support the HIntermediateAddress
-    // instruction.
-    DCHECK(!(instruction_->IsArrayGet() &&
-             instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress()));
 
-    __ Bind(GetEntryLabel());
     // No need to save live registers; it's taken care of by the
     // entrypoint. Also, there is no need to update the stack mask,
     // as this runtime call will not trigger a garbage collection.
@@ -714,116 +684,331 @@
       arm_codegen->ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction_, this);
       __ blx(entrypoint_.AsRegister<Register>());
     } else {
+      // The entrypoint is not already loaded; load it from the thread.
       int32_t entry_point_offset =
           CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref_reg);
       // This runtime call does not require a stack map.
       arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
     }
+  }
+
+  // The location (register) of the marked object reference.
+  const Location ref_;
+
+  // The location of the entrypoint if it is already loaded.
+  const Location entrypoint_;
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathBaseARM);
+};
+
+// Slow path marking an object reference `ref` during a read
+// barrier. The field `obj.field` in the object `obj` holding this
+// reference does not get updated by this slow path after marking.
+//
+// This means that after the execution of this slow path, `ref` will
+// always be up-to-date, but `obj.field` may not; i.e., after the
+// flip, `ref` will be a to-space reference, but `obj.field` will
+// probably still be a from-space reference (unless it gets updated by
+// another thread, or if another thread installed another object
+// reference (different from `ref`) in `obj.field`).
+//
+// If `entrypoint` is a valid location it is assumed to already be
+// holding the entrypoint. The case where the entrypoint is passed in
+// is when the decision to mark is based on whether the GC is marking.
+class ReadBarrierMarkSlowPathARM : public ReadBarrierMarkSlowPathBaseARM {
+ public:
+  ReadBarrierMarkSlowPathARM(HInstruction* instruction,
+                             Location ref,
+                             Location entrypoint = Location::NoLocation())
+      : ReadBarrierMarkSlowPathBaseARM(instruction, ref, entrypoint) {
+    DCHECK(kEmitCompilerReadBarrier);
+  }
+
+  const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathARM"; }
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    LocationSummary* locations = instruction_->GetLocations();
+    DCHECK(locations->CanCall());
+    if (kIsDebugBuild) {
+      Register ref_reg = ref_.AsRegister<Register>();
+      DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
+    }
+    DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
+        << "Unexpected instruction in read barrier marking slow path: "
+        << instruction_->DebugName();
+
+    __ Bind(GetEntryLabel());
+    GenerateReadBarrierMarkRuntimeCall(codegen);
     __ b(GetExitLabel());
   }
 
  private:
-  // The location (register) of the marked object reference.
-  const Location ref_;
-
-  // The location of the entrypoint if already loaded.
-  const Location entrypoint_;
-
   DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathARM);
 };
 
-// Slow path marking an object reference `ref` during a read barrier,
-// and if needed, atomically updating the field `obj.field` in the
-// object `obj` holding this reference after marking (contrary to
-// ReadBarrierMarkSlowPathARM above, which never tries to update
-// `obj.field`).
+// Slow path loading `obj`'s lock word, loading a reference from
+// object `*(obj + offset + (index << scale_factor))` into `ref`, and
+// marking `ref` if `obj` is gray according to the lock word (Baker
+// read barrier). The field `obj.field` in the object `obj` holding
+// this reference does not get updated by this slow path after marking
+// (see LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM
+// below for that).
 //
-// This means that after the execution of this slow path, both `ref`
-// and `obj.field` will be up-to-date; i.e., after the flip, both will
-// hold the same to-space reference (unless another thread installed
-// another object reference (different from `ref`) in `obj.field`).
-class ReadBarrierMarkAndUpdateFieldSlowPathARM : public SlowPathCodeARM {
+// This means that after the execution of this slow path, `ref` will
+// always be up-to-date, but `obj.field` may not; i.e., after the
+// flip, `ref` will be a to-space reference, but `obj.field` will
+// probably still be a from-space reference (unless it gets updated by
+// another thread, or if another thread installed another object
+// reference (different from `ref`) in `obj.field`).
+//
+// Argument `entrypoint` must be a register location holding the read
+// barrier marking runtime entry point to be invoked.
+class LoadReferenceWithBakerReadBarrierSlowPathARM : public ReadBarrierMarkSlowPathBaseARM {
  public:
-  ReadBarrierMarkAndUpdateFieldSlowPathARM(HInstruction* instruction,
-                                           Location ref,
-                                           Register obj,
-                                           Location field_offset,
-                                           Register temp1,
-                                           Register temp2)
-      : SlowPathCodeARM(instruction),
-        ref_(ref),
+  LoadReferenceWithBakerReadBarrierSlowPathARM(HInstruction* instruction,
+                                               Location ref,
+                                               Register obj,
+                                               uint32_t offset,
+                                               Location index,
+                                               ScaleFactor scale_factor,
+                                               bool needs_null_check,
+                                               Register temp,
+                                               Location entrypoint)
+      : ReadBarrierMarkSlowPathBaseARM(instruction, ref, entrypoint),
         obj_(obj),
-        field_offset_(field_offset),
-        temp1_(temp1),
-        temp2_(temp2) {
+        offset_(offset),
+        index_(index),
+        scale_factor_(scale_factor),
+        needs_null_check_(needs_null_check),
+        temp_(temp) {
     DCHECK(kEmitCompilerReadBarrier);
+    DCHECK(kUseBakerReadBarrier);
   }
 
-  const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkAndUpdateFieldSlowPathARM"; }
+  const char* GetDescription() const OVERRIDE {
+    return "LoadReferenceWithBakerReadBarrierSlowPathARM";
+  }
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = instruction_->GetLocations();
     Register ref_reg = ref_.AsRegister<Register>();
     DCHECK(locations->CanCall());
     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
-    // This slow path is only used by the UnsafeCASObject intrinsic.
+    DCHECK_NE(ref_reg, temp_);
+    DCHECK(instruction_->IsInstanceFieldGet() ||
+           instruction_->IsStaticFieldGet() ||
+           instruction_->IsArrayGet() ||
+           instruction_->IsArraySet() ||
+           instruction_->IsInstanceOf() ||
+           instruction_->IsCheckCast() ||
+           (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) ||
+           (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified()))
+        << "Unexpected instruction in read barrier marking slow path: "
+        << instruction_->DebugName();
+    // The read barrier instrumentation of object ArrayGet
+    // instructions does not support the HIntermediateAddress
+    // instruction.
+    DCHECK(!(instruction_->IsArrayGet() &&
+             instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress()));
+
+    __ Bind(GetEntryLabel());
+
+    // When using MaybeGenerateReadBarrierSlow, the read barrier call is
+    // inserted after the original load. However, in fast path based
+    // Baker's read barriers, we need to perform the load of
+    // mirror::Object::monitor_ *before* the original reference load.
+    // This load-load ordering is required by the read barrier.
+    // The fast path/slow path (for Baker's algorithm) should look like:
+    //
+    //   uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
+    //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
+    //   HeapReference<mirror::Object> ref = *src;  // Original reference load.
+    //   bool is_gray = (rb_state == ReadBarrier::GrayState());
+    //   if (is_gray) {
+    //     ref = entrypoint(ref);  // ref = ReadBarrier::Mark(ref);  // Runtime entry point call.
+    //   }
+    //
+    // Note: the original implementation in ReadBarrier::Barrier is
+    // slightly more complex as it performs additional checks that we do
+    // not do here for performance reasons.
+
+    // /* int32_t */ monitor = obj->monitor_
+    uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
+    __ LoadFromOffset(kLoadWord, temp_, obj_, monitor_offset);
+    if (needs_null_check_) {
+      codegen->MaybeRecordImplicitNullCheck(instruction_);
+    }
+    // /* LockWord */ lock_word = LockWord(monitor)
+    static_assert(sizeof(LockWord) == sizeof(int32_t),
+                  "art::LockWord and int32_t have different sizes.");
+
+    // Introduce a dependency on the lock_word including the rb_state,
+    // which shall prevent load-load reordering without using
+    // a memory barrier (which would be more expensive).
+    // `obj` is unchanged by this operation, but its value now depends
+    // on `temp`.
+    __ add(obj_, obj_, ShifterOperand(temp_, LSR, 32));
+
+    // The actual reference load.
+    // A possible implicit null check has already been handled above.
+    CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
+    arm_codegen->GenerateRawReferenceLoad(
+        instruction_, ref_, obj_, offset_, index_, scale_factor_, /* needs_null_check */ false);
+
+    // Mark the object `ref` when `obj` is gray.
+    //
+    // if (rb_state == ReadBarrier::GrayState())
+    //   ref = ReadBarrier::Mark(ref);
+    //
+    // Given the numeric representation, it's enough to check the low bit of the
+    // rb_state. We do that by shifting the bit out of the lock word with LSRS
+    // which can be a 16-bit instruction unlike the TST immediate.
+    static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
+    static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
+    __ Lsrs(temp_, temp_, LockWord::kReadBarrierStateShift + 1);
+    __ b(GetExitLabel(), CC);  // Carry flag is the last bit shifted out by LSRS.
+    GenerateReadBarrierMarkRuntimeCall(codegen);
+
+    __ b(GetExitLabel());
+  }
+
+ private:
+  // The register containing the object holding the marked object reference field.
+  Register obj_;
+  // The offset, index and scale factor to access the reference in `obj_`.
+  uint32_t offset_;
+  Location index_;
+  ScaleFactor scale_factor_;
+  // Is a null check required?
+  bool needs_null_check_;
+  // A temporary register used to hold the lock word of `obj_`.
+  Register temp_;
+
+  DISALLOW_COPY_AND_ASSIGN(LoadReferenceWithBakerReadBarrierSlowPathARM);
+};
+
+// Slow path loading `obj`'s lock word, loading a reference from
+// object `*(obj + offset + (index << scale_factor))` into `ref`, and
+// marking `ref` if `obj` is gray according to the lock word (Baker
+// read barrier). If needed, this slow path also atomically updates
+// the field `obj.field` in the object `obj` holding this reference
+// after marking (contrary to
+// LoadReferenceWithBakerReadBarrierSlowPathARM above, which never
+// tries to update `obj.field`).
+//
+// This means that after the execution of this slow path, both `ref`
+// and `obj.field` will be up-to-date; i.e., after the flip, both will
+// hold the same to-space reference (unless another thread installed
+// another object reference (different from `ref`) in `obj.field`).
+//
+// Argument `entrypoint` must be a register location holding the read
+// barrier marking runtime entry point to be invoked.
+class LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM
+    : public ReadBarrierMarkSlowPathBaseARM {
+ public:
+  LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM(HInstruction* instruction,
+                                                             Location ref,
+                                                             Register obj,
+                                                             uint32_t offset,
+                                                             Location index,
+                                                             ScaleFactor scale_factor,
+                                                             bool needs_null_check,
+                                                             Register temp1,
+                                                             Register temp2,
+                                                             Location entrypoint)
+      : ReadBarrierMarkSlowPathBaseARM(instruction, ref, entrypoint),
+        obj_(obj),
+        offset_(offset),
+        index_(index),
+        scale_factor_(scale_factor),
+        needs_null_check_(needs_null_check),
+        temp1_(temp1),
+        temp2_(temp2) {
+    DCHECK(kEmitCompilerReadBarrier);
+    DCHECK(kUseBakerReadBarrier);
+  }
+
+  const char* GetDescription() const OVERRIDE {
+    return "LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM";
+  }
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    LocationSummary* locations = instruction_->GetLocations();
+    Register ref_reg = ref_.AsRegister<Register>();
+    DCHECK(locations->CanCall());
+    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
+    DCHECK_NE(ref_reg, temp1_);
+
+    // This slow path is only used by the UnsafeCASObject intrinsic at the moment.
     DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
         << "Unexpected instruction in read barrier marking and field updating slow path: "
         << instruction_->DebugName();
     DCHECK(instruction_->GetLocations()->Intrinsified());
     DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject);
-    DCHECK(field_offset_.IsRegisterPair()) << field_offset_;
+    DCHECK_EQ(offset_, 0u);
+    DCHECK_EQ(scale_factor_, ScaleFactor::TIMES_1);
+    // The location of the offset of the marked reference field within `obj_`.
+    Location field_offset = index_;
+    DCHECK(field_offset.IsRegisterPair()) << field_offset;
 
     __ Bind(GetEntryLabel());
 
-    // Save the old reference.
+    // /* int32_t */ monitor = obj->monitor_
+    uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
+    __ LoadFromOffset(kLoadWord, temp1_, obj_, monitor_offset);
+    if (needs_null_check_) {
+      codegen->MaybeRecordImplicitNullCheck(instruction_);
+    }
+    // /* LockWord */ lock_word = LockWord(monitor)
+    static_assert(sizeof(LockWord) == sizeof(int32_t),
+                  "art::LockWord and int32_t have different sizes.");
+
+    // Introduce a dependency on the lock_word including the rb_state,
+    // which shall prevent load-load reordering without using
+    // a memory barrier (which would be more expensive).
+    // `obj` is unchanged by this operation, but its value now depends
+    // on `temp1`.
+    __ add(obj_, obj_, ShifterOperand(temp1_, LSR, 32));
+
+    // The actual reference load.
+    // A possible implicit null check has already been handled above.
+    CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
+    arm_codegen->GenerateRawReferenceLoad(
+        instruction_, ref_, obj_, offset_, index_, scale_factor_, /* needs_null_check */ false);
+
+    // Mark the object `ref` when `obj` is gray.
+    //
+    // if (rb_state == ReadBarrier::GrayState())
+    //   ref = ReadBarrier::Mark(ref);
+    //
+    // Given the numeric representation, it's enough to check the low bit of the
+    // rb_state. We do that by shifting the bit out of the lock word with LSRS,
+    // which can be a 16-bit instruction, unlike the TST immediate.
+    static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
+    static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
+    __ Lsrs(temp1_, temp1_, LockWord::kReadBarrierStateShift + 1);
+    __ b(GetExitLabel(), CC);  // Carry flag is the last bit shifted out by LSRS.
+
+    // Save the old value of the reference before marking it.
     // Note that we cannot use IP to save the old reference, as IP is
     // used internally by the ReadBarrierMarkRegX entry point, and we
     // need the old reference after the call to that entry point.
     DCHECK_NE(temp1_, IP);
     __ Mov(temp1_, ref_reg);
 
-    // No need to save live registers; it's taken care of by the
-    // entrypoint. Also, there is no need to update the stack mask,
-    // as this runtime call will not trigger a garbage collection.
-    CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
-    DCHECK_NE(ref_reg, SP);
-    DCHECK_NE(ref_reg, LR);
-    DCHECK_NE(ref_reg, PC);
-    // IP is used internally by the ReadBarrierMarkRegX entry point
-    // as a temporary, it cannot be the entry point's input/output.
-    DCHECK_NE(ref_reg, IP);
-    DCHECK(0 <= ref_reg && ref_reg < kNumberOfCoreRegisters) << ref_reg;
-    // "Compact" slow path, saving two moves.
-    //
-    // Instead of using the standard runtime calling convention (input
-    // and output in R0):
-    //
-    //   R0 <- ref
-    //   R0 <- ReadBarrierMark(R0)
-    //   ref <- R0
-    //
-    // we just use rX (the register containing `ref`) as input and output
-    // of a dedicated entrypoint:
-    //
-    //   rX <- ReadBarrierMarkRegX(rX)
-    //
-    int32_t entry_point_offset =
-        CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref_reg);
-    // This runtime call does not require a stack map.
-    arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
+    GenerateReadBarrierMarkRuntimeCall(codegen);
 
     // If the new reference is different from the old reference,
-    // update the field in the holder (`*(obj_ + field_offset_)`).
+    // update the field in the holder (`*(obj_ + field_offset)`).
     //
     // Note that this field could also hold a different object, if
     // another thread had concurrently changed it. In that case, the
     // LDREX/SUBS/ITNE sequence of instructions in the compare-and-set
     // (CAS) operation below would abort the CAS, leaving the field
     // as-is.
-    Label done;
     __ cmp(temp1_, ShifterOperand(ref_reg));
-    __ b(&done, EQ);
+    __ b(GetExitLabel(), EQ);
 
     // Update the holder's field atomically.  This may fail if the
     // mutator updates the field before us, but it's OK.  This is achieved
@@ -836,7 +1021,7 @@
     // The UnsafeCASObject intrinsic uses a register pair as field
     // offset ("long offset"), of which only the low part contains
     // data.
-    Register offset = field_offset_.AsRegisterPairLow<Register>();
+    Register offset = field_offset.AsRegisterPairLow<Register>();
     Register expected = temp1_;
     Register value = ref_reg;
     Register tmp_ptr = IP;       // Pointer to actual memory.
@@ -886,22 +1071,27 @@
       }
     }
 
-    __ Bind(&done);
     __ b(GetExitLabel());
   }
 
  private:
-  // The location (register) of the marked object reference.
-  const Location ref_;
   // The register containing the object holding the marked object reference field.
   const Register obj_;
-  // The location of the offset of the marked reference field within `obj_`.
-  Location field_offset_;
-
+  // The offset, index and scale factor to access the reference in `obj_`.
+  uint32_t offset_;
+  Location index_;
+  ScaleFactor scale_factor_;
+  // Is a null check required?
+  bool needs_null_check_;
+  // A temporary register used to hold the lock word of `obj_`, and
+  // also the original reference value when the reference is marked.
   const Register temp1_;
+  // A temporary register used in the implementation of the CAS, to
+  // update the object's reference field.
   const Register temp2_;
 
-  DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathARM);
+  DISALLOW_COPY_AND_ASSIGN(LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM);
 };
 
 // Slow path generating a read barrier for a heap reference.
@@ -1132,10 +1322,6 @@
   DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathARM);
 };
 
-#undef __
-// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
-#define __ down_cast<ArmAssembler*>(GetAssembler())->  // NOLINT
-
 inline Condition ARMCondition(IfCondition cond) {
   switch (cond) {
     case kCondEQ: return EQ;
@@ -1191,6 +1377,519 @@
   }
 }
 
+inline Shift ShiftFromOpKind(HDataProcWithShifterOp::OpKind op_kind) {
+  switch (op_kind) {
+    case HDataProcWithShifterOp::kASR: return ASR;
+    case HDataProcWithShifterOp::kLSL: return LSL;
+    case HDataProcWithShifterOp::kLSR: return LSR;
+    default:
+      LOG(FATAL) << "Unexpected op kind " << op_kind;
+      UNREACHABLE();
+  }
+}
+
+static void GenerateDataProcInstruction(HInstruction::InstructionKind kind,
+                                        Register out,
+                                        Register first,
+                                        const ShifterOperand& second,
+                                        CodeGeneratorARM* codegen) {
+  if (second.IsImmediate() && second.GetImmediate() == 0) {
+    const ShifterOperand in = kind == HInstruction::kAnd
+        ? ShifterOperand(0)
+        : ShifterOperand(first);
+
+    __ mov(out, in);
+  } else {
+    switch (kind) {
+      case HInstruction::kAdd:
+        __ add(out, first, second);
+        break;
+      case HInstruction::kAnd:
+        __ and_(out, first, second);
+        break;
+      case HInstruction::kOr:
+        __ orr(out, first, second);
+        break;
+      case HInstruction::kSub:
+        __ sub(out, first, second);
+        break;
+      case HInstruction::kXor:
+        __ eor(out, first, second);
+        break;
+      default:
+        LOG(FATAL) << "Unexpected instruction kind: " << kind;
+        UNREACHABLE();
+    }
+  }
+}
+
+static void GenerateDataProc(HInstruction::InstructionKind kind,
+                             const Location& out,
+                             const Location& first,
+                             const ShifterOperand& second_lo,
+                             const ShifterOperand& second_hi,
+                             CodeGeneratorARM* codegen) {
+  const Register first_hi = first.AsRegisterPairHigh<Register>();
+  const Register first_lo = first.AsRegisterPairLow<Register>();
+  const Register out_hi = out.AsRegisterPairHigh<Register>();
+  const Register out_lo = out.AsRegisterPairLow<Register>();
+
+  if (kind == HInstruction::kAdd) {
+    __ adds(out_lo, first_lo, second_lo);
+    __ adc(out_hi, first_hi, second_hi);
+  } else if (kind == HInstruction::kSub) {
+    __ subs(out_lo, first_lo, second_lo);
+    __ sbc(out_hi, first_hi, second_hi);
+  } else {
+    GenerateDataProcInstruction(kind, out_lo, first_lo, second_lo, codegen);
+    GenerateDataProcInstruction(kind, out_hi, first_hi, second_hi, codegen);
+  }
+}
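+
+// For example, a 64-bit `out = first + second` expands via GenerateDataProc
+// into a carry-propagating pair:
+//
+//   ADDS out_lo, first_lo, second_lo   // Sets the carry flag.
+//   ADC  out_hi, first_hi, second_hi   // Consumes the carry.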
+
+static ShifterOperand GetShifterOperand(Register rm, Shift shift, uint32_t shift_imm) {
+  return shift_imm == 0 ? ShifterOperand(rm) : ShifterOperand(rm, shift, shift_imm);
+}
+
+static void GenerateLongDataProc(HDataProcWithShifterOp* instruction, CodeGeneratorARM* codegen) {
+  DCHECK_EQ(instruction->GetType(), Primitive::kPrimLong);
+  DCHECK(HDataProcWithShifterOp::IsShiftOp(instruction->GetOpKind()));
+
+  const LocationSummary* const locations = instruction->GetLocations();
+  const uint32_t shift_value = instruction->GetShiftAmount();
+  const HInstruction::InstructionKind kind = instruction->GetInstrKind();
+  const Location first = locations->InAt(0);
+  const Location second = locations->InAt(1);
+  const Location out = locations->Out();
+  const Register first_hi = first.AsRegisterPairHigh<Register>();
+  const Register first_lo = first.AsRegisterPairLow<Register>();
+  const Register out_hi = out.AsRegisterPairHigh<Register>();
+  const Register out_lo = out.AsRegisterPairLow<Register>();
+  const Register second_hi = second.AsRegisterPairHigh<Register>();
+  const Register second_lo = second.AsRegisterPairLow<Register>();
+  const Shift shift = ShiftFromOpKind(instruction->GetOpKind());
+
+  if (shift_value >= 32) {
+    if (shift == LSL) {
+      GenerateDataProcInstruction(kind,
+                                  out_hi,
+                                  first_hi,
+                                  ShifterOperand(second_lo, LSL, shift_value - 32),
+                                  codegen);
+      GenerateDataProcInstruction(kind,
+                                  out_lo,
+                                  first_lo,
+                                  ShifterOperand(0),
+                                  codegen);
+    } else if (shift == ASR) {
+      GenerateDataProc(kind,
+                       out,
+                       first,
+                       GetShifterOperand(second_hi, ASR, shift_value - 32),
+                       ShifterOperand(second_hi, ASR, 31),
+                       codegen);
+    } else {
+      DCHECK_EQ(shift, LSR);
+      GenerateDataProc(kind,
+                       out,
+                       first,
+                       GetShifterOperand(second_hi, LSR, shift_value - 32),
+                       ShifterOperand(0),
+                       codegen);
+    }
+  } else {
+    DCHECK_GT(shift_value, 1U);
+    DCHECK_LT(shift_value, 32U);
+
+    if (shift == LSL) {
+      // We are not doing this for HInstruction::kAdd because the output will require
+      // Location::kOutputOverlap; this is not the case for the other operations.
+      if (kind == HInstruction::kOr || kind == HInstruction::kXor) {
+        GenerateDataProcInstruction(kind,
+                                    out_hi,
+                                    first_hi,
+                                    ShifterOperand(second_hi, LSL, shift_value),
+                                    codegen);
+        GenerateDataProcInstruction(kind,
+                                    out_hi,
+                                    out_hi,
+                                    ShifterOperand(second_lo, LSR, 32 - shift_value),
+                                    codegen);
+        GenerateDataProcInstruction(kind,
+                                    out_lo,
+                                    first_lo,
+                                    ShifterOperand(second_lo, LSL, shift_value),
+                                    codegen);
+      } else {
+        __ Lsl(IP, second_hi, shift_value);
+        __ orr(IP, IP, ShifterOperand(second_lo, LSR, 32 - shift_value));
+        GenerateDataProc(kind,
+                         out,
+                         first,
+                         ShifterOperand(second_lo, LSL, shift_value),
+                         ShifterOperand(IP),
+                         codegen);
+      }
+    } else {
+      DCHECK(shift == ASR || shift == LSR);
+
+      // We are not doing this for HInstruction::kAdd because the output will require
+      // Location::kOutputOverlap; this is not the case for the other operations.
+      if (kind == HInstruction::kOr || kind == HInstruction::kXor) {
+        GenerateDataProcInstruction(kind,
+                                    out_lo,
+                                    first_lo,
+                                    ShifterOperand(second_lo, LSR, shift_value),
+                                    codegen);
+        GenerateDataProcInstruction(kind,
+                                    out_lo,
+                                    out_lo,
+                                    ShifterOperand(second_hi, LSL, 32 - shift_value),
+                                    codegen);
+        GenerateDataProcInstruction(kind,
+                                    out_hi,
+                                    first_hi,
+                                    ShifterOperand(second_hi, shift, shift_value),
+                                    codegen);
+      } else {
+        __ Lsr(IP, second_lo, shift_value);
+        __ orr(IP, IP, ShifterOperand(second_hi, LSL, 32 - shift_value));
+        GenerateDataProc(kind,
+                         out,
+                         first,
+                         ShifterOperand(IP),
+                         ShifterOperand(second_hi, shift, shift_value),
+                         codegen);
+      }
+    }
+  }
+}
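+
+// For example (illustrative), a 64-bit `out = first ^ (second << 40)` takes
+// the `shift_value >= 32` LSL branch above: the low word of the shifted value
+// is zero and its high word is `second_lo << 8`, so roughly:
+//
+//   EOR out_hi, first_hi, second_lo, LSL #8
+//   MOV out_lo, first_lo               // XOR with zero is the identity.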
+
+static void GenerateVcmp(HInstruction* instruction, CodeGeneratorARM* codegen) {
+  Primitive::Type type = instruction->InputAt(0)->GetType();
+  Location lhs_loc = instruction->GetLocations()->InAt(0);
+  Location rhs_loc = instruction->GetLocations()->InAt(1);
+  if (rhs_loc.IsConstant()) {
+    // 0.0 is the only immediate that can be encoded directly in
+    // a VCMP instruction.
+    //
+    // Both the JLS (section 15.20.1) and the JVMS (section 6.5)
+    // specify that in a floating-point comparison, positive zero
+    // and negative zero are considered equal, so we can use the
+    // literal 0.0 for both cases here.
+    //
+    // Note however that some methods (Float.equal, Float.compare,
+    // Float.compareTo, Double.equal, Double.compare,
+    // Double.compareTo, Math.max, Math.min, StrictMath.max,
+    // StrictMath.min) consider 0.0 to be (strictly) greater than
+    // -0.0. So if we ever translate calls to these methods into a
+    // HCompare instruction, we must handle the -0.0 case with
+    // care here.
+    DCHECK(rhs_loc.GetConstant()->IsArithmeticZero());
+    if (type == Primitive::kPrimFloat) {
+      __ vcmpsz(lhs_loc.AsFpuRegister<SRegister>());
+    } else {
+      DCHECK_EQ(type, Primitive::kPrimDouble);
+      __ vcmpdz(FromLowSToD(lhs_loc.AsFpuRegisterPairLow<SRegister>()));
+    }
+  } else {
+    if (type == Primitive::kPrimFloat) {
+      __ vcmps(lhs_loc.AsFpuRegister<SRegister>(), rhs_loc.AsFpuRegister<SRegister>());
+    } else {
+      DCHECK_EQ(type, Primitive::kPrimDouble);
+      __ vcmpd(FromLowSToD(lhs_loc.AsFpuRegisterPairLow<SRegister>()),
+               FromLowSToD(rhs_loc.AsFpuRegisterPairLow<SRegister>()));
+    }
+  }
+}
+
+static Condition GenerateLongTestConstant(HCondition* condition,
+                                          bool invert,
+                                          CodeGeneratorARM* codegen) {
+  DCHECK_EQ(condition->GetLeft()->GetType(), Primitive::kPrimLong);
+
+  const LocationSummary* const locations = condition->GetLocations();
+  IfCondition cond = invert ? condition->GetOppositeCondition() : condition->GetCondition();
+  Condition ret = EQ;
+  const Location left = locations->InAt(0);
+  const Location right = locations->InAt(1);
+
+  DCHECK(right.IsConstant());
+
+  const Register left_high = left.AsRegisterPairHigh<Register>();
+  const Register left_low = left.AsRegisterPairLow<Register>();
+  int64_t value = right.GetConstant()->AsLongConstant()->GetValue();
+
+  switch (cond) {
+    case kCondEQ:
+    case kCondNE:
+    case kCondB:
+    case kCondBE:
+    case kCondA:
+    case kCondAE:
+      __ CmpConstant(left_high, High32Bits(value));
+      __ it(EQ);
+      __ cmp(left_low, ShifterOperand(Low32Bits(value)), EQ);
+      ret = ARMUnsignedCondition(cond);
+      break;
+    case kCondLE:
+    case kCondGT:
+      // Trivially true or false.
+      if (value == std::numeric_limits<int64_t>::max()) {
+        __ cmp(left_low, ShifterOperand(left_low));
+        ret = cond == kCondLE ? EQ : NE;
+        break;
+      }
+
+      if (cond == kCondLE) {
+        cond = kCondLT;
+      } else {
+        DCHECK_EQ(cond, kCondGT);
+        cond = kCondGE;
+      }
+
+      value++;
+      FALLTHROUGH_INTENDED;
+    case kCondGE:
+    case kCondLT:
+      __ CmpConstant(left_low, Low32Bits(value));
+      __ sbcs(IP, left_high, ShifterOperand(High32Bits(value)));
+      ret = ARMCondition(cond);
+      break;
+    default:
+      LOG(FATAL) << "Unreachable";
+      UNREACHABLE();
+  }
+
+  return ret;
+}
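+
+// For example, the signed test `left <= 5` is rewritten above as `left < 6`
+// (LE/GT cannot be derived from the CMP/SBCS flags directly), giving roughly:
+//
+//   CMP  left_lo, #6      // Subtract the low words, setting the carry.
+//   SBCS ip, left_hi, #0  // Subtract the high words with borrow; branch on LT.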
+
+static Condition GenerateLongTest(HCondition* condition,
+                                  bool invert,
+                                  CodeGeneratorARM* codegen) {
+  DCHECK_EQ(condition->GetLeft()->GetType(), Primitive::kPrimLong);
+
+  const LocationSummary* const locations = condition->GetLocations();
+  IfCondition cond = invert ? condition->GetOppositeCondition() : condition->GetCondition();
+  Condition ret = EQ;
+  Location left = locations->InAt(0);
+  Location right = locations->InAt(1);
+
+  DCHECK(right.IsRegisterPair());
+
+  switch (cond) {
+    case kCondEQ:
+    case kCondNE:
+    case kCondB:
+    case kCondBE:
+    case kCondA:
+    case kCondAE:
+      __ cmp(left.AsRegisterPairHigh<Register>(),
+             ShifterOperand(right.AsRegisterPairHigh<Register>()));
+      __ it(EQ);
+      __ cmp(left.AsRegisterPairLow<Register>(),
+             ShifterOperand(right.AsRegisterPairLow<Register>()),
+             EQ);
+      ret = ARMUnsignedCondition(cond);
+      break;
+    case kCondLE:
+    case kCondGT:
+      if (cond == kCondLE) {
+        cond = kCondGE;
+      } else {
+        DCHECK_EQ(cond, kCondGT);
+        cond = kCondLT;
+      }
+
+      std::swap(left, right);
+      FALLTHROUGH_INTENDED;
+    case kCondGE:
+    case kCondLT:
+      __ cmp(left.AsRegisterPairLow<Register>(),
+             ShifterOperand(right.AsRegisterPairLow<Register>()));
+      __ sbcs(IP,
+              left.AsRegisterPairHigh<Register>(),
+              ShifterOperand(right.AsRegisterPairHigh<Register>()));
+      ret = ARMCondition(cond);
+      break;
+    default:
+      LOG(FATAL) << "Unreachable";
+      UNREACHABLE();
+  }
+
+  return ret;
+}
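+
+// Similarly, with a register-pair RHS, `a <= b` is evaluated above as
+// `b >= a` by swapping the operands, so the same two-instruction CMP/SBCS
+// sequence covers all four signed orderings without an IT block.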
+
+static Condition GenerateTest(HInstruction* instruction,
+                              Location loc,
+                              bool invert,
+                              CodeGeneratorARM* codegen) {
+  DCHECK(!instruction->IsConstant());
+
+  Condition ret = invert ? EQ : NE;
+
+  if (IsBooleanValueOrMaterializedCondition(instruction)) {
+    __ CmpConstant(loc.AsRegister<Register>(), 0);
+  } else {
+    HCondition* const condition = instruction->AsCondition();
+    const LocationSummary* const locations = condition->GetLocations();
+    const Primitive::Type type = condition->GetLeft()->GetType();
+    const IfCondition cond = invert ? condition->GetOppositeCondition() : condition->GetCondition();
+    const Location right = locations->InAt(1);
+
+    if (type == Primitive::kPrimLong) {
+      ret = condition->GetLocations()->InAt(1).IsConstant()
+          ? GenerateLongTestConstant(condition, invert, codegen)
+          : GenerateLongTest(condition, invert, codegen);
+    } else if (Primitive::IsFloatingPointType(type)) {
+      GenerateVcmp(condition, codegen);
+      __ vmstat();
+      ret = ARMFPCondition(cond, condition->IsGtBias());
+    } else {
+      DCHECK(Primitive::IsIntegralType(type) || type == Primitive::kPrimNot) << type;
+
+      const Register left = locations->InAt(0).AsRegister<Register>();
+
+      if (right.IsRegister()) {
+        __ cmp(left, ShifterOperand(right.AsRegister<Register>()));
+      } else {
+        DCHECK(right.IsConstant());
+        __ CmpConstant(left, CodeGenerator::GetInt32ValueOf(right.GetConstant()));
+      }
+
+      ret = ARMCondition(cond);
+    }
+  }
+
+  return ret;
+}
+
+static bool CanGenerateTest(HInstruction* condition, ArmAssembler* assembler) {
+  if (!IsBooleanValueOrMaterializedCondition(condition)) {
+    const HCondition* const cond = condition->AsCondition();
+
+    if (cond->GetLeft()->GetType() == Primitive::kPrimLong) {
+      const LocationSummary* const locations = cond->GetLocations();
+      const IfCondition c = cond->GetCondition();
+
+      if (locations->InAt(1).IsConstant()) {
+        const int64_t value = locations->InAt(1).GetConstant()->AsLongConstant()->GetValue();
+        ShifterOperand so;
+
+        if (c < kCondLT || c > kCondGE) {
+          // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
+          // we check that the least significant half of the first input to be compared
+          // is in a low register (the other half is read outside an IT block), and that
+          // the constant fits in an 8-bit unsigned integer, so that a 16-bit CMP
+          // encoding can be used.
+          if (!ArmAssembler::IsLowRegister(locations->InAt(0).AsRegisterPairLow<Register>()) ||
+              !IsUint<8>(Low32Bits(value))) {
+            return false;
+          }
+        } else if (c == kCondLE || c == kCondGT) {
+          if (value < std::numeric_limits<int64_t>::max() &&
+              !assembler->ShifterOperandCanHold(kNoRegister,
+                                                kNoRegister,
+                                                SBC,
+                                                High32Bits(value + 1),
+                                                kCcSet,
+                                                &so)) {
+            return false;
+          }
+        } else if (!assembler->ShifterOperandCanHold(kNoRegister,
+                                                     kNoRegister,
+                                                     SBC,
+                                                     High32Bits(value),
+                                                     kCcSet,
+                                                     &so)) {
+          return false;
+        }
+      }
+    }
+  }
+
+  return true;
+}
+
+static bool CanEncodeConstantAs8BitImmediate(HConstant* constant) {
+  const Primitive::Type type = constant->GetType();
+  bool ret = false;
+
+  DCHECK(Primitive::IsIntegralType(type) || type == Primitive::kPrimNot) << type;
+
+  if (type == Primitive::kPrimLong) {
+    const uint64_t value = constant->AsLongConstant()->GetValueAsUint64();
+
+    ret = IsUint<8>(Low32Bits(value)) && IsUint<8>(High32Bits(value));
+  } else {
+    ret = IsUint<8>(CodeGenerator::GetInt32ValueOf(constant));
+  }
+
+  return ret;
+}
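+
+// For example, the long constant 0x0000004200000017 is encodable: both halves
+// (0x42 and 0x17) fit in a uint8, so each can be materialized by a 16-bit MOV
+// inside an IT block; 0x000001ff00000000 is not, as its high half needs 9 bits.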
+
+static Location Arm8BitEncodableConstantOrRegister(HInstruction* constant) {
+  DCHECK(!Primitive::IsFloatingPointType(constant->GetType()));
+
+  if (constant->IsConstant() && CanEncodeConstantAs8BitImmediate(constant->AsConstant())) {
+    return Location::ConstantLocation(constant->AsConstant());
+  }
+
+  return Location::RequiresRegister();
+}
+
+static bool CanGenerateConditionalMove(const Location& out, const Location& src) {
+  // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
+  // we check that we are not dealing with floating-point output (there is no
+  // 16-bit VMOV encoding).
+  if (!out.IsRegister() && !out.IsRegisterPair()) {
+    return false;
+  }
+
+  // For constants, we also check that the output is in one or two low registers,
+  // and that the constants fit in an 8-bit unsigned integer, so that a 16-bit
+  // MOV encoding can be used.
+  if (src.IsConstant()) {
+    if (!CanEncodeConstantAs8BitImmediate(src.GetConstant())) {
+      return false;
+    }
+
+    if (out.IsRegister()) {
+      if (!ArmAssembler::IsLowRegister(out.AsRegister<Register>())) {
+        return false;
+      }
+    } else {
+      DCHECK(out.IsRegisterPair());
+
+      if (!ArmAssembler::IsLowRegister(out.AsRegisterPairHigh<Register>())) {
+        return false;
+      }
+    }
+  }
+
+  return true;
+}
+
+#undef __
+// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
+#define __ down_cast<ArmAssembler*>(GetAssembler())->  // NOLINT
+
+Label* CodeGeneratorARM::GetFinalLabel(HInstruction* instruction, Label* final_label) {
+  DCHECK(!instruction->IsControlFlow() && !instruction->IsSuspendCheck());
+
+  const HBasicBlock* const block = instruction->GetBlock();
+  const HLoopInformation* const info = block->GetLoopInformation();
+  HInstruction* const next = instruction->GetNext();
+
+  // Avoid a branch to a branch.
+  if (next->IsGoto() && (info == nullptr ||
+                         !info->IsBackEdge(*block) ||
+                         !info->HasSuspendCheck())) {
+    final_label = GetLabelOf(next->AsGoto()->GetSuccessor());
+  }
+
+  return final_label;
+}
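+
+// For example, when the next instruction is a HGoto to block B (and the goto
+// is not a loop back edge carrying a suspend check), GetFinalLabel returns
+// B's label, so callers branch straight to B instead of to a local label that
+// merely branches to B again.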
+
 void CodeGeneratorARM::DumpCoreRegister(std::ostream& stream, int reg) const {
   stream << Register(reg);
 }
@@ -1249,8 +1948,6 @@
                                graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
-      boot_image_address_patches_(std::less<uint32_t>(),
-                                  graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       jit_string_patches_(StringReferenceValueComparator(),
                           graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       jit_class_patches_(TypeReferenceValueComparator(),
@@ -1717,44 +2414,6 @@
 void InstructionCodeGeneratorARM::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
 }
 
-void InstructionCodeGeneratorARM::GenerateVcmp(HInstruction* instruction) {
-  Primitive::Type type = instruction->InputAt(0)->GetType();
-  Location lhs_loc = instruction->GetLocations()->InAt(0);
-  Location rhs_loc = instruction->GetLocations()->InAt(1);
-  if (rhs_loc.IsConstant()) {
-    // 0.0 is the only immediate that can be encoded directly in
-    // a VCMP instruction.
-    //
-    // Both the JLS (section 15.20.1) and the JVMS (section 6.5)
-    // specify that in a floating-point comparison, positive zero
-    // and negative zero are considered equal, so we can use the
-    // literal 0.0 for both cases here.
-    //
-    // Note however that some methods (Float.equal, Float.compare,
-    // Float.compareTo, Double.equal, Double.compare,
-    // Double.compareTo, Math.max, Math.min, StrictMath.max,
-    // StrictMath.min) consider 0.0 to be (strictly) greater than
-    // -0.0. So if we ever translate calls to these methods into a
-    // HCompare instruction, we must handle the -0.0 case with
-    // care here.
-    DCHECK(rhs_loc.GetConstant()->IsArithmeticZero());
-    if (type == Primitive::kPrimFloat) {
-      __ vcmpsz(lhs_loc.AsFpuRegister<SRegister>());
-    } else {
-      DCHECK_EQ(type, Primitive::kPrimDouble);
-      __ vcmpdz(FromLowSToD(lhs_loc.AsFpuRegisterPairLow<SRegister>()));
-    }
-  } else {
-    if (type == Primitive::kPrimFloat) {
-      __ vcmps(lhs_loc.AsFpuRegister<SRegister>(), rhs_loc.AsFpuRegister<SRegister>());
-    } else {
-      DCHECK_EQ(type, Primitive::kPrimDouble);
-      __ vcmpd(FromLowSToD(lhs_loc.AsFpuRegisterPairLow<SRegister>()),
-               FromLowSToD(rhs_loc.AsFpuRegisterPairLow<SRegister>()));
-    }
-  }
-}
-
 void InstructionCodeGeneratorARM::GenerateFPJumps(HCondition* cond,
                                                   Label* true_label,
                                                   Label* false_label ATTRIBUTE_UNUSED) {
@@ -1862,7 +2521,7 @@
       break;
     case Primitive::kPrimFloat:
     case Primitive::kPrimDouble:
-      GenerateVcmp(condition);
+      GenerateVcmp(condition, codegen_);
       GenerateFPJumps(condition, true_target, false_target);
       break;
     default:
@@ -1932,20 +2591,38 @@
       return;
     }
 
+    Label* non_fallthrough_target;
+    Condition arm_cond;
     LocationSummary* locations = cond->GetLocations();
     DCHECK(locations->InAt(0).IsRegister());
     Register left = locations->InAt(0).AsRegister<Register>();
     Location right = locations->InAt(1);
-    if (right.IsRegister()) {
-      __ cmp(left, ShifterOperand(right.AsRegister<Register>()));
-    } else {
-      DCHECK(right.IsConstant());
-      __ CmpConstant(left, CodeGenerator::GetInt32ValueOf(right.GetConstant()));
-    }
+
     if (true_target == nullptr) {
-      __ b(false_target, ARMCondition(condition->GetOppositeCondition()));
+      arm_cond = ARMCondition(condition->GetOppositeCondition());
+      non_fallthrough_target = false_target;
     } else {
-      __ b(true_target, ARMCondition(condition->GetCondition()));
+      arm_cond = ARMCondition(condition->GetCondition());
+      non_fallthrough_target = true_target;
+    }
+
+    if (right.IsConstant() && (arm_cond == NE || arm_cond == EQ) &&
+        CodeGenerator::GetInt32ValueOf(right.GetConstant()) == 0) {
+      if (arm_cond == EQ) {
+        __ CompareAndBranchIfZero(left, non_fallthrough_target);
+      } else {
+        DCHECK_EQ(arm_cond, NE);
+        __ CompareAndBranchIfNonZero(left, non_fallthrough_target);
+      }
+    } else {
+      if (right.IsRegister()) {
+        __ cmp(left, ShifterOperand(right.AsRegister<Register>()));
+      } else {
+        DCHECK(right.IsConstant());
+        __ CmpConstant(left, CodeGenerator::GetInt32ValueOf(right.GetConstant()));
+      }
+
+      __ b(non_fallthrough_target, arm_cond);
     }
   }
 
@@ -2005,28 +2682,140 @@
 
 void LocationsBuilderARM::VisitSelect(HSelect* select) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(select);
-  if (Primitive::IsFloatingPointType(select->GetType())) {
+  const bool is_floating_point = Primitive::IsFloatingPointType(select->GetType());
+
+  if (is_floating_point) {
     locations->SetInAt(0, Location::RequiresFpuRegister());
-    locations->SetInAt(1, Location::RequiresFpuRegister());
+    locations->SetInAt(1, Location::FpuRegisterOrConstant(select->GetTrueValue()));
   } else {
     locations->SetInAt(0, Location::RequiresRegister());
-    locations->SetInAt(1, Location::RequiresRegister());
+    locations->SetInAt(1, Arm8BitEncodableConstantOrRegister(select->GetTrueValue()));
   }
+
   if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
-    locations->SetInAt(2, Location::RequiresRegister());
+    locations->SetInAt(2, Location::RegisterOrConstant(select->GetCondition()));
+    // The code generator handles overlap with the values, but not with the condition.
+    locations->SetOut(Location::SameAsFirstInput());
+  } else if (is_floating_point) {
+    locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+  } else {
+    if (!locations->InAt(1).IsConstant()) {
+      locations->SetInAt(0, Arm8BitEncodableConstantOrRegister(select->GetFalseValue()));
+    }
+
+    locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
   }
-  locations->SetOut(Location::SameAsFirstInput());
 }
 
 void InstructionCodeGeneratorARM::VisitSelect(HSelect* select) {
-  LocationSummary* locations = select->GetLocations();
-  Label false_target;
-  GenerateTestAndBranch(select,
-                        /* condition_input_index */ 2,
-                        /* true_target */ nullptr,
-                        &false_target);
-  codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType());
-  __ Bind(&false_target);
+  HInstruction* const condition = select->GetCondition();
+  const LocationSummary* const locations = select->GetLocations();
+  const Primitive::Type type = select->GetType();
+  const Location first = locations->InAt(0);
+  const Location out = locations->Out();
+  const Location second = locations->InAt(1);
+  Location src;
+
+  if (condition->IsIntConstant()) {
+    if (condition->AsIntConstant()->IsFalse()) {
+      src = first;
+    } else {
+      src = second;
+    }
+
+    codegen_->MoveLocation(out, src, type);
+    return;
+  }
+
+  if (!Primitive::IsFloatingPointType(type) &&
+      CanGenerateTest(condition, codegen_->GetAssembler())) {
+    bool invert = false;
+
+    if (out.Equals(second)) {
+      src = first;
+      invert = true;
+    } else if (out.Equals(first)) {
+      src = second;
+    } else if (second.IsConstant()) {
+      DCHECK(CanEncodeConstantAs8BitImmediate(second.GetConstant()));
+      src = second;
+    } else if (first.IsConstant()) {
+      DCHECK(CanEncodeConstantAs8BitImmediate(first.GetConstant()));
+      src = first;
+      invert = true;
+    } else {
+      src = second;
+    }
+
+    if (CanGenerateConditionalMove(out, src)) {
+      if (!out.Equals(first) && !out.Equals(second)) {
+        codegen_->MoveLocation(out, src.Equals(first) ? second : first, type);
+      }
+
+      const Condition cond = GenerateTest(condition, locations->InAt(2), invert, codegen_);
+
+      if (out.IsRegister()) {
+        ShifterOperand operand;
+
+        if (src.IsConstant()) {
+          operand = ShifterOperand(CodeGenerator::GetInt32ValueOf(src.GetConstant()));
+        } else {
+          DCHECK(src.IsRegister());
+          operand = ShifterOperand(src.AsRegister<Register>());
+        }
+
+        __ it(cond);
+        __ mov(out.AsRegister<Register>(), operand, cond);
+      } else {
+        DCHECK(out.IsRegisterPair());
+
+        ShifterOperand operand_high;
+        ShifterOperand operand_low;
+
+        if (src.IsConstant()) {
+          const int64_t value = src.GetConstant()->AsLongConstant()->GetValue();
+
+          operand_high = ShifterOperand(High32Bits(value));
+          operand_low = ShifterOperand(Low32Bits(value));
+        } else {
+          DCHECK(src.IsRegisterPair());
+          operand_high = ShifterOperand(src.AsRegisterPairHigh<Register>());
+          operand_low = ShifterOperand(src.AsRegisterPairLow<Register>());
+        }
+
+        __ it(cond);
+        __ mov(out.AsRegisterPairLow<Register>(), operand_low, cond);
+        __ it(cond);
+        __ mov(out.AsRegisterPairHigh<Register>(), operand_high, cond);
+      }
+
+      return;
+    }
+  }
+
+  Label* false_target = nullptr;
+  Label* true_target = nullptr;
+  Label select_end;
+  Label* target = codegen_->GetFinalLabel(select, &select_end);
+
+  if (out.Equals(second)) {
+    true_target = target;
+    src = first;
+  } else {
+    false_target = target;
+    src = second;
+
+    if (!out.Equals(first)) {
+      codegen_->MoveLocation(out, first, type);
+    }
+  }
+
+  GenerateTestAndBranch(select, 2, true_target, false_target);
+  codegen_->MoveLocation(out, src, type);
+
+  if (select_end.IsLinked()) {
+    __ Bind(&select_end);
+  }
 }
 
 void LocationsBuilderARM::VisitNativeDebugInfo(HNativeDebugInfo* info) {
@@ -2105,7 +2894,7 @@
       break;
     case Primitive::kPrimFloat:
     case Primitive::kPrimDouble:
-      GenerateVcmp(cond);
+      GenerateVcmp(cond, codegen_);
       GenerateFPJumps(cond, &true_label, &false_label);
       break;
   }
@@ -4159,7 +4948,7 @@
     case Primitive::kPrimFloat:
     case Primitive::kPrimDouble: {
       __ LoadImmediate(out, 0);
-      GenerateVcmp(compare);
+      GenerateVcmp(compare, codegen_);
       __ vmstat();  // transfer FP status register to ARM APSR.
       less_cond = ARMFPCondition(kCondLT, compare->IsGtBias());
       break;
@@ -4515,17 +5304,29 @@
     return true;
   }
   Opcode neg_opcode = kNoOperand;
+  uint32_t neg_value = 0;
   switch (opcode) {
-    case AND: neg_opcode = BIC; value = ~value; break;
-    case ORR: neg_opcode = ORN; value = ~value; break;
-    case ADD: neg_opcode = SUB; value = -value; break;
-    case ADC: neg_opcode = SBC; value = ~value; break;
-    case SUB: neg_opcode = ADD; value = -value; break;
-    case SBC: neg_opcode = ADC; value = ~value; break;
+    case AND: neg_opcode = BIC; neg_value = ~value; break;
+    case ORR: neg_opcode = ORN; neg_value = ~value; break;
+    case ADD: neg_opcode = SUB; neg_value = -value; break;
+    case ADC: neg_opcode = SBC; neg_value = ~value; break;
+    case SUB: neg_opcode = ADD; neg_value = -value; break;
+    case SBC: neg_opcode = ADC; neg_value = ~value; break;
+    case MOV: neg_opcode = MVN; neg_value = ~value; break;
     default:
       return false;
   }
-  return assembler->ShifterOperandCanHold(kNoRegister, kNoRegister, neg_opcode, value, set_cc, &so);
+
+  if (assembler->ShifterOperandCanHold(kNoRegister,
+                                       kNoRegister,
+                                       neg_opcode,
+                                       neg_value,
+                                       set_cc,
+                                       &so)) {
+    return true;
+  }
+
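+  // An AND mask of the form 2^n - 1 is still acceptable: even when neither the
+  // AND nor the BIC immediate encoding fits, GenerateAndConst can emit a UBFX
+  // of the low n bits instead.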
+  return opcode == AND && IsPowerOfTwo(value + 1);
 }
 
 void InstructionCodeGeneratorARM::HandleFieldGet(HInstruction* instruction,
@@ -5427,21 +6228,59 @@
   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves);
-  locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RequiresRegister());
+
+  HInstruction* index = instruction->InputAt(0);
+  HInstruction* length = instruction->InputAt(1);
+  // If both index and length are constants, we can check the bounds statically and want
+  // constant locations for both inputs. ArmEncodableConstantOrRegister would request a
+  // register for a constant it cannot encode, which is not what we want here, so we
+  // create the constant locations directly.
+  bool both_const = index->IsConstant() && length->IsConstant();
+  locations->SetInAt(0, both_const
+      ? Location::ConstantLocation(index->AsConstant())
+      : ArmEncodableConstantOrRegister(index, CMP));
+  locations->SetInAt(1, both_const
+      ? Location::ConstantLocation(length->AsConstant())
+      : ArmEncodableConstantOrRegister(length, CMP));
 }
 
 void InstructionCodeGeneratorARM::VisitBoundsCheck(HBoundsCheck* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  SlowPathCodeARM* slow_path =
-      new (GetGraph()->GetArena()) BoundsCheckSlowPathARM(instruction);
-  codegen_->AddSlowPath(slow_path);
+  Location index_loc = locations->InAt(0);
+  Location length_loc = locations->InAt(1);
 
-  Register index = locations->InAt(0).AsRegister<Register>();
-  Register length = locations->InAt(1).AsRegister<Register>();
+  if (length_loc.IsConstant()) {
+    int32_t length = helpers::Int32ConstantFrom(length_loc);
+    if (index_loc.IsConstant()) {
+      // BCE will remove the bounds check if we are guaranteed to pass.
+      int32_t index = helpers::Int32ConstantFrom(index_loc);
+      if (index < 0 || index >= length) {
+        SlowPathCodeARM* slow_path =
+            new (GetGraph()->GetArena()) BoundsCheckSlowPathARM(instruction);
+        codegen_->AddSlowPath(slow_path);
+        __ b(slow_path->GetEntryLabel());
+      } else {
+        // Some optimization after BCE may have generated this check, and since
+        // the range is statically valid, no bounds check needs to be emitted.
+      }
+      return;
+    }
 
-  __ cmp(index, ShifterOperand(length));
-  __ b(slow_path->GetEntryLabel(), HS);
+    SlowPathCodeARM* slow_path = new (GetGraph()->GetArena()) BoundsCheckSlowPathARM(instruction);
+    __ cmp(index_loc.AsRegister<Register>(), ShifterOperand(length));
+    codegen_->AddSlowPath(slow_path);
+    __ b(slow_path->GetEntryLabel(), HS);
+  } else {
+    SlowPathCodeARM* slow_path = new (GetGraph()->GetArena()) BoundsCheckSlowPathARM(instruction);
+    if (index_loc.IsConstant()) {
+      int32_t index = helpers::Int32ConstantFrom(index_loc);
+      __ cmp(length_loc.AsRegister<Register>(), ShifterOperand(index));
+    } else {
+      __ cmp(length_loc.AsRegister<Register>(), ShifterOperand(index_loc.AsRegister<Register>()));
+    }
+    codegen_->AddSlowPath(slow_path);
+    __ b(slow_path->GetEntryLabel(), LS);
+  }
 }
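+
+// For example, with a constant array length of 8: a constant index 2 emits no
+// code at all (statically in range), while a register index emits just:
+//
+//   CMP index, #8
+//   BHS slow_path            // Unsigned >= also catches negative indices.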
 
 void CodeGeneratorARM::MarkGCCard(Register temp,
@@ -6709,6 +7548,63 @@
   }
 }
 
+void LocationsBuilderARM::VisitDataProcWithShifterOp(
+    HDataProcWithShifterOp* instruction) {
+  DCHECK(instruction->GetType() == Primitive::kPrimInt ||
+         instruction->GetType() == Primitive::kPrimLong);
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+  const bool overlap = instruction->GetType() == Primitive::kPrimLong &&
+                       HDataProcWithShifterOp::IsExtensionOp(instruction->GetOpKind());
+
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(),
+                    overlap ? Location::kOutputOverlap : Location::kNoOutputOverlap);
+}
+
+void InstructionCodeGeneratorARM::VisitDataProcWithShifterOp(
+    HDataProcWithShifterOp* instruction) {
+  const LocationSummary* const locations = instruction->GetLocations();
+  const HInstruction::InstructionKind kind = instruction->GetInstrKind();
+  const HDataProcWithShifterOp::OpKind op_kind = instruction->GetOpKind();
+  const Location left = locations->InAt(0);
+  const Location right = locations->InAt(1);
+  const Location out = locations->Out();
+
+  if (instruction->GetType() == Primitive::kPrimInt) {
+    DCHECK(!HDataProcWithShifterOp::IsExtensionOp(op_kind));
+
+    const Register second = instruction->InputAt(1)->GetType() == Primitive::kPrimLong
+        ? right.AsRegisterPairLow<Register>()
+        : right.AsRegister<Register>();
+
+    GenerateDataProcInstruction(kind,
+                                out.AsRegister<Register>(),
+                                left.AsRegister<Register>(),
+                                ShifterOperand(second,
+                                               ShiftFromOpKind(op_kind),
+                                               instruction->GetShiftAmount()),
+                                codegen_);
+  } else {
+    DCHECK_EQ(instruction->GetType(), Primitive::kPrimLong);
+
+    if (HDataProcWithShifterOp::IsExtensionOp(op_kind)) {
+      const Register second = right.AsRegister<Register>();
+
+      DCHECK_NE(out.AsRegisterPairLow<Register>(), second);
+      GenerateDataProc(kind,
+                       out,
+                       left,
+                       ShifterOperand(second),
+                       ShifterOperand(second, ASR, 31),
+                       codegen_);
+    } else {
+      GenerateLongDataProc(instruction, codegen_);
+    }
+  }
+}
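+
+// For example, a merged int operation `a + (b << 3)` is emitted as the single
+// instruction `ADD out, a, b, LSL #3`, rather than a shift followed by an add.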
+
 void InstructionCodeGeneratorARM::GenerateAndConst(Register out, Register first, uint32_t value) {
   // Optimize special cases for individual halves of `and-long` (`and` is simplified earlier).
   if (value == 0xffffffffu) {
@@ -6724,9 +7620,11 @@
   ShifterOperand so;
   if (__ ShifterOperandCanHold(kNoRegister, kNoRegister, AND, value, &so)) {
     __ and_(out, first, so);
-  } else {
-    DCHECK(__ ShifterOperandCanHold(kNoRegister, kNoRegister, BIC, ~value, &so));
+  } else if (__ ShifterOperandCanHold(kNoRegister, kNoRegister, BIC, ~value, &so)) {
     __ bic(out, first, ShifterOperand(~value));
+  } else {
+    DCHECK(IsPowerOfTwo(value + 1));
+    __ ubfx(out, first, 0, WhichPowerOf2(value + 1));
   }
 }
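+
+// For example, `x & 0x1ffff` (mask 2^17 - 1): neither AND 0x1ffff nor
+// BIC 0xfffe0000 fits a Thumb-2 modified immediate, so the code above emits:
+//
+//   UBFX out, first, #0, #17   // Extract the low 17 bits.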
 
@@ -6940,14 +7838,35 @@
     DCHECK(kEmitCompilerReadBarrier);
     if (kUseBakerReadBarrier) {
       // Fast path implementation of art::ReadBarrier::BarrierForRoot when
-      // Baker's read barrier are used:
+      // Baker's read barriers are used.
       //
-      //   root = obj.field;
+      // Note that we do not actually check the value of
+      // `GetIsGcMarking()` to decide whether to mark the loaded GC
+      // root or not.  Instead, we load into `temp` the read barrier
+      // mark entry point corresponding to register `root`. If `temp`
+      // is null, it means that `GetIsGcMarking()` is false, and vice
+      // versa.
+      //
       //   temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
-      //   if (temp != null) {
-      //     root = temp(root)
+      //   GcRoot<mirror::Object> root = *(obj+offset);  // Original reference load.
+      //   if (temp != nullptr) {  // <=> Thread::Current()->GetIsGcMarking()
+      //     // Slow path.
+      //     root = temp(root);  // root = ReadBarrier::Mark(root);  // Runtime entry point call.
       //   }
 
+      // Slow path marking the GC root `root`. The entrypoint will already be loaded in `temp`.
+      Location temp = Location::RegisterLocation(LR);
+      SlowPathCodeARM* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(
+          instruction, root, /* entrypoint */ temp);
+      codegen_->AddSlowPath(slow_path);
+
+      // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
+      const int32_t entry_point_offset =
+          CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(root.reg());
+      // Loading the entrypoint does not require a load acquire since it is only changed when
+      // threads are suspended or running a checkpoint.
+      __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), TR, entry_point_offset);
+
       // /* GcRoot<mirror::Object> */ root = *(obj + offset)
       __ LoadFromOffset(kLoadWord, root_reg, obj, offset);
       static_assert(
@@ -6958,21 +7877,6 @@
                     "art::mirror::CompressedReference<mirror::Object> and int32_t "
                     "have different sizes.");
 
-      // Slow path marking the GC root `root`.
-      Location temp = Location::RegisterLocation(LR);
-      SlowPathCodeARM* slow_path =
-          new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(
-              instruction,
-              root,
-              /*entrypoint*/ temp);
-      codegen_->AddSlowPath(slow_path);
-
-      // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
-      const int32_t entry_point_offset =
-          CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(root.reg());
-      // Loading the entrypoint does not require a load acquire since it is only changed when
-      // threads are suspended or running a checkpoint.
-      __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), TR, entry_point_offset);
       // The entrypoint is null when the GC is not marking; this prevents one load compared to
       // checking GetIsGcMarking().
       __ CompareAndBranchIfNonZero(temp.AsRegister<Register>(), slow_path->GetEntryLabel());
@@ -7043,51 +7947,101 @@
   DCHECK(kEmitCompilerReadBarrier);
   DCHECK(kUseBakerReadBarrier);
 
-  // In slow path based read barriers, the read barrier call is
-  // inserted after the original load. However, in fast path based
-  // Baker's read barriers, we need to perform the load of
-  // mirror::Object::monitor_ *before* the original reference load.
-  // This load-load ordering is required by the read barrier.
-  // The fast path/slow path (for Baker's algorithm) should look like:
+  // Query `art::Thread::Current()->GetIsGcMarking()` to decide
+  // whether we need to enter the slow path to mark the reference.
+  // Then, in the slow path, check the gray bit in the lock word of
+  // the reference's holder (`obj`) to decide whether to mark `ref` or
+  // not.
   //
-  //   uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
-  //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
-  //   HeapReference<Object> ref = *src;  // Original reference load.
-  //   bool is_gray = (rb_state == ReadBarrier::GrayState());
-  //   if (is_gray) {
-  //     ref = ReadBarrier::Mark(ref);  // Performed by runtime entrypoint slow path.
+  // Note that we do not actually check the value of `GetIsGcMarking()`;
+  // instead, we load into `temp3` the read barrier mark entry point
+  // corresponding to register `ref`. If `temp3` is null, it means
+  // that `GetIsGcMarking()` is false, and vice versa.
+  //
+  //   temp3 = Thread::Current()->pReadBarrierMarkReg ## ref.reg()
+  //   if (temp3 != nullptr) {  // <=> Thread::Current()->GetIsGcMarking()
+  //     // Slow path.
+  //     uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
+  //     lfence;  // Load fence or artificial data dependency to prevent load-load reordering
+  //     HeapReference<mirror::Object> ref = *src;  // Original reference load.
+  //     bool is_gray = (rb_state == ReadBarrier::GrayState());
+  //     if (is_gray) {
+  //       ref = temp3(ref);  // ref = ReadBarrier::Mark(ref);  // Runtime entry point call.
+  //     }
+  //   } else {
+  //     HeapReference<mirror::Object> ref = *src;  // Original reference load.
   //   }
-  //
-  // Note: the original implementation in ReadBarrier::Barrier is
-  // slightly more complex as it performs additional checks that we do
-  // not do here for performance reasons.
 
-  Register ref_reg = ref.AsRegister<Register>();
   Register temp_reg = temp.AsRegister<Register>();
-  uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
 
-  // /* int32_t */ monitor = obj->monitor_
-  __ LoadFromOffset(kLoadWord, temp_reg, obj, monitor_offset);
-  if (needs_null_check) {
-    MaybeRecordImplicitNullCheck(instruction);
+  // Slow path marking the object `ref` when the GC is marking. The
+  // entrypoint will already be loaded in `temp3`.
+  Location temp3 = Location::RegisterLocation(LR);
+  SlowPathCodeARM* slow_path;
+  if (always_update_field) {
+    DCHECK(temp2 != nullptr);
+    // LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM only
+    // supports addresses of the form `obj + field_offset`, where `obj`
+    // is a register and `field_offset` is a register pair (of which
+    // only the lower half is used). Thus `offset` and `scale_factor`
+    // above are expected to be 0 and TIMES_1, respectively, in this
+    // code path.
+    DCHECK_EQ(offset, 0u);
+    DCHECK_EQ(scale_factor, ScaleFactor::TIMES_1);
+    Location field_offset = index;
+    slow_path =
+        new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM(
+            instruction,
+            ref,
+            obj,
+            offset,
+            /* index */ field_offset,
+            scale_factor,
+            needs_null_check,
+            temp_reg,
+            *temp2,
+            /* entrypoint */ temp3);
+  } else {
+    slow_path = new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierSlowPathARM(
+        instruction,
+        ref,
+        obj,
+        offset,
+        index,
+        scale_factor,
+        needs_null_check,
+        temp_reg,
+        /* entrypoint */ temp3);
   }
-  // /* LockWord */ lock_word = LockWord(monitor)
-  static_assert(sizeof(LockWord) == sizeof(int32_t),
-                "art::LockWord and int32_t have different sizes.");
+  AddSlowPath(slow_path);
 
-  // Introduce a dependency on the lock_word including the rb_state,
-  // which shall prevent load-load reordering without using
-  // a memory barrier (which would be more expensive).
-  // `obj` is unchanged by this operation, but its value now depends
-  // on `temp_reg`.
-  __ add(obj, obj, ShifterOperand(temp_reg, LSR, 32));
+  // temp3 = Thread::Current()->pReadBarrierMarkReg ## ref.reg()
+  const int32_t entry_point_offset =
+      CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref.reg());
+  // Loading the entrypoint does not require a load acquire since it is only changed when
+  // threads are suspended or running a checkpoint.
+  __ LoadFromOffset(kLoadWord, temp3.AsRegister<Register>(), TR, entry_point_offset);
+  // The entrypoint is null when the GC is not marking; this prevents one load compared to
+  // checking GetIsGcMarking().
+  __ CompareAndBranchIfNonZero(temp3.AsRegister<Register>(), slow_path->GetEntryLabel());
+  // Fast path: just load the reference.
+  GenerateRawReferenceLoad(instruction, ref, obj, offset, index, scale_factor, needs_null_check);
+  __ Bind(slow_path->GetExitLabel());
+}
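+
+// Putting it together, a Baker-style field load is emitted roughly as (sketch;
+// the entrypoint is per-register, pReadBarrierMarkRegN for ref in rN):
+//
+//   LDR lr, [tr, #<pReadBarrierMarkRegN offset>]
+//   CMP lr, #0               // Null entrypoint <=> the GC is not marking.
+//   BNE slow_path            // Marking: check the gray bit, maybe call lr.
+//   LDR ref, [obj, #offset]  // Fast path: raw reference load.
+// exit: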
 
-  // The actual reference load.
+void CodeGeneratorARM::GenerateRawReferenceLoad(HInstruction* instruction,
+                                                Location ref,
+                                                Register obj,
+                                                uint32_t offset,
+                                                Location index,
+                                                ScaleFactor scale_factor,
+                                                bool needs_null_check) {
+  Register ref_reg = ref.AsRegister<Register>();
+
   if (index.IsValid()) {
     // Load types involving an "index": ArrayGet,
     // UnsafeGetObject/UnsafeGetObjectVolatile and UnsafeCASObject
     // intrinsics.
-    // /* HeapReference<Object> */ ref = *(obj + offset + (index << scale_factor))
+    // /* HeapReference<mirror::Object> */ ref = *(obj + offset + (index << scale_factor))
     if (index.IsConstant()) {
       size_t computed_offset =
           (index.GetConstant()->AsIntConstant()->GetValue() << scale_factor) + offset;
@@ -7104,41 +8058,16 @@
       __ LoadFromOffset(kLoadWord, ref_reg, IP, offset);
     }
   } else {
-    // /* HeapReference<Object> */ ref = *(obj + offset)
+    // /* HeapReference<mirror::Object> */ ref = *(obj + offset)
     __ LoadFromOffset(kLoadWord, ref_reg, obj, offset);
   }
 
+  if (needs_null_check) {
+    MaybeRecordImplicitNullCheck(instruction);
+  }
+
   // Object* ref = ref_addr->AsMirrorPtr()
   __ MaybeUnpoisonHeapReference(ref_reg);
-
-  // Slow path marking the object `ref` when it is gray.
-  SlowPathCodeARM* slow_path;
-  if (always_update_field) {
-    DCHECK(temp2 != nullptr);
-    // ReadBarrierMarkAndUpdateFieldSlowPathARM only supports address
-    // of the form `obj + field_offset`, where `obj` is a register and
-    // `field_offset` is a register pair (of which only the lower half
-    // is used). Thus `offset` and `scale_factor` above are expected
-    // to be null in this code path.
-    DCHECK_EQ(offset, 0u);
-    DCHECK_EQ(scale_factor, ScaleFactor::TIMES_1);
-    slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkAndUpdateFieldSlowPathARM(
-        instruction, ref, obj, /* field_offset */ index, temp_reg, *temp2);
-  } else {
-    slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(instruction, ref);
-  }
-  AddSlowPath(slow_path);
-
-  // if (rb_state == ReadBarrier::GrayState())
-  //   ref = ReadBarrier::Mark(ref);
-  // Given the numeric representation, it's enough to check the low bit of the
-  // rb_state. We do that by shifting the bit out of the lock word with LSRS
-  // which can be a 16-bit instruction unlike the TST immediate.
-  static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
-  static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
-  __ Lsrs(temp_reg, temp_reg, LockWord::kReadBarrierStateShift + 1);
-  __ b(slow_path->GetEntryLabel(), CS);  // Carry flag is the last bit shifted out by LSRS.
-  __ Bind(slow_path->GetExitLabel());
 }
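
For the "index" case above, a constant index is folded into a single immediate offset, while a register index needs a scratch register (IP in the generated code). A small sketch of the effective address computation, with illustrative names and assuming the shift cannot overflow (not ART code):

    #include <cstdint>

    // Address computed by GenerateRawReferenceLoad for indexed loads.
    uintptr_t ReferenceAddress(uintptr_t obj,
                               uint32_t offset,
                               uintptr_t index,
                               unsigned scale_factor,
                               bool index_is_constant) {
      if (index_is_constant) {
        // Constant index: fold everything into one immediate, as in the
        // `computed_offset` branch above.
        return obj + ((index << scale_factor) + offset);
      }
      // Register index: materialize obj + (index << scale_factor) in a
      // scratch register (IP above), then load from [scratch, #offset].
      uintptr_t scratch = obj + (index << scale_factor);
      return scratch + offset;
    }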
 
 void CodeGeneratorARM::GenerateReadBarrierSlow(HInstruction* instruction,
@@ -7381,9 +8310,7 @@
 }
 
 Literal* CodeGeneratorARM::DeduplicateBootImageAddressLiteral(uint32_t address) {
-  bool needs_patch = GetCompilerOptions().GetIncludePatchInformation();
-  Uint32ToLiteralMap* map = needs_patch ? &boot_image_address_patches_ : &uint32_literals_;
-  return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address), map);
+  return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address), &uint32_literals_);
 }
 
 Literal* CodeGeneratorARM::DeduplicateJitStringLiteral(const DexFile& dex_file,
@@ -7434,8 +8361,7 @@
       /* MOVW+MOVT for each entry */ 2u * pc_relative_string_patches_.size() +
       boot_image_type_patches_.size() +
       /* MOVW+MOVT for each entry */ 2u * pc_relative_type_patches_.size() +
-      /* MOVW+MOVT for each entry */ 2u * type_bss_entry_patches_.size() +
-      boot_image_address_patches_.size();
+      /* MOVW+MOVT for each entry */ 2u * type_bss_entry_patches_.size();
   linker_patches->reserve(size);
   EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_,
                                                                linker_patches);
@@ -7469,13 +8395,6 @@
                                                      target_type.dex_file,
                                                      target_type.type_index.index_));
   }
-  for (const auto& entry : boot_image_address_patches_) {
-    DCHECK(GetCompilerOptions().GetIncludePatchInformation());
-    Literal* literal = entry.second;
-    DCHECK(literal->GetLabel()->IsBound());
-    uint32_t literal_offset = literal->GetLabel()->Position();
-    linker_patches->push_back(LinkerPatch::RecordPosition(literal_offset));
-  }
   DCHECK_EQ(size, linker_patches->size());
 }
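
The reservation arithmetic above counts two linker patches per PC-relative entry, because each such reference is materialized by a MOVW/MOVT pair and both instructions must be patched; plain literal entries contribute one patch each. A sketch of the count, with illustrative names (not ART code):

    #include <cstddef>

    // MOVW patches the low 16 bits, MOVT the high 16 bits, so each
    // PC-relative entry yields two patches.
    size_t ExpectedPatchCount(size_t pc_relative_entries, size_t literal_entries) {
      return 2u * pc_relative_entries + literal_entries;
    }

The trailing DCHECK_EQ(size, linker_patches->size()) then verifies that the reservation was exact.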
 
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index df2dbc7..5b15902 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -299,7 +299,6 @@
   void GenerateCompareTestAndBranch(HCondition* condition,
                                     Label* true_target,
                                     Label* false_target);
-  void GenerateVcmp(HInstruction* instruction);
   void GenerateFPJumps(HCondition* cond, Label* true_label, Label* false_label);
   void GenerateLongComparesAndJumps(HCondition* cond, Label* true_label, Label* false_label);
   void DivRemOneOrMinusOne(HBinaryOperation* instruction);
@@ -422,6 +421,8 @@
     return CommonGetLabelOf<Label>(block_labels_, block);
   }
 
+  Label* GetFinalLabel(HInstruction* instruction, Label* final_label);
+
   void Initialize() OVERRIDE {
     block_labels_ = CommonInitializeLabels<Label>();
   }
@@ -520,9 +521,6 @@
                                              Location index,
                                              Location temp,
                                              bool needs_null_check);
-  // Factored implementation used by GenerateFieldLoadWithBakerReadBarrier
-  // and GenerateArrayLoadWithBakerReadBarrier.
-
   // Factored implementation, used by GenerateFieldLoadWithBakerReadBarrier,
   // GenerateArrayLoadWithBakerReadBarrier and some intrinsics.
   //
@@ -545,6 +543,15 @@
                                                  bool always_update_field = false,
                                                  Register* temp2 = nullptr);
 
+  // Generate a heap reference load (with no read barrier).
+  void GenerateRawReferenceLoad(HInstruction* instruction,
+                                Location ref,
+                                Register obj,
+                                uint32_t offset,
+                                Location index,
+                                ScaleFactor scale_factor,
+                                bool needs_null_check);
+
   // Generate a read barrier for a heap reference within `instruction`
   // using a slow path.
   //
@@ -642,8 +649,6 @@
   ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_;
   // PC-relative type patch info for kBssEntry.
   ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_;
-  // Deduplication map for patchable boot image addresses.
-  Uint32ToLiteralMap boot_image_address_patches_;
 
   // Patches for string literals in JIT compiled code.
   StringToLiteralMap jit_string_patches_;
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index e6032d2..28cc942 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -633,58 +633,23 @@
   }
 }
 
-// Slow path marking an object reference `ref` during a read
-// barrier. The field `obj.field` in the object `obj` holding this
-// reference does not get updated by this slow path after marking (see
-// ReadBarrierMarkAndUpdateFieldSlowPathARM64 below for that).
+// Abstract base class for read barrier slow paths marking a reference
+// `ref`.
 //
-// This means that after the execution of this slow path, `ref` will
-// always be up-to-date, but `obj.field` may not; i.e., after the
-// flip, `ref` will be a to-space reference, but `obj.field` will
-// probably still be a from-space reference (unless it gets updated by
-// another thread, or if another thread installed another object
-// reference (different from `ref`) in `obj.field`).
-//
-// If `entrypoint` is a valid location it is assumed to already be
-// holding the entrypoint. The case where the entrypoint is passed in
-// is for the GcRoot read barrier.
-class ReadBarrierMarkSlowPathARM64 : public SlowPathCodeARM64 {
- public:
-  ReadBarrierMarkSlowPathARM64(HInstruction* instruction,
-                               Location ref,
-                               Location entrypoint = Location::NoLocation())
-      : SlowPathCodeARM64(instruction),
-        ref_(ref),
-        entrypoint_(entrypoint) {
+// Argument `entrypoint` must be a register location holding the read
+// barrier marking runtime entry point to be invoked.
+class ReadBarrierMarkSlowPathBaseARM64 : public SlowPathCodeARM64 {
+ protected:
+  ReadBarrierMarkSlowPathBaseARM64(HInstruction* instruction, Location ref, Location entrypoint)
+      : SlowPathCodeARM64(instruction), ref_(ref), entrypoint_(entrypoint) {
     DCHECK(kEmitCompilerReadBarrier);
   }
 
-  const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathARM64"; }
+  const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathBaseARM64"; }
 
-  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
-    LocationSummary* locations = instruction_->GetLocations();
-    DCHECK(locations->CanCall());
-    DCHECK(ref_.IsRegister()) << ref_;
-    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_.reg())) << ref_.reg();
-    DCHECK(instruction_->IsInstanceFieldGet() ||
-           instruction_->IsStaticFieldGet() ||
-           instruction_->IsArrayGet() ||
-           instruction_->IsArraySet() ||
-           instruction_->IsLoadClass() ||
-           instruction_->IsLoadString() ||
-           instruction_->IsInstanceOf() ||
-           instruction_->IsCheckCast() ||
-           (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) ||
-           (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified()))
-        << "Unexpected instruction in read barrier marking slow path: "
-        << instruction_->DebugName();
-    // The read barrier instrumentation of object ArrayGet
-    // instructions does not support the HIntermediateAddress
-    // instruction.
-    DCHECK(!(instruction_->IsArrayGet() &&
-             instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress()));
-
-    __ Bind(GetEntryLabel());
+  // Generate assembly code calling the read barrier marking runtime
+  // entry point (ReadBarrierMarkRegX).
+  void GenerateReadBarrierMarkRuntimeCall(CodeGenerator* codegen) {
     // No need to save live registers; it's taken care of by the
     // entrypoint. Also, there is no need to update the stack mask,
     // as this runtime call will not trigger a garbage collection.
@@ -720,46 +685,261 @@
       // This runtime call does not require a stack map.
       arm64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
     }
-    __ B(GetExitLabel());
   }
 
- private:
   // The location (register) of the marked object reference.
   const Location ref_;
 
   // The location of the entrypoint if it is already loaded.
   const Location entrypoint_;
 
+ private:
+  DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathBaseARM64);
+};
+
+// Slow path marking an object reference `ref` during a read
+// barrier. The field `obj.field` in the object `obj` holding this
+// reference does not get updated by this slow path after marking.
+//
+// This means that after the execution of this slow path, `ref` will
+// always be up-to-date, but `obj.field` may not; i.e., after the
+// flip, `ref` will be a to-space reference, but `obj.field` will
+// probably still be a from-space reference (unless it gets updated by
+// another thread, or if another thread installed another object
+// reference (different from `ref`) in `obj.field`).
+//
+// If `entrypoint` is a valid location it is assumed to already be
+// holding the entrypoint. The case where the entrypoint is passed in
+// is when the decision to mark is based on whether the GC is marking.
+class ReadBarrierMarkSlowPathARM64 : public ReadBarrierMarkSlowPathBaseARM64 {
+ public:
+  ReadBarrierMarkSlowPathARM64(HInstruction* instruction,
+                               Location ref,
+                               Location entrypoint = Location::NoLocation())
+      : ReadBarrierMarkSlowPathBaseARM64(instruction, ref, entrypoint) {
+    DCHECK(kEmitCompilerReadBarrier);
+  }
+
+  const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathARM64"; }
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    LocationSummary* locations = instruction_->GetLocations();
+    DCHECK(locations->CanCall());
+    DCHECK(ref_.IsRegister()) << ref_;
+    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_.reg())) << ref_.reg();
+    DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
+        << "Unexpected instruction in read barrier marking slow path: "
+        << instruction_->DebugName();
+
+    __ Bind(GetEntryLabel());
+    GenerateReadBarrierMarkRuntimeCall(codegen);
+    __ B(GetExitLabel());
+  }
+
+ private:
   DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathARM64);
 };
 
-// Slow path marking an object reference `ref` during a read barrier,
-// and if needed, atomically updating the field `obj.field` in the
-// object `obj` holding this reference after marking (contrary to
-// ReadBarrierMarkSlowPathARM64 above, which never tries to update
-// `obj.field`).
+// Slow path loading `obj`'s lock word, loading a reference from
+// object `*(obj + offset + (index << scale_factor))` into `ref`, and
+// marking `ref` if `obj` is gray according to the lock word (Baker
+// read barrier). The field `obj.field` in the object `obj` holding
+// this reference does not get updated by this slow path after marking
+// (see LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64
+// below for that).
+//
+// This means that after the execution of this slow path, `ref` will
+// always be up-to-date, but `obj.field` may not; i.e., after the
+// flip, `ref` will be a to-space reference, but `obj.field` will
+// probably still be a from-space reference (unless it gets updated by
+// another thread, or if another thread installed another object
+// reference (different from `ref`) in `obj.field`).
+//
+// Argument `entrypoint` must be a register location holding the read
+// barrier marking runtime entry point to be invoked.
+class LoadReferenceWithBakerReadBarrierSlowPathARM64 : public ReadBarrierMarkSlowPathBaseARM64 {
+ public:
+  LoadReferenceWithBakerReadBarrierSlowPathARM64(HInstruction* instruction,
+                                                 Location ref,
+                                                 Register obj,
+                                                 uint32_t offset,
+                                                 Location index,
+                                                 size_t scale_factor,
+                                                 bool needs_null_check,
+                                                 bool use_load_acquire,
+                                                 Register temp,
+                                                 Location entrypoint)
+      : ReadBarrierMarkSlowPathBaseARM64(instruction, ref, entrypoint),
+        obj_(obj),
+        offset_(offset),
+        index_(index),
+        scale_factor_(scale_factor),
+        needs_null_check_(needs_null_check),
+        use_load_acquire_(use_load_acquire),
+        temp_(temp) {
+    DCHECK(kEmitCompilerReadBarrier);
+    DCHECK(kUseBakerReadBarrier);
+  }
+
+  const char* GetDescription() const OVERRIDE {
+    return "LoadReferenceWithBakerReadBarrierSlowPathARM64";
+  }
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    LocationSummary* locations = instruction_->GetLocations();
+    DCHECK(locations->CanCall());
+    DCHECK(ref_.IsRegister()) << ref_;
+    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_.reg())) << ref_.reg();
+    DCHECK(obj_.IsW());
+    DCHECK_NE(ref_.reg(), LocationFrom(temp_).reg());
+    DCHECK(instruction_->IsInstanceFieldGet() ||
+           instruction_->IsStaticFieldGet() ||
+           instruction_->IsArrayGet() ||
+           instruction_->IsArraySet() ||
+           instruction_->IsInstanceOf() ||
+           instruction_->IsCheckCast() ||
+           (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) ||
+           (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified()))
+        << "Unexpected instruction in read barrier marking slow path: "
+        << instruction_->DebugName();
+    // The read barrier instrumentation of object ArrayGet
+    // instructions does not support the HIntermediateAddress
+    // instruction.
+    DCHECK(!(instruction_->IsArrayGet() &&
+             instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress()));
+
+    // Temporary register `temp_`, used to store the lock word, must
+    // not be IP0 nor IP1, as we may use them to emit the reference
+    // load (in the call to GenerateRawReferenceLoad below), and we
+    // need the lock word to still be in `temp_` after the reference
+    // load.
+    DCHECK_NE(LocationFrom(temp_).reg(), IP0);
+    DCHECK_NE(LocationFrom(temp_).reg(), IP1);
+
+    __ Bind(GetEntryLabel());
+
+    // When using MaybeGenerateReadBarrierSlow, the read barrier call is
+    // inserted after the original load. However, in fast path based
+    // Baker's read barriers, we need to perform the load of
+    // mirror::Object::monitor_ *before* the original reference load.
+    // This load-load ordering is required by the read barrier.
+    // The fast path/slow path (for Baker's algorithm) should look like:
+    //
+    //   uint32_t rb_state = LockWord(obj->monitor_).ReadBarrierState();
+    //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
+    //   HeapReference<mirror::Object> ref = *src;  // Original reference load.
+    //   bool is_gray = (rb_state == ReadBarrier::GrayState());
+    //   if (is_gray) {
+    //     ref = entrypoint(ref);  // ref = ReadBarrier::Mark(ref);  // Runtime entry point call.
+    //   }
+    //
+    // Note: the original implementation in ReadBarrier::Barrier is
+    // slightly more complex as it performs additional checks that we do
+    // not do here for performance reasons.
+
+    // /* int32_t */ monitor = obj->monitor_
+    uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
+    __ Ldr(temp_, HeapOperand(obj_, monitor_offset));
+    if (needs_null_check_) {
+      codegen->MaybeRecordImplicitNullCheck(instruction_);
+    }
+    // /* LockWord */ lock_word = LockWord(monitor)
+    static_assert(sizeof(LockWord) == sizeof(int32_t),
+                  "art::LockWord and int32_t have different sizes.");
+
+    // Introduce a dependency on the lock_word including rb_state,
+    // to prevent load-load reordering, and without using
+    // a memory barrier (which would be more expensive).
+    // `obj` is unchanged by this operation, but its value now depends
+    // on `temp`.
+    __ Add(obj_.X(), obj_.X(), Operand(temp_.X(), LSR, 32));
+
+    // The actual reference load.
+    // A possible implicit null check has already been handled above.
+    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
+    arm64_codegen->GenerateRawReferenceLoad(instruction_,
+                                            ref_,
+                                            obj_,
+                                            offset_,
+                                            index_,
+                                            scale_factor_,
+                                            /* needs_null_check */ false,
+                                            use_load_acquire_);
+
+    // Mark the object `ref` when `obj` is gray.
+    //
+    //   if (rb_state == ReadBarrier::GrayState())
+    //     ref = ReadBarrier::Mark(ref);
+    //
+    // Given the numeric representation, it's enough to check the low bit of the rb_state.
+    static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
+    static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
+    __ Tbz(temp_, LockWord::kReadBarrierStateShift, GetExitLabel());
+    GenerateReadBarrierMarkRuntimeCall(codegen);
+
+    __ B(GetExitLabel());
+  }
+
+ private:
+  // The register containing the object holding the marked object reference field.
+  Register obj_;
+  // The offset, index and scale factor to access the reference in `obj_`.
+  uint32_t offset_;
+  Location index_;
+  size_t scale_factor_;
+  // Is a null check required?
+  bool needs_null_check_;
+  // Should this reference load use Load-Acquire semantics?
+  bool use_load_acquire_;
+  // A temporary register used to hold the lock word of `obj_`.
+  Register temp_;
+
+  DISALLOW_COPY_AND_ASSIGN(LoadReferenceWithBakerReadBarrierSlowPathARM64);
+};
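
The `Add(obj_.X(), obj_.X(), Operand(temp_.X(), LSR, 32))` emitted above orders the two loads through an address dependency instead of a DMB. A C++ rendering of the idea, with an invented object layout (not ART code); note that a C++ compiler may fold the zero addend away, which is exactly why ART emits the ADD directly in the assembler:

    #include <cstdint>

    // Illustrative layout, not ART's.
    struct Object {
      uint32_t monitor;  // lock word
      Object* field;
    };

    Object* DependentLoad(Object* obj) {
      // A 32-bit load zero-extends into the 64-bit register, so the top 32
      // bits are zero and (lock_word >> 32) is always 0.
      uint64_t lock_word = obj->monitor;
      // Adding that 0 leaves `obj` unchanged, but it makes the second
      // load's address data-dependent on the first load, which is enough
      // to order the two loads on ARM without a (more expensive) barrier.
      Object* biased = reinterpret_cast<Object*>(
          reinterpret_cast<uintptr_t>(obj) + (lock_word >> 32));
      return biased->field;  // The actual reference load, ordered after
                             // the lock word load.
    }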
+
+// Slow path loading `obj`'s lock word, loading a reference from
+// object `*(obj + offset + (index << scale_factor))` into `ref`, and
+// marking `ref` if `obj` is gray according to the lock word (Baker
+// read barrier). If needed, this slow path also atomically updates
+// the field `obj.field` in the object `obj` holding this reference
+// after marking (contrary to
+// LoadReferenceWithBakerReadBarrierSlowPathARM64 above, which never
+// tries to update `obj.field`).
 //
 // This means that after the execution of this slow path, both `ref`
 // and `obj.field` will be up-to-date; i.e., after the flip, both will
 // hold the same to-space reference (unless another thread installed
 // another object reference (different from `ref`) in `obj.field`).
-class ReadBarrierMarkAndUpdateFieldSlowPathARM64 : public SlowPathCodeARM64 {
+//
+// Argument `entrypoint` must be a register location holding the read
+// barrier marking runtime entry point to be invoked.
+class LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64
+    : public ReadBarrierMarkSlowPathBaseARM64 {
  public:
-  ReadBarrierMarkAndUpdateFieldSlowPathARM64(HInstruction* instruction,
-                                             Location ref,
-                                             Register obj,
-                                             Location field_offset,
-                                             Register temp)
-      : SlowPathCodeARM64(instruction),
-        ref_(ref),
+  LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64(HInstruction* instruction,
+                                                               Location ref,
+                                                               Register obj,
+                                                               uint32_t offset,
+                                                               Location index,
+                                                               size_t scale_factor,
+                                                               bool needs_null_check,
+                                                               bool use_load_acquire,
+                                                               Register temp,
+                                                               Location entrypoint)
+      : ReadBarrierMarkSlowPathBaseARM64(instruction, ref, entrypoint),
         obj_(obj),
-        field_offset_(field_offset),
+        offset_(offset),
+        index_(index),
+        scale_factor_(scale_factor),
+        needs_null_check_(needs_null_check),
+        use_load_acquire_(use_load_acquire),
         temp_(temp) {
     DCHECK(kEmitCompilerReadBarrier);
+    DCHECK(kUseBakerReadBarrier);
   }
 
   const char* GetDescription() const OVERRIDE {
-    return "ReadBarrierMarkAndUpdateFieldSlowPathARM64";
+    return "LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64";
   }
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
@@ -768,64 +948,90 @@
     DCHECK(locations->CanCall());
     DCHECK(ref_.IsRegister()) << ref_;
     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_.reg())) << ref_.reg();
-    // This slow path is only used by the UnsafeCASObject intrinsic.
+    DCHECK(obj_.IsW());
+    DCHECK_NE(ref_.reg(), LocationFrom(temp_).reg());
+
+    // This slow path is only used by the UnsafeCASObject intrinsic at the moment.
     DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
         << "Unexpected instruction in read barrier marking and field updating slow path: "
         << instruction_->DebugName();
     DCHECK(instruction_->GetLocations()->Intrinsified());
     DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject);
-    DCHECK(field_offset_.IsRegister()) << field_offset_;
+    DCHECK_EQ(offset_, 0u);
+    DCHECK_EQ(scale_factor_, 0u);
+    DCHECK_EQ(use_load_acquire_, false);
+    // The location of the offset of the marked reference field within `obj_`.
+    Location field_offset = index_;
+    DCHECK(field_offset.IsRegister()) << field_offset;
+
+    // Temporary register `temp_`, used to store the lock word, must
+    // not be IP0 nor IP1, as we may use them to emit the reference
+    // load (in the call to GenerateRawReferenceLoad below), and we
+    // need the lock word to still be in `temp_` after the reference
+    // load.
+    DCHECK_NE(LocationFrom(temp_).reg(), IP0);
+    DCHECK_NE(LocationFrom(temp_).reg(), IP1);
 
     __ Bind(GetEntryLabel());
 
-    // Save the old reference.
+    // /* int32_t */ monitor = obj->monitor_
+    uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
+    __ Ldr(temp_, HeapOperand(obj_, monitor_offset));
+    if (needs_null_check_) {
+      codegen->MaybeRecordImplicitNullCheck(instruction_);
+    }
+    // /* LockWord */ lock_word = LockWord(monitor)
+    static_assert(sizeof(LockWord) == sizeof(int32_t),
+                  "art::LockWord and int32_t have different sizes.");
+
+    // Introduce a dependency on the lock_word including rb_state,
+    // to prevent load-load reordering, and without using
+    // a memory barrier (which would be more expensive).
+    // `obj` is unchanged by this operation, but its value now depends
+    // on `temp`.
+    __ Add(obj_.X(), obj_.X(), Operand(temp_.X(), LSR, 32));
+
+    // The actual reference load.
+    // A possible implicit null check has already been handled above.
+    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
+    arm64_codegen->GenerateRawReferenceLoad(instruction_,
+                                            ref_,
+                                            obj_,
+                                            offset_,
+                                            index_,
+                                            scale_factor_,
+                                            /* needs_null_check */ false,
+                                            use_load_acquire_);
+
+    // Mark the object `ref` when `obj` is gray.
+    //
+    //   if (rb_state == ReadBarrier::GrayState())
+    //     ref = ReadBarrier::Mark(ref);
+    //
+    // Given the numeric representation, it's enough to check the low bit of the rb_state.
+    static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
+    static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
+    __ Tbz(temp_, LockWord::kReadBarrierStateShift, GetExitLabel());
+
+    // Save the old value of the reference before marking it.
     // Note that we cannot use IP to save the old reference, as IP is
     // used internally by the ReadBarrierMarkRegX entry point, and we
     // need the old reference after the call to that entry point.
     DCHECK_NE(LocationFrom(temp_).reg(), IP0);
     __ Mov(temp_.W(), ref_reg);
 
-    // No need to save live registers; it's taken care of by the
-    // entrypoint. Also, there is no need to update the stack mask,
-    // as this runtime call will not trigger a garbage collection.
-    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
-    DCHECK_NE(ref_.reg(), LR);
-    DCHECK_NE(ref_.reg(), WSP);
-    DCHECK_NE(ref_.reg(), WZR);
-    // IP0 is used internally by the ReadBarrierMarkRegX entry point
-    // as a temporary, it cannot be the entry point's input/output.
-    DCHECK_NE(ref_.reg(), IP0);
-    DCHECK(0 <= ref_.reg() && ref_.reg() < kNumberOfWRegisters) << ref_.reg();
-    // "Compact" slow path, saving two moves.
-    //
-    // Instead of using the standard runtime calling convention (input
-    // and output in W0):
-    //
-    //   W0 <- ref
-    //   W0 <- ReadBarrierMark(W0)
-    //   ref <- W0
-    //
-    // we just use rX (the register containing `ref`) as input and output
-    // of a dedicated entrypoint:
-    //
-    //   rX <- ReadBarrierMarkRegX(rX)
-    //
-    int32_t entry_point_offset =
-        CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ref_.reg());
-    // This runtime call does not require a stack map.
-    arm64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
+    GenerateReadBarrierMarkRuntimeCall(codegen);
 
     // If the new reference is different from the old reference,
-    // update the field in the holder (`*(obj_ + field_offset_)`).
+    // update the field in the holder (`*(obj_ + field_offset)`).
     //
     // Note that this field could also hold a different object, if
     // another thread had concurrently changed it. In that case, the
     // LDXR/CMP/BNE sequence of instructions in the compare-and-set
     // (CAS) operation below would abort the CAS, leaving the field
     // as-is.
-    vixl::aarch64::Label done;
     __ Cmp(temp_.W(), ref_reg);
-    __ B(eq, &done);
+    __ B(eq, GetExitLabel());
 
     // Update the holder's field atomically.  This may fail if the
     // mutator updates it before us, but it's OK.  This is achieved
@@ -838,7 +1044,7 @@
 
     // Convenience aliases.
     Register base = obj_.W();
-    Register offset = XRegisterFrom(field_offset_);
+    Register offset = XRegisterFrom(field_offset);
     Register expected = temp_.W();
     Register value = ref_reg;
     Register tmp_ptr = temps.AcquireX();    // Pointer to actual memory.
@@ -882,21 +1088,26 @@
       }
     }
 
-    __ Bind(&done);
     __ B(GetExitLabel());
   }
 
  private:
-  // The location (register) of the marked object reference.
-  const Location ref_;
   // The register containing the object holding the marked object reference field.
   const Register obj_;
-  // The location of the offset of the marked reference field within `obj_`.
-  Location field_offset_;
-
+  // The offset, index and scale factor to access the reference in `obj_`.
+  uint32_t offset_;
+  Location index_;
+  size_t scale_factor_;
+  // Is a null check required?
+  bool needs_null_check_;
+  // Should this reference load use Load-Acquire semantics?
+  bool use_load_acquire_;
+  // A temporary register used to hold the lock word of `obj_`, and
+  // also the original reference value when the reference is marked.
   const Register temp_;
 
-  DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathARM64);
+  DISALLOW_COPY_AND_ASSIGN(LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64);
 };
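
The field update at the end of this slow path is a conditional compare-and-set: write the marked reference back only if the field still holds the old one. Sketched with std::atomic under invented names; this shows the semantics, not the LDXR/STXR sequence ART emits:

    #include <atomic>

    struct Object;

    void UpdateFieldAfterMark(std::atomic<Object*>* field,
                              Object* old_ref,      // saved in `temp_` above
                              Object* marked_ref) { // result of the mark call
      if (marked_ref == old_ref) {
        return;  // Mirrors the `Cmp` + `B(eq, GetExitLabel())` early exit.
      }
      Object* expected = old_ref;
      // If another thread changed the field first, the CAS fails and the
      // field is left as-is; per the comment above, that is OK.
      field->compare_exchange_strong(expected, marked_ref);
    }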
 
 // Slow path generating a read barrier for a heap reference.
@@ -1200,8 +1411,6 @@
                                graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
-      boot_image_address_patches_(std::less<uint32_t>(),
-                                  graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       jit_string_patches_(StringReferenceValueComparator(),
                           graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       jit_class_patches_(TypeReferenceValueComparator(),
@@ -2186,7 +2395,7 @@
     case Primitive::kPrimLong: {
       locations->SetInAt(0, Location::RequiresRegister());
       locations->SetInAt(1, Location::RegisterOrConstant(instr->InputAt(1)));
-      locations->SetOut(Location::RequiresRegister());
+      locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
       break;
     }
     default:
@@ -2277,8 +2486,8 @@
   }
 }
 
-void LocationsBuilderARM64::VisitArm64DataProcWithShifterOp(
-    HArm64DataProcWithShifterOp* instruction) {
+void LocationsBuilderARM64::VisitDataProcWithShifterOp(
+    HDataProcWithShifterOp* instruction) {
   DCHECK(instruction->GetType() == Primitive::kPrimInt ||
          instruction->GetType() == Primitive::kPrimLong);
   LocationSummary* locations =
@@ -2292,8 +2501,8 @@
   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
 }
 
-void InstructionCodeGeneratorARM64::VisitArm64DataProcWithShifterOp(
-    HArm64DataProcWithShifterOp* instruction) {
+void InstructionCodeGeneratorARM64::VisitDataProcWithShifterOp(
+    HDataProcWithShifterOp* instruction) {
   Primitive::Type type = instruction->GetType();
   HInstruction::InstructionKind kind = instruction->GetInstrKind();
   DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
@@ -2302,21 +2511,20 @@
   if (kind != HInstruction::kNeg) {
     left = InputRegisterAt(instruction, 0);
   }
-  // If this `HArm64DataProcWithShifterOp` was created by merging a type conversion as the
+  // If this `HDataProcWithShifterOp` was created by merging a type conversion as the
   // shifter operand operation, the IR generating `right_reg` (input to the type
   // conversion) can have a different type from the current instruction's type,
   // so we manually indicate the type.
   Register right_reg = RegisterFrom(instruction->GetLocations()->InAt(1), type);
-  int64_t shift_amount = instruction->GetShiftAmount() &
-      (type == Primitive::kPrimInt ? kMaxIntShiftDistance : kMaxLongShiftDistance);
-
   Operand right_operand(0);
 
-  HArm64DataProcWithShifterOp::OpKind op_kind = instruction->GetOpKind();
-  if (HArm64DataProcWithShifterOp::IsExtensionOp(op_kind)) {
+  HDataProcWithShifterOp::OpKind op_kind = instruction->GetOpKind();
+  if (HDataProcWithShifterOp::IsExtensionOp(op_kind)) {
     right_operand = Operand(right_reg, helpers::ExtendFromOpKind(op_kind));
   } else {
-    right_operand = Operand(right_reg, helpers::ShiftFromOpKind(op_kind), shift_amount);
+    right_operand = Operand(right_reg,
+                            helpers::ShiftFromOpKind(op_kind),
+                            instruction->GetShiftAmount());
   }
 
   // Logical binary operations do not support extension operations in the
@@ -2357,7 +2565,7 @@
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, ARM64EncodableConstantOrRegister(instruction->GetOffset(), instruction));
-  locations->SetOut(Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
 }
 
 void InstructionCodeGeneratorARM64::VisitIntermediateAddress(HIntermediateAddress* instruction) {
@@ -2428,6 +2636,9 @@
                                                        LocationSummary::kNoCall);
   if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
+    // We need a temporary register for the read barrier marking slow
+    // path in CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier.
+    locations->AddTemp(Location::RequiresRegister());
   }
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
@@ -2463,7 +2674,7 @@
 
   if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
     // Object ArrayGet with Baker's read barrier case.
-    Register temp = temps.AcquireW();
+    Register temp = WRegisterFrom(locations->GetTemp(0));
     // Note that a potential implicit null check is handled in the
     // CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier call.
     codegen_->GenerateArrayLoadWithBakerReadBarrier(
@@ -3420,7 +3631,7 @@
   if (Primitive::IsFloatingPointType(select->GetType())) {
     locations->SetInAt(0, Location::RequiresFpuRegister());
     locations->SetInAt(1, Location::RequiresFpuRegister());
-    locations->SetOut(Location::RequiresFpuRegister());
+    locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
   } else {
     HConstant* cst_true_value = select->GetTrueValue()->AsConstant();
     HConstant* cst_false_value = select->GetFalseValue()->AsConstant();
@@ -3443,7 +3654,7 @@
                                                  : Location::ConstantLocation(cst_true_value));
     locations->SetInAt(0, false_value_in_register ? Location::RequiresRegister()
                                                   : Location::ConstantLocation(cst_false_value));
-    locations->SetOut(Location::RequiresRegister());
+    locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
   }
 
   if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
@@ -4095,7 +4306,7 @@
 }
 
 void LocationsBuilderARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
-  IntrinsicLocationsBuilderARM64 intrinsic(GetGraph()->GetArena());
+  IntrinsicLocationsBuilderARM64 intrinsic(GetGraph()->GetArena(), codegen_);
   if (intrinsic.TryDispatch(invoke)) {
     return;
   }
@@ -4108,7 +4319,7 @@
   // art::PrepareForRegisterAllocation.
   DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
 
-  IntrinsicLocationsBuilderARM64 intrinsic(GetGraph()->GetArena());
+  IntrinsicLocationsBuilderARM64 intrinsic(GetGraph()->GetArena(), codegen_);
   if (intrinsic.TryDispatch(invoke)) {
     return;
   }
@@ -4329,9 +4540,7 @@
 
 vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateBootImageAddressLiteral(
     uint64_t address) {
-  bool needs_patch = GetCompilerOptions().GetIncludePatchInformation();
-  Uint32ToLiteralMap* map = needs_patch ? &boot_image_address_patches_ : &uint32_literals_;
-  return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address), map);
+  return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address), &uint32_literals_);
 }
 
 vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateJitStringLiteral(
@@ -4399,8 +4608,7 @@
       pc_relative_string_patches_.size() +
       boot_image_type_patches_.size() +
       pc_relative_type_patches_.size() +
-      type_bss_entry_patches_.size() +
-      boot_image_address_patches_.size();
+      type_bss_entry_patches_.size();
   linker_patches->reserve(size);
   for (const PcRelativePatchInfo& info : pc_relative_dex_cache_patches_) {
     linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(info.label.GetLocation(),
@@ -4434,11 +4642,6 @@
                                                      target_type.dex_file,
                                                      target_type.type_index.index_));
   }
-  for (const auto& entry : boot_image_address_patches_) {
-    DCHECK(GetCompilerOptions().GetIncludePatchInformation());
-    vixl::aarch64::Literal<uint32_t>* literal = entry.second;
-    linker_patches->push_back(LinkerPatch::RecordPosition(literal->GetOffset()));
-  }
   DCHECK_EQ(size, linker_patches->size());
 }
 
@@ -5615,14 +5818,35 @@
     DCHECK(kEmitCompilerReadBarrier);
     if (kUseBakerReadBarrier) {
       // Fast path implementation of art::ReadBarrier::BarrierForRoot when
-      // Baker's read barrier are used:
+      // Baker's read barriers are used.
       //
-      //   root = obj.field;
+      // Note that we do not actually check the value of
+      // `GetIsGcMarking()` to decide whether to mark the loaded GC
+      // root or not.  Instead, we load into `temp` the read barrier
+      // mark entry point corresponding to register `root`. If `temp`
+      // is null, it means that `GetIsGcMarking()` is false, and vice
+      // versa.
+      //
       //   temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
-      //   if (temp != null) {
-      //     root = temp(root)
+      //   GcRoot<mirror::Object> root = *(obj+offset);  // Original reference load.
+      //   if (temp != nullptr) {  // <=> Thread::Current()->GetIsGcMarking()
+      //     // Slow path.
+      //     root = temp(root);  // root = ReadBarrier::Mark(root);  // Runtime entry point call.
       //   }
 
+      // Slow path marking the GC root `root`. The entrypoint will already be loaded in `temp`.
+      Register temp = lr;
+      SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(
+          instruction, root, /* entrypoint */ LocationFrom(temp));
+      codegen_->AddSlowPath(slow_path);
+
+      // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
+      const int32_t entry_point_offset =
+          CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(root.reg());
+      // Loading the entrypoint does not require a load acquire since it is only changed when
+      // threads are suspended or running a checkpoint.
+      __ Ldr(temp, MemOperand(tr, entry_point_offset));
+
       // /* GcRoot<mirror::Object> */ root = *(obj + offset)
       if (fixup_label == nullptr) {
         __ Ldr(root_reg, MemOperand(obj, offset));
@@ -5637,20 +5861,6 @@
                     "art::mirror::CompressedReference<mirror::Object> and int32_t "
                     "have different sizes.");
 
-      Register temp = lr;
-
-      // Slow path marking the GC root `root`. The entrypoint will already be loaded in temp.
-      SlowPathCodeARM64* slow_path =
-          new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(instruction,
-                                                                    root,
-                                                                    LocationFrom(temp));
-      codegen_->AddSlowPath(slow_path);
-      const int32_t entry_point_offset =
-          CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(root.reg());
-      // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
-      // Loading the entrypoint does not require a load acquire since it is only changed when
-      // threads are suspended or running a checkpoint.
-      __ Ldr(temp, MemOperand(tr, entry_point_offset));
       // The entrypoint is null when the GC is not marking; this avoids one load compared to
       // checking GetIsGcMarking.
       __ Cbnz(temp, slow_path->GetEntryLabel());
@@ -5752,54 +5962,103 @@
   // `instruction->IsArrayGet()` => `!use_load_acquire`.
   DCHECK(!instruction->IsArrayGet() || !use_load_acquire);
 
-  MacroAssembler* masm = GetVIXLAssembler();
-  UseScratchRegisterScope temps(masm);
-
-  // In slow path based read barriers, the read barrier call is
-  // inserted after the original load. However, in fast path based
-  // Baker's read barriers, we need to perform the load of
-  // mirror::Object::monitor_ *before* the original reference load.
-  // This load-load ordering is required by the read barrier.
-  // The fast path/slow path (for Baker's algorithm) should look like:
+  // Query `art::Thread::Current()->GetIsGcMarking()` to decide
+  // whether we need to enter the slow path to mark the reference.
+  // Then, in the slow path, check the gray bit in the lock word of
+  // the reference's holder (`obj`) to decide whether to mark `ref` or
+  // not.
   //
-  //   uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
-  //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
-  //   HeapReference<Object> ref = *src;  // Original reference load.
-  //   bool is_gray = (rb_state == ReadBarrier::GrayState());
-  //   if (is_gray) {
-  //     ref = ReadBarrier::Mark(ref);  // Performed by runtime entrypoint slow path.
+  // Note that we do not actually check the value of `GetIsGcMarking()`;
+  // instead, we load into `temp2` the read barrier mark entry point
+  // corresponding to register `ref`. If `temp2` is null, it means
+  // that `GetIsGcMarking()` is false, and vice versa.
+  //
+  //   temp2 = Thread::Current()->pReadBarrierMarkReg ## ref.reg()
+  //   if (temp2 != nullptr) {  // <=> Thread::Current()->GetIsGcMarking()
+  //     // Slow path.
+  //     uint32_t rb_state = LockWord(obj->monitor_).ReadBarrierState();
+  //     lfence;  // Load fence or artificial data dependency to prevent load-load reordering
+  //     HeapReference<mirror::Object> ref = *src;  // Original reference load.
+  //     bool is_gray = (rb_state == ReadBarrier::GrayState());
+  //     if (is_gray) {
+  //       ref = temp2(ref);  // ref = ReadBarrier::Mark(ref);  // Runtime entry point call.
+  //     }
+  //   } else {
+  //     HeapReference<mirror::Object> ref = *src;  // Original reference load.
   //   }
-  //
-  // Note: the original implementation in ReadBarrier::Barrier is
-  // slightly more complex as it performs additional checks that we do
-  // not do here for performance reasons.
 
+  // Slow path marking the object `ref` when the GC is marking. The
+  // entrypoint will already be loaded in `temp2`.
+  Register temp2 = lr;
+  Location temp2_loc = LocationFrom(temp2);
+  SlowPathCodeARM64* slow_path;
+  if (always_update_field) {
+    // LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64
+    // only supports address of the form `obj + field_offset`, where
+    // `obj` is a register and `field_offset` is a register. Thus
+    // `offset` and `scale_factor` above are expected to be null in
+    // this code path.
+    DCHECK_EQ(offset, 0u);
+    DCHECK_EQ(scale_factor, 0u);  /* "times 1" */
+    Location field_offset = index;
+    slow_path =
+        new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64(
+            instruction,
+            ref,
+            obj,
+            offset,
+            /* index */ field_offset,
+            scale_factor,
+            needs_null_check,
+            use_load_acquire,
+            temp,
+            /* entrypoint */ temp2_loc);
+  } else {
+    slow_path = new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierSlowPathARM64(
+        instruction,
+        ref,
+        obj,
+        offset,
+        index,
+        scale_factor,
+        needs_null_check,
+        use_load_acquire,
+        temp,
+        /* entrypoint */ temp2_loc);
+  }
+  AddSlowPath(slow_path);
+
+  // temp2 = Thread::Current()->pReadBarrierMarkReg ## ref.reg()
+  const int32_t entry_point_offset =
+      CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ref.reg());
+  // Loading the entrypoint does not require a load acquire since it is only changed when
+  // threads are suspended or running a checkpoint.
+  __ Ldr(temp2, MemOperand(tr, entry_point_offset));
+  // The entrypoint is null when the GC is not marking; this avoids one load compared to
+  // checking GetIsGcMarking.
+  __ Cbnz(temp2, slow_path->GetEntryLabel());
+  // Fast path: just load the reference.
+  GenerateRawReferenceLoad(
+      instruction, ref, obj, offset, index, scale_factor, needs_null_check, use_load_acquire);
+  __ Bind(slow_path->GetExitLabel());
+}
+
+void CodeGeneratorARM64::GenerateRawReferenceLoad(HInstruction* instruction,
+                                                  Location ref,
+                                                  Register obj,
+                                                  uint32_t offset,
+                                                  Location index,
+                                                  size_t scale_factor,
+                                                  bool needs_null_check,
+                                                  bool use_load_acquire) {
+  DCHECK(obj.IsW());
   Primitive::Type type = Primitive::kPrimNot;
   Register ref_reg = RegisterFrom(ref, type);
-  DCHECK(obj.IsW());
-  uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
 
-  {
-    // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
-    EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
-    // /* int32_t */ monitor = obj->monitor_
-    __ Ldr(temp, HeapOperand(obj, monitor_offset));
-    if (needs_null_check) {
-      MaybeRecordImplicitNullCheck(instruction);
-    }
-  }
-  // /* LockWord */ lock_word = LockWord(monitor)
-  static_assert(sizeof(LockWord) == sizeof(int32_t),
-                "art::LockWord and int32_t have different sizes.");
+  // If needed, vixl::EmissionCheckScope guards are used to ensure
+  // that no pools are emitted between the load (macro) instruction
+  // and MaybeRecordImplicitNullCheck.
 
-  // Introduce a dependency on the lock_word including rb_state,
-  // to prevent load-load reordering, and without using
-  // a memory barrier (which would be more expensive).
-  // `obj` is unchanged by this operation, but its value now depends
-  // on `temp`.
-  __ Add(obj.X(), obj.X(), Operand(temp.X(), LSR, 32));
-
-  // The actual reference load.
   if (index.IsValid()) {
     // Load types involving an "index": ArrayGet,
     // UnsafeGetObject/UnsafeGetObjectVolatile and UnsafeCASObject
@@ -5814,59 +6073,50 @@
           << instruction->AsInvoke()->GetIntrinsic();
       DCHECK_EQ(offset, 0u);
       DCHECK_EQ(scale_factor, 0u);
-      DCHECK_EQ(needs_null_check, 0u);
-      // /* HeapReference<Object> */ ref = *(obj + index)
+      DCHECK_EQ(needs_null_check, false);
+      // /* HeapReference<mirror::Object> */ ref = *(obj + index)
       MemOperand field = HeapOperand(obj, XRegisterFrom(index));
       LoadAcquire(instruction, ref_reg, field, /* needs_null_check */ false);
     } else {
-      // ArrayGet and UnsafeGetObject intrinsics cases.
-      // /* HeapReference<Object> */ ref = *(obj + offset + (index << scale_factor))
+      // ArrayGet, UnsafeGetObject and UnsafeCASObject intrinsics cases.
+      // /* HeapReference<mirror::Object> */ ref = *(obj + offset + (index << scale_factor))
       if (index.IsConstant()) {
         uint32_t computed_offset = offset + (Int64ConstantFrom(index) << scale_factor);
+        EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
         Load(type, ref_reg, HeapOperand(obj, computed_offset));
+        if (needs_null_check) {
+          MaybeRecordImplicitNullCheck(instruction);
+        }
       } else {
-        Register temp3 = temps.AcquireW();
-        __ Add(temp3, obj, offset);
-        Load(type, ref_reg, HeapOperand(temp3, XRegisterFrom(index), LSL, scale_factor));
-        temps.Release(temp3);
+        UseScratchRegisterScope temps(GetVIXLAssembler());
+        Register temp = temps.AcquireW();
+        __ Add(temp, obj, offset);
+        {
+          EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
+          Load(type, ref_reg, HeapOperand(temp, XRegisterFrom(index), LSL, scale_factor));
+          if (needs_null_check) {
+            MaybeRecordImplicitNullCheck(instruction);
+          }
+        }
       }
     }
   } else {
-    // /* HeapReference<Object> */ ref = *(obj + offset)
+    // /* HeapReference<mirror::Object> */ ref = *(obj + offset)
     MemOperand field = HeapOperand(obj, offset);
     if (use_load_acquire) {
-      LoadAcquire(instruction, ref_reg, field, /* needs_null_check */ false);
+      // Implicit null checks are handled by CodeGeneratorARM64::LoadAcquire.
+      LoadAcquire(instruction, ref_reg, field, needs_null_check);
     } else {
+      EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
       Load(type, ref_reg, field);
+      if (needs_null_check) {
+        MaybeRecordImplicitNullCheck(instruction);
+      }
     }
   }
 
   // Object* ref = ref_addr->AsMirrorPtr()
   GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
-
-  // Slow path marking the object `ref` when it is gray.
-  SlowPathCodeARM64* slow_path;
-  if (always_update_field) {
-    // ReadBarrierMarkAndUpdateFieldSlowPathARM64 only supports
-    // address of the form `obj + field_offset`, where `obj` is a
-    // register and `field_offset` is a register. Thus `offset` and
-    // `scale_factor` above are expected to be null in this code path.
-    DCHECK_EQ(offset, 0u);
-    DCHECK_EQ(scale_factor, 0u);  /* "times 1" */
-    slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkAndUpdateFieldSlowPathARM64(
-        instruction, ref, obj, /* field_offset */ index, temp);
-  } else {
-    slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(instruction, ref);
-  }
-  AddSlowPath(slow_path);
-
-  // if (rb_state == ReadBarrier::GrayState())
-  //   ref = ReadBarrier::Mark(ref);
-  // Given the numeric representation, it's enough to check the low bit of the rb_state.
-  static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
-  static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
-  __ Tbnz(temp, LockWord::kReadBarrierStateShift, slow_path->GetEntryLabel());
-  __ Bind(slow_path->GetExitLabel());
 }
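
GenerateRawReferenceLoad dispatches between a Load-Acquire (LDAR, via LoadAcquire) for the volatile cases and a plain LDR otherwise. The memory-ordering distinction, sketched with std::atomic under illustrative names (not ART code):

    #include <atomic>

    struct Object;

    // use_load_acquire selects LDAR above; a relaxed load stands in for
    // the plain LDR.
    Object* LoadReferenceField(std::atomic<Object*>* field,
                               bool use_load_acquire) {
      return use_load_acquire
          ? field->load(std::memory_order_acquire)   // volatile get: LDAR
          : field->load(std::memory_order_relaxed);  // ordinary get: LDR
    }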
 
 void CodeGeneratorARM64::GenerateReadBarrierSlow(HInstruction* instruction,
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 5faf29a..7471cd5 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -616,8 +616,8 @@
                                              Location index,
                                              vixl::aarch64::Register temp,
                                              bool needs_null_check);
-  // Factored implementation used by GenerateFieldLoadWithBakerReadBarrier
-  // and GenerateArrayLoadWithBakerReadBarrier.
+  // Factored implementation, used by GenerateFieldLoadWithBakerReadBarrier,
+  // GenerateArrayLoadWithBakerReadBarrier and some intrinsics.
   //
   // Load the object reference located at the address
   // `obj + offset + (index << scale_factor)`, held by object `obj`, into
@@ -636,6 +636,16 @@
                                                  bool use_load_acquire,
                                                  bool always_update_field = false);
 
+  // Generate a heap reference load (with no read barrier).
+  void GenerateRawReferenceLoad(HInstruction* instruction,
+                                Location ref,
+                                vixl::aarch64::Register obj,
+                                uint32_t offset,
+                                Location index,
+                                size_t scale_factor,
+                                bool needs_null_check,
+                                bool use_load_acquire);
+
   // Generate a read barrier for a heap reference within `instruction`
   // using a slow path.
   //
@@ -761,8 +771,6 @@
   ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_;
   // PC-relative type patch info for kBssEntry.
   ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_;
-  // Deduplication map for patchable boot image addresses.
-  Uint32ToLiteralMap boot_image_address_patches_;
 
   // Patches for string literals in JIT compiled code.
   StringToLiteralMap jit_string_patches_;
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index 5c4ca5b..d75779c 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -42,6 +42,7 @@
 using helpers::DWARFReg;
 using helpers::HighDRegisterFrom;
 using helpers::HighRegisterFrom;
+using helpers::InputDRegisterAt;
 using helpers::InputOperandAt;
 using helpers::InputRegister;
 using helpers::InputRegisterAt;
@@ -53,6 +54,7 @@
 using helpers::LocationFrom;
 using helpers::LowRegisterFrom;
 using helpers::LowSRegisterFrom;
+using helpers::OperandFrom;
 using helpers::OutputRegister;
 using helpers::OutputSRegister;
 using helpers::OutputVRegister;
@@ -657,52 +659,25 @@
   DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathARMVIXL);
 };
 
-// Slow path marking an object reference `ref` during a read
-// barrier. The field `obj.field` in the object `obj` holding this
-// reference does not get updated by this slow path after marking (see
-// ReadBarrierMarkAndUpdateFieldSlowPathARM below for that).
+// Abstract base class for read barrier slow paths marking a reference
+// `ref`.
 //
-// This means that after the execution of this slow path, `ref` will
-// always be up-to-date, but `obj.field` may not; i.e., after the
-// flip, `ref` will be a to-space reference, but `obj.field` will
-// probably still be a from-space reference (unless it gets updated by
-// another thread, or if another thread installed another object
-// reference (different from `ref`) in `obj.field`).
-class ReadBarrierMarkSlowPathARMVIXL : public SlowPathCodeARMVIXL {
- public:
-  ReadBarrierMarkSlowPathARMVIXL(HInstruction* instruction,
-                                 Location ref,
-                                 Location entrypoint = Location::NoLocation())
+// Argument `entrypoint` must be a register location holding the read
+// barrier marking runtime entry point to be invoked, or an empty
+// location; in the latter case, the entry point is loaded from the
+// thread by the slow path code itself.
+class ReadBarrierMarkSlowPathBaseARMVIXL : public SlowPathCodeARMVIXL {
+ protected:
+  ReadBarrierMarkSlowPathBaseARMVIXL(HInstruction* instruction, Location ref, Location entrypoint)
       : SlowPathCodeARMVIXL(instruction), ref_(ref), entrypoint_(entrypoint) {
     DCHECK(kEmitCompilerReadBarrier);
   }
 
-  const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathARMVIXL"; }
+  const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathBaseARMVIXL"; }
 
-  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
-    LocationSummary* locations = instruction_->GetLocations();
+  // Generate assembly code calling the read barrier marking runtime
+  // entry point (ReadBarrierMarkRegX).
+  void GenerateReadBarrierMarkRuntimeCall(CodeGenerator* codegen) {
     vixl32::Register ref_reg = RegisterFrom(ref_);
-    DCHECK(locations->CanCall());
-    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg.GetCode())) << ref_reg;
-    DCHECK(instruction_->IsInstanceFieldGet() ||
-           instruction_->IsStaticFieldGet() ||
-           instruction_->IsArrayGet() ||
-           instruction_->IsArraySet() ||
-           instruction_->IsLoadClass() ||
-           instruction_->IsLoadString() ||
-           instruction_->IsInstanceOf() ||
-           instruction_->IsCheckCast() ||
-           (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) ||
-           (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified()))
-        << "Unexpected instruction in read barrier marking slow path: "
-        << instruction_->DebugName();
-    // The read barrier instrumentation of object ArrayGet
-    // instructions does not support the HIntermediateAddress
-    // instruction.
-    DCHECK(!(instruction_->IsArrayGet() &&
-             instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress()));
 
-    __ Bind(GetEntryLabel());
     // No need to save live registers; it's taken care of by the
     // entrypoint. Also, there is no need to update the stack mask,
     // as this runtime call will not trigger a garbage collection.
@@ -732,53 +707,108 @@
       arm_codegen->ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction_, this);
       __ Blx(RegisterFrom(entrypoint_));
     } else {
+      // The entrypoint is not already loaded; load it from the thread.
       int32_t entry_point_offset =
           CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref_reg.GetCode());
       // This runtime call does not require a stack map.
       arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
     }
-    __ B(GetExitLabel());
   }
 
- private:
   // The location (register) of the marked object reference.
   const Location ref_;
 
   // The location of the entrypoint if already loaded.
   const Location entrypoint_;
 
-  DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathARMVIXL);
+ private:
+  DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathBaseARMVIXL);
 };
 
-// Slow path marking an object reference `ref` during a read barrier,
-// and if needed, atomically updating the field `obj.field` in the
-// object `obj` holding this reference after marking (contrary to
-// ReadBarrierMarkSlowPathARM above, which never tries to update
-// `obj.field`).
+// Slow path marking an object reference `ref` during a read
+// barrier. The field `obj.field` in the object `obj` holding this
+// reference does not get updated by this slow path after marking.
 //
-// This means that after the execution of this slow path, both `ref`
-// and `obj.field` will be up-to-date; i.e., after the flip, both will
-// hold the same to-space reference (unless another thread installed
-// another object reference (different from `ref`) in `obj.field`).
-class ReadBarrierMarkAndUpdateFieldSlowPathARMVIXL : public SlowPathCodeARMVIXL {
+// This means that after the execution of this slow path, `ref` will
+// always be up-to-date, but `obj.field` may not; i.e., after the
+// flip, `ref` will be a to-space reference, but `obj.field` will
+// probably still be a from-space reference (unless it gets updated by
+// another thread, or if another thread installed another object
+// reference (different from `ref`) in `obj.field`).
+//
+// If `entrypoint` is a valid location, it is assumed to already be
+// holding the entrypoint. The entrypoint is passed in when the
+// decision to mark is based on whether the GC is marking.
+class ReadBarrierMarkSlowPathARMVIXL : public ReadBarrierMarkSlowPathBaseARMVIXL {
  public:
-  ReadBarrierMarkAndUpdateFieldSlowPathARMVIXL(HInstruction* instruction,
-                                               Location ref,
-                                               vixl32::Register obj,
-                                               Location field_offset,
-                                               vixl32::Register temp1,
-                                               vixl32::Register temp2)
-      : SlowPathCodeARMVIXL(instruction),
-        ref_(ref),
-        obj_(obj),
-        field_offset_(field_offset),
-        temp1_(temp1),
-        temp2_(temp2) {
+  ReadBarrierMarkSlowPathARMVIXL(HInstruction* instruction,
+                                 Location ref,
+                                 Location entrypoint = Location::NoLocation())
+      : ReadBarrierMarkSlowPathBaseARMVIXL(instruction, ref, entrypoint) {
     DCHECK(kEmitCompilerReadBarrier);
   }
 
+  const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathARMVIXL"; }
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    LocationSummary* locations = instruction_->GetLocations();
+    DCHECK(locations->CanCall());
+    DCHECK(ref_.IsRegister()) << ref_;
+    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_.reg())) << ref_.reg();
+    DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
+        << "Unexpected instruction in read barrier marking slow path: "
+        << instruction_->DebugName();
+
+    __ Bind(GetEntryLabel());
+    GenerateReadBarrierMarkRuntimeCall(codegen);
+    __ B(GetExitLabel());
+  }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathARMVIXL);
+};
+
+// Slow path loading `obj`'s lock word, loading a reference from
+// object `*(obj + offset + (index << scale_factor))` into `ref`, and
+// marking `ref` if `obj` is gray according to the lock word (Baker
+// read barrier). The field `obj.field` in the object `obj` holding
+// this reference does not get updated by this slow path after marking
+// (see LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL
+// below for that).
+//
+// This means that after the execution of this slow path, `ref` will
+// always be up-to-date, but `obj.field` may not; i.e., after the
+// flip, `ref` will be a to-space reference, but `obj.field` will
+// probably still be a from-space reference (unless it gets updated by
+// another thread, or if another thread installed another object
+// reference (different from `ref`) in `obj.field`).
+//
+// Argument `entrypoint` must be a register location holding the read
+// barrier marking runtime entry point to be invoked.
+class LoadReferenceWithBakerReadBarrierSlowPathARMVIXL : public ReadBarrierMarkSlowPathBaseARMVIXL {
+ public:
+  LoadReferenceWithBakerReadBarrierSlowPathARMVIXL(HInstruction* instruction,
+                                                   Location ref,
+                                                   vixl32::Register obj,
+                                                   uint32_t offset,
+                                                   Location index,
+                                                   ScaleFactor scale_factor,
+                                                   bool needs_null_check,
+                                                   vixl32::Register temp,
+                                                   Location entrypoint)
+      : ReadBarrierMarkSlowPathBaseARMVIXL(instruction, ref, entrypoint),
+        obj_(obj),
+        offset_(offset),
+        index_(index),
+        scale_factor_(scale_factor),
+        needs_null_check_(needs_null_check),
+        temp_(temp) {
+    DCHECK(kEmitCompilerReadBarrier);
+    DCHECK(kUseBakerReadBarrier);
+  }
+
   const char* GetDescription() const OVERRIDE {
-    return "ReadBarrierMarkAndUpdateFieldSlowPathARMVIXL";
+    return "LoadReferenceWithBakerReadBarrierSlowPathARMVIXL";
   }
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
@@ -786,64 +816,233 @@
     vixl32::Register ref_reg = RegisterFrom(ref_);
     DCHECK(locations->CanCall());
     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg.GetCode())) << ref_reg;
-    // This slow path is only used by the UnsafeCASObject intrinsic.
+    DCHECK(instruction_->IsInstanceFieldGet() ||
+           instruction_->IsStaticFieldGet() ||
+           instruction_->IsArrayGet() ||
+           instruction_->IsArraySet() ||
+           instruction_->IsInstanceOf() ||
+           instruction_->IsCheckCast() ||
+           (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) ||
+           (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified()))
+        << "Unexpected instruction in read barrier marking slow path: "
+        << instruction_->DebugName();
+    // The read barrier instrumentation of object ArrayGet
+    // instructions does not support the HIntermediateAddress
+    // instruction.
+    DCHECK(!(instruction_->IsArrayGet() &&
+             instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress()));
+
+    // Temporary register `temp_`, used to store the lock word, must
+    // not be IP, as we may use it to emit the reference load (in the
+    // call to GenerateRawReferenceLoad below), and we need the lock
+    // word to still be in `temp_` after the reference load.
+    DCHECK(!temp_.Is(ip));
+
+    __ Bind(GetEntryLabel());
+
+    // When using MaybeGenerateReadBarrierSlow, the read barrier call is
+    // inserted after the original load. However, in fast path based
+    // Baker's read barriers, we need to perform the load of
+    // mirror::Object::monitor_ *before* the original reference load.
+    // This load-load ordering is required by the read barrier.
+    // The fast path/slow path (for Baker's algorithm) should look like:
+    //
+    //   uint32_t rb_state = LockWord(obj->monitor_).ReadBarrierState();
+    //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
+    //   HeapReference<mirror::Object> ref = *src;  // Original reference load.
+    //   bool is_gray = (rb_state == ReadBarrier::GrayState());
+    //   if (is_gray) {
+    //     ref = entrypoint(ref);  // ref = ReadBarrier::Mark(ref);  // Runtime entry point call.
+    //   }
+    //
+    // Note: the original implementation in ReadBarrier::Barrier is
+    // slightly more complex as it performs additional checks that we do
+    // not do here for performance reasons.
+
+    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
+
+    // /* int32_t */ monitor = obj->monitor_
+    uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
+    arm_codegen->GetAssembler()->LoadFromOffset(kLoadWord, temp_, obj_, monitor_offset);
+    if (needs_null_check_) {
+      codegen->MaybeRecordImplicitNullCheck(instruction_);
+    }
+    // /* LockWord */ lock_word = LockWord(monitor)
+    static_assert(sizeof(LockWord) == sizeof(int32_t),
+                  "art::LockWord and int32_t have different sizes.");
+
+    // Introduce a dependency on the lock_word including the rb_state,
+    // which shall prevent load-load reordering without using
+    // a memory barrier (which would be more expensive).
+    // `obj` is unchanged by this operation, but its value now depends
+    // on `temp`.
+    __ Add(obj_, obj_, Operand(temp_, ShiftType::LSR, 32));
+
+    // The actual reference load.
+    // A possible implicit null check has already been handled above.
+    arm_codegen->GenerateRawReferenceLoad(
+        instruction_, ref_, obj_, offset_, index_, scale_factor_, /* needs_null_check */ false);
+
+    // Mark the object `ref` when `obj` is gray.
+    //
+    //   if (rb_state == ReadBarrier::GrayState())
+    //     ref = ReadBarrier::Mark(ref);
+    //
+    // Given the numeric representation, it's enough to check the low bit of the
+    // rb_state. We do that by shifting the bit out of the lock word with LSRS
+    // which can be a 16-bit instruction unlike the TST immediate.
+    static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
+    static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
+    __ Lsrs(temp_, temp_, LockWord::kReadBarrierStateShift + 1);
+    __ B(cc, GetExitLabel());  // Carry flag is the last bit shifted out by LSRS.
+    GenerateReadBarrierMarkRuntimeCall(codegen);
+
+    __ B(GetExitLabel());
+  }
+
+ private:
+  // The register containing the object holding the marked object reference field.
+  vixl32::Register obj_;
+  // The offset, index and scale factor to access the reference in `obj_`.
+  uint32_t offset_;
+  Location index_;
+  ScaleFactor scale_factor_;
+  // Is a null check required?
+  bool needs_null_check_;
+  // A temporary register used to hold the lock word of `obj_`.
+  vixl32::Register temp_;
+
+  DISALLOW_COPY_AND_ASSIGN(LoadReferenceWithBakerReadBarrierSlowPathARMVIXL);
+};
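
A minimal plain-C++ sketch of the gray-bit test emitted above (illustrative only; the shift value is an assumed stand-in for art::LockWord::kReadBarrierStateShift, not taken from this patch):

    #include <cassert>
    #include <cstdint>

    // Assumed stand-in for art::LockWord::kReadBarrierStateShift.
    constexpr uint32_t kRbStateShift = 28;

    // LSRS temp, temp, #(kRbStateShift + 1) shifts the rb_state bit into the
    // carry flag; B(cc, exit) then skips the marking call when the object is
    // white (carry clear).
    bool IsGray(uint32_t lock_word) {
      return ((lock_word >> kRbStateShift) & 1u) != 0u;
    }

    int main() {
      assert(IsGray(1u << kRbStateShift));
      assert(!IsGray(0u));
    }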
+
+// Slow path loading `obj`'s lock word, loading a reference from
+// object `*(obj + offset + (index << scale_factor))` into `ref`, and
+// marking `ref` if `obj` is gray according to the lock word (Baker
+// read barrier). If needed, this slow path also atomically updates
+// the field `obj.field` in the object `obj` holding this reference
+// after marking (contrary to
+// LoadReferenceWithBakerReadBarrierSlowPathARMVIXL above, which never
+// tries to update `obj.field`).
+//
+// This means that after the execution of this slow path, both `ref`
+// and `obj.field` will be up-to-date; i.e., after the flip, both will
+// hold the same to-space reference (unless another thread installed
+// another object reference (different from `ref`) in `obj.field`).
+//
+// Argument `entrypoint` must be a register location holding the read
+// barrier marking runtime entry point to be invoked.
+class LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL
+    : public ReadBarrierMarkSlowPathBaseARMVIXL {
+ public:
+  LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL(HInstruction* instruction,
+                                                                 Location ref,
+                                                                 vixl32::Register obj,
+                                                                 uint32_t offset,
+                                                                 Location index,
+                                                                 ScaleFactor scale_factor,
+                                                                 bool needs_null_check,
+                                                                 vixl32::Register temp1,
+                                                                 vixl32::Register temp2,
+                                                                 Location entrypoint)
+      : ReadBarrierMarkSlowPathBaseARMVIXL(instruction, ref, entrypoint),
+        obj_(obj),
+        offset_(offset),
+        index_(index),
+        scale_factor_(scale_factor),
+        needs_null_check_(needs_null_check),
+        temp1_(temp1),
+        temp2_(temp2) {
+    DCHECK(kEmitCompilerReadBarrier);
+    DCHECK(kUseBakerReadBarrier);
+  }
+
+  const char* GetDescription() const OVERRIDE {
+    return "LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL";
+  }
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    LocationSummary* locations = instruction_->GetLocations();
+    vixl32::Register ref_reg = RegisterFrom(ref_);
+    DCHECK(locations->CanCall());
+    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg.GetCode())) << ref_reg;
+    DCHECK_NE(ref_.reg(), LocationFrom(temp1_).reg());
+
+    // This slow path is only used by the UnsafeCASObject intrinsic at the moment.
     DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
         << "Unexpected instruction in read barrier marking and field updating slow path: "
         << instruction_->DebugName();
     DCHECK(instruction_->GetLocations()->Intrinsified());
     DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject);
-    DCHECK(field_offset_.IsRegisterPair()) << field_offset_;
+    DCHECK_EQ(offset_, 0u);
+    DCHECK_EQ(scale_factor_, ScaleFactor::TIMES_1);
+    Location field_offset = index_;
+    DCHECK(field_offset.IsRegisterPair()) << field_offset;
+
+    // Temporary register `temp1_`, used to store the lock word, must
+    // not be IP, as we may use it to emit the reference load (in the
+    // call to GenerateRawReferenceLoad below), and we need the lock
+    // word to still be in `temp1_` after the reference load.
+    DCHECK(!temp1_.Is(ip));
 
     __ Bind(GetEntryLabel());
 
-    // Save the old reference.
+    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
+
+    // /* int32_t */ monitor = obj->monitor_
+    uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
+    arm_codegen->GetAssembler()->LoadFromOffset(kLoadWord, temp1_, obj_, monitor_offset);
+    if (needs_null_check_) {
+      codegen->MaybeRecordImplicitNullCheck(instruction_);
+    }
+    // /* LockWord */ lock_word = LockWord(monitor)
+    static_assert(sizeof(LockWord) == sizeof(int32_t),
+                  "art::LockWord and int32_t have different sizes.");
+
+    // Introduce a dependency on the lock_word including the rb_state,
+    // which shall prevent load-load reordering without using
+    // a memory barrier (which would be more expensive).
+    // `obj` is unchanged by this operation, but its value now depends
+    // on `temp1`.
+    __ Add(obj_, obj_, Operand(temp1_, ShiftType::LSR, 32));
+
+    // The actual reference load.
+    // A possible implicit null check has already been handled above.
+    arm_codegen->GenerateRawReferenceLoad(
+        instruction_, ref_, obj_, offset_, index_, scale_factor_, /* needs_null_check */ false);
+
+    // Mark the object `ref` when `obj` is gray.
+    //
+    //   if (rb_state == ReadBarrier::GrayState())
+    //     ref = ReadBarrier::Mark(ref);
+    //
+    // Given the numeric representation, it's enough to check the low bit of the
+    // rb_state. We do that by shifting the bit out of the lock word with LSRS
+    // which can be a 16-bit instruction unlike the TST immediate.
+    static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
+    static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
+    __ Lsrs(temp1_, temp1_, LockWord::kReadBarrierStateShift + 1);
+    __ B(cc, GetExitLabel());  // Carry flag is the last bit shifted out by LSRS.
+
+    // Save the old value of the reference before marking it.
     // Note that we cannot use IP to save the old reference, as IP is
     // used internally by the ReadBarrierMarkRegX entry point, and we
     // need the old reference after the call to that entry point.
     DCHECK(!temp1_.Is(ip));
     __ Mov(temp1_, ref_reg);
 
-    // No need to save live registers; it's taken care of by the
-    // entrypoint. Also, there is no need to update the stack mask,
-    // as this runtime call will not trigger a garbage collection.
-    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
-    DCHECK(!ref_reg.Is(sp));
-    DCHECK(!ref_reg.Is(lr));
-    DCHECK(!ref_reg.Is(pc));
-    // IP is used internally by the ReadBarrierMarkRegX entry point
-    // as a temporary, it cannot be the entry point's input/output.
-    DCHECK(!ref_reg.Is(ip));
-    DCHECK(ref_reg.IsRegister()) << ref_reg;
-    // "Compact" slow path, saving two moves.
-    //
-    // Instead of using the standard runtime calling convention (input
-    // and output in R0):
-    //
-    //   R0 <- ref
-    //   R0 <- ReadBarrierMark(R0)
-    //   ref <- R0
-    //
-    // we just use rX (the register containing `ref`) as input and output
-    // of a dedicated entrypoint:
-    //
-    //   rX <- ReadBarrierMarkRegX(rX)
-    //
-    int32_t entry_point_offset =
-        CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref_reg.GetCode());
-    // This runtime call does not require a stack map.
-    arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
+    GenerateReadBarrierMarkRuntimeCall(codegen);
 
     // If the new reference is different from the old reference,
-    // update the field in the holder (`*(obj_ + field_offset_)`).
+    // update the field in the holder (`*(obj_ + field_offset)`).
     //
     // Note that this field could also hold a different object, if
     // another thread had concurrently changed it. In that case, the
     // LDREX/SUBS/ITNE sequence of instructions in the compare-and-set
     // (CAS) operation below would abort the CAS, leaving the field
     // as-is.
-    vixl32::Label done;
     __ Cmp(temp1_, ref_reg);
-    __ B(eq, &done, /* far_target */ false);
+    __ B(eq, GetExitLabel());
 
     // Update the holder's field atomically.  This may fail if the
     // mutator updates it before us, but it's OK.  This is achieved
@@ -857,7 +1056,7 @@
     // The UnsafeCASObject intrinsic uses a register pair as field
     // offset ("long offset"), of which only the low part contains
     // data.
-    vixl32::Register offset = LowRegisterFrom(field_offset_);
+    vixl32::Register offset = LowRegisterFrom(field_offset);
     vixl32::Register expected = temp1_;
     vixl32::Register value = ref_reg;
     vixl32::Register tmp_ptr = temps.Acquire();       // Pointer to actual memory.
@@ -913,22 +1112,27 @@
       }
     }
 
-    __ Bind(&done);
     __ B(GetExitLabel());
   }
 
  private:
-  // The location (register) of the marked object reference.
-  const Location ref_;
   // The register containing the object holding the marked object reference field.
   const vixl32::Register obj_;
-  // The location of the offset of the marked reference field within `obj_`.
-  Location field_offset_;
-
+  // The offset, index and scale factor to access the reference in `obj_`.
+  uint32_t offset_;
+  Location index_;
+  ScaleFactor scale_factor_;
+  // Is a null check required?
+  bool needs_null_check_;
+  // A temporary register used to hold the lock word of `obj_`, and
+  // also to hold the original reference value when the reference is
+  // marked.
   const vixl32::Register temp1_;
+  // A temporary register used in the implementation of the CAS, to
+  // update the object's reference field.
   const vixl32::Register temp2_;
 
-  DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathARMVIXL);
+  DISALLOW_COPY_AND_ASSIGN(LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL);
 };
 
 // Slow path generating a read barrier for a heap reference.
@@ -1216,6 +1420,17 @@
   }
 }
 
+inline ShiftType ShiftFromOpKind(HDataProcWithShifterOp::OpKind op_kind) {
+  switch (op_kind) {
+    case HDataProcWithShifterOp::kASR: return ShiftType::ASR;
+    case HDataProcWithShifterOp::kLSL: return ShiftType::LSL;
+    case HDataProcWithShifterOp::kLSR: return ShiftType::LSR;
+    default:
+      LOG(FATAL) << "Unexpected op kind " << op_kind;
+      UNREACHABLE();
+  }
+}
+
 void CodeGeneratorARMVIXL::DumpCoreRegister(std::ostream& stream, int reg) const {
   stream << vixl32::Register(reg);
 }
@@ -1260,8 +1475,496 @@
   return 0;
 }
 
+static void GenerateDataProcInstruction(HInstruction::InstructionKind kind,
+                                        vixl32::Register out,
+                                        vixl32::Register first,
+                                        const Operand& second,
+                                        CodeGeneratorARMVIXL* codegen) {
+  if (second.IsImmediate() && second.GetImmediate() == 0) {
+    const Operand in = kind == HInstruction::kAnd
+        ? Operand(0)
+        : Operand(first);
+
+    __ Mov(out, in);
+  } else {
+    switch (kind) {
+      case HInstruction::kAdd:
+        __ Add(out, first, second);
+        break;
+      case HInstruction::kAnd:
+        __ And(out, first, second);
+        break;
+      case HInstruction::kOr:
+        __ Orr(out, first, second);
+        break;
+      case HInstruction::kSub:
+        __ Sub(out, first, second);
+        break;
+      case HInstruction::kXor:
+        __ Eor(out, first, second);
+        break;
+      default:
+        LOG(FATAL) << "Unexpected instruction kind: " << kind;
+        UNREACHABLE();
+    }
+  }
+}
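
A behavioral sketch of the zero-operand folding above (plain C++, illustrative only): with a zero second operand, AND collapses to zero while ADD/ORR/SUB/EOR leave the first operand unchanged, so a single MOV replaces the data-processing instruction.

    #include <cassert>
    #include <cstdint>

    // '&' models HInstruction::kAnd; any other kind models ADD/ORR/SUB/EOR.
    uint32_t FoldWithZeroOperand(char kind, uint32_t first) {
      return kind == '&' ? 0u : first;
    }

    int main() {
      assert(FoldWithZeroOperand('&', 0xFFu) == 0u);     // AND x, #0 -> MOV out, #0
      assert(FoldWithZeroOperand('+', 0xFFu) == 0xFFu);  // ADD x, #0 -> MOV out, x
    }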
+
+static void GenerateDataProc(HInstruction::InstructionKind kind,
+                             const Location& out,
+                             const Location& first,
+                             const Operand& second_lo,
+                             const Operand& second_hi,
+                             CodeGeneratorARMVIXL* codegen) {
+  const vixl32::Register first_hi = HighRegisterFrom(first);
+  const vixl32::Register first_lo = LowRegisterFrom(first);
+  const vixl32::Register out_hi = HighRegisterFrom(out);
+  const vixl32::Register out_lo = LowRegisterFrom(out);
+
+  if (kind == HInstruction::kAdd) {
+    __ Adds(out_lo, first_lo, second_lo);
+    __ Adc(out_hi, first_hi, second_hi);
+  } else if (kind == HInstruction::kSub) {
+    __ Subs(out_lo, first_lo, second_lo);
+    __ Sbc(out_hi, first_hi, second_hi);
+  } else {
+    GenerateDataProcInstruction(kind, out_lo, first_lo, second_lo, codegen);
+    GenerateDataProcInstruction(kind, out_hi, first_hi, second_hi, codegen);
+  }
+}
+
+static Operand GetShifterOperand(vixl32::Register rm, ShiftType shift, uint32_t shift_imm) {
+  return shift_imm == 0 ? Operand(rm) : Operand(rm, shift, shift_imm);
+}
+
+static void GenerateLongDataProc(HDataProcWithShifterOp* instruction,
+                                 CodeGeneratorARMVIXL* codegen) {
+  DCHECK_EQ(instruction->GetType(), Primitive::kPrimLong);
+  DCHECK(HDataProcWithShifterOp::IsShiftOp(instruction->GetOpKind()));
+
+  const LocationSummary* const locations = instruction->GetLocations();
+  const uint32_t shift_value = instruction->GetShiftAmount();
+  const HInstruction::InstructionKind kind = instruction->GetInstrKind();
+  const Location first = locations->InAt(0);
+  const Location second = locations->InAt(1);
+  const Location out = locations->Out();
+  const vixl32::Register first_hi = HighRegisterFrom(first);
+  const vixl32::Register first_lo = LowRegisterFrom(first);
+  const vixl32::Register out_hi = HighRegisterFrom(out);
+  const vixl32::Register out_lo = LowRegisterFrom(out);
+  const vixl32::Register second_hi = HighRegisterFrom(second);
+  const vixl32::Register second_lo = LowRegisterFrom(second);
+  const ShiftType shift = ShiftFromOpKind(instruction->GetOpKind());
+
+  if (shift_value >= 32) {
+    if (shift == ShiftType::LSL) {
+      GenerateDataProcInstruction(kind,
+                                  out_hi,
+                                  first_hi,
+                                  Operand(second_lo, ShiftType::LSL, shift_value - 32),
+                                  codegen);
+      GenerateDataProcInstruction(kind, out_lo, first_lo, 0, codegen);
+    } else if (shift == ShiftType::ASR) {
+      GenerateDataProc(kind,
+                       out,
+                       first,
+                       GetShifterOperand(second_hi, ShiftType::ASR, shift_value - 32),
+                       Operand(second_hi, ShiftType::ASR, 31),
+                       codegen);
+    } else {
+      DCHECK_EQ(shift, ShiftType::LSR);
+      GenerateDataProc(kind,
+                       out,
+                       first,
+                       GetShifterOperand(second_hi, ShiftType::LSR, shift_value - 32),
+                       0,
+                       codegen);
+    }
+  } else {
+    DCHECK_GT(shift_value, 1U);
+    DCHECK_LT(shift_value, 32U);
+
+    UseScratchRegisterScope temps(codegen->GetVIXLAssembler());
+
+    if (shift == ShiftType::LSL) {
+      // We are not doing this for HInstruction::kAdd because the output will require
+      // Location::kOutputOverlap; not applicable to other cases.
+      if (kind == HInstruction::kOr || kind == HInstruction::kXor) {
+        GenerateDataProcInstruction(kind,
+                                    out_hi,
+                                    first_hi,
+                                    Operand(second_hi, ShiftType::LSL, shift_value),
+                                    codegen);
+        GenerateDataProcInstruction(kind,
+                                    out_hi,
+                                    out_hi,
+                                    Operand(second_lo, ShiftType::LSR, 32 - shift_value),
+                                    codegen);
+        GenerateDataProcInstruction(kind,
+                                    out_lo,
+                                    first_lo,
+                                    Operand(second_lo, ShiftType::LSL, shift_value),
+                                    codegen);
+      } else {
+        const vixl32::Register temp = temps.Acquire();
+
+        __ Lsl(temp, second_hi, shift_value);
+        __ Orr(temp, temp, Operand(second_lo, ShiftType::LSR, 32 - shift_value));
+        GenerateDataProc(kind,
+                         out,
+                         first,
+                         Operand(second_lo, ShiftType::LSL, shift_value),
+                         temp,
+                         codegen);
+      }
+    } else {
+      DCHECK(shift == ShiftType::ASR || shift == ShiftType::LSR);
+
+      // We are not doing this for HInstruction::kAdd because the output will require
+      // Location::kOutputOverlap; not applicable to other cases.
+      if (kind == HInstruction::kOr || kind == HInstruction::kXor) {
+        GenerateDataProcInstruction(kind,
+                                    out_lo,
+                                    first_lo,
+                                    Operand(second_lo, ShiftType::LSR, shift_value),
+                                    codegen);
+        GenerateDataProcInstruction(kind,
+                                    out_lo,
+                                    out_lo,
+                                    Operand(second_hi, ShiftType::LSL, 32 - shift_value),
+                                    codegen);
+        GenerateDataProcInstruction(kind,
+                                    out_hi,
+                                    first_hi,
+                                    Operand(second_hi, shift, shift_value),
+                                    codegen);
+      } else {
+        const vixl32::Register temp = temps.Acquire();
+
+        __ Lsr(temp, second_lo, shift_value);
+        __ Orr(temp, temp, Operand(second_hi, ShiftType::LSL, 32 - shift_value));
+        GenerateDataProc(kind,
+                         out,
+                         first,
+                         temp,
+                         Operand(second_hi, shift, shift_value),
+                         codegen);
+      }
+    }
+  }
+}
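
A plain-C++ sketch of the register-pair decomposition above for LSL (illustrative only, valid for 0 < shift < 64): for shifts below 32 the high result mixes bits from both halves, while for shifts of 32 or more the low half becomes zero.

    #include <cassert>
    #include <cstdint>

    uint64_t ShiftLeftViaHalves(uint64_t value, unsigned shift) {
      uint32_t lo = static_cast<uint32_t>(value);
      uint32_t hi = static_cast<uint32_t>(value >> 32);
      uint32_t out_lo, out_hi;
      if (shift >= 32) {
        out_hi = lo << (shift - 32);  // second_lo shifted into the high word
        out_lo = 0u;
      } else {
        out_hi = (hi << shift) | (lo >> (32 - shift));  // LSL hi, ORR in spilled bits
        out_lo = lo << shift;
      }
      return (static_cast<uint64_t>(out_hi) << 32) | out_lo;
    }

    int main() {
      const uint64_t v = 0x0123456789ABCDEFull;
      assert(ShiftLeftViaHalves(v, 4) == v << 4);
      assert(ShiftLeftViaHalves(v, 36) == v << 36);
    }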
+
+static void GenerateVcmp(HInstruction* instruction, CodeGeneratorARMVIXL* codegen) {
+  const Location rhs_loc = instruction->GetLocations()->InAt(1);
+  if (rhs_loc.IsConstant()) {
+    // 0.0 is the only immediate that can be encoded directly in
+    // a VCMP instruction.
+    //
+    // Both the JLS (section 15.20.1) and the JVMS (section 6.5)
+    // specify that in a floating-point comparison, positive zero
+    // and negative zero are considered equal, so we can use the
+    // literal 0.0 for both cases here.
+    //
+    // Note however that some methods (Float.equal, Float.compare,
+    // Float.compareTo, Double.equal, Double.compare,
+    // Double.compareTo, Math.max, Math.min, StrictMath.max,
+    // StrictMath.min) consider 0.0 to be (strictly) greater than
+    // -0.0. So if we ever translate calls to these methods into a
+    // HCompare instruction, we must handle the -0.0 case with
+    // care here.
+    DCHECK(rhs_loc.GetConstant()->IsArithmeticZero());
+
+    const Primitive::Type type = instruction->InputAt(0)->GetType();
+
+    if (type == Primitive::kPrimFloat) {
+      __ Vcmp(F32, InputSRegisterAt(instruction, 0), 0.0);
+    } else {
+      DCHECK_EQ(type, Primitive::kPrimDouble);
+      __ Vcmp(F64, InputDRegisterAt(instruction, 0), 0.0);
+    }
+  } else {
+    __ Vcmp(InputVRegisterAt(instruction, 0), InputVRegisterAt(instruction, 1));
+  }
+}
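
The safety argument in the comment above rests on IEEE-754 (and JLS 15.20.1) zero semantics, which a one-line check confirms:

    #include <cassert>

    int main() {
      // +0.0 and -0.0 compare equal, so VCMP against the literal 0.0 covers
      // both signs of an arithmetic-zero constant.
      assert(0.0f == -0.0f);
      assert(0.0 == -0.0);
    }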
+
+static vixl32::Condition GenerateLongTestConstant(HCondition* condition,
+                                                  bool invert,
+                                                  CodeGeneratorARMVIXL* codegen) {
+  DCHECK_EQ(condition->GetLeft()->GetType(), Primitive::kPrimLong);
+
+  const LocationSummary* const locations = condition->GetLocations();
+  IfCondition cond = invert ? condition->GetOppositeCondition() : condition->GetCondition();
+  vixl32::Condition ret = eq;
+  const Location left = locations->InAt(0);
+  const Location right = locations->InAt(1);
+
+  DCHECK(right.IsConstant());
+
+  const vixl32::Register left_high = HighRegisterFrom(left);
+  const vixl32::Register left_low = LowRegisterFrom(left);
+  int64_t value = Int64ConstantFrom(right);
+
+  switch (cond) {
+    case kCondEQ:
+    case kCondNE:
+    case kCondB:
+    case kCondBE:
+    case kCondA:
+    case kCondAE: {
+      __ Cmp(left_high, High32Bits(value));
+
+      ExactAssemblyScope guard(codegen->GetVIXLAssembler(),
+                               2 * vixl32::k16BitT32InstructionSizeInBytes,
+                               CodeBufferCheckScope::kExactSize);
+
+      __ it(eq);
+      __ cmp(eq, left_low, Low32Bits(value));
+      ret = ARMUnsignedCondition(cond);
+      break;
+    }
+    case kCondLE:
+    case kCondGT:
+      // Trivially true or false.
+      if (value == std::numeric_limits<int64_t>::max()) {
+        __ Cmp(left_low, left_low);
+        ret = cond == kCondLE ? eq : ne;
+        break;
+      }
+
+      if (cond == kCondLE) {
+        cond = kCondLT;
+      } else {
+        DCHECK_EQ(cond, kCondGT);
+        cond = kCondGE;
+      }
+
+      value++;
+      FALLTHROUGH_INTENDED;
+    case kCondGE:
+    case kCondLT: {
+      UseScratchRegisterScope temps(codegen->GetVIXLAssembler());
+
+      __ Cmp(left_low, Low32Bits(value));
+      __ Sbcs(temps.Acquire(), left_high, High32Bits(value));
+      ret = ARMCondition(cond);
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unreachable";
+      UNREACHABLE();
+  }
+
+  return ret;
+}
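
A sketch of the constant adjustment in the kCondLE/kCondGT case above (plain C++, illustrative only): for any c below INT64_MAX, x <= c is exactly x < c + 1, which is why the code bumps the constant and falls through to the kCondLT/kCondGE handling; at INT64_MAX the comparison is trivially true.

    #include <cassert>
    #include <cstdint>
    #include <limits>

    bool LessOrEqual(int64_t x, int64_t c) {
      if (c == std::numeric_limits<int64_t>::max()) {
        return true;  // matches the trivial Cmp(left_low, left_low) / eq case
      }
      return x < c + 1;  // the value++ / FALLTHROUGH_INTENDED path
    }

    int main() {
      assert(LessOrEqual(5, 5));
      assert(!LessOrEqual(6, 5));
      assert(LessOrEqual(-7, std::numeric_limits<int64_t>::max()));
    }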
+
+static vixl32::Condition GenerateLongTest(HCondition* condition,
+                                          bool invert,
+                                          CodeGeneratorARMVIXL* codegen) {
+  DCHECK_EQ(condition->GetLeft()->GetType(), Primitive::kPrimLong);
+
+  const LocationSummary* const locations = condition->GetLocations();
+  IfCondition cond = invert ? condition->GetOppositeCondition() : condition->GetCondition();
+  vixl32::Condition ret = eq;
+  Location left = locations->InAt(0);
+  Location right = locations->InAt(1);
+
+  DCHECK(right.IsRegisterPair());
+
+  switch (cond) {
+    case kCondEQ:
+    case kCondNE:
+    case kCondB:
+    case kCondBE:
+    case kCondA:
+    case kCondAE: {
+      __ Cmp(HighRegisterFrom(left), HighRegisterFrom(right));
+
+      ExactAssemblyScope guard(codegen->GetVIXLAssembler(),
+                               2 * vixl32::k16BitT32InstructionSizeInBytes,
+                               CodeBufferCheckScope::kExactSize);
+
+      __ it(eq);
+      __ cmp(eq, LowRegisterFrom(left), LowRegisterFrom(right));
+      ret = ARMUnsignedCondition(cond);
+      break;
+    }
+    case kCondLE:
+    case kCondGT:
+      if (cond == kCondLE) {
+        cond = kCondGE;
+      } else {
+        DCHECK_EQ(cond, kCondGT);
+        cond = kCondLT;
+      }
+
+      std::swap(left, right);
+      FALLTHROUGH_INTENDED;
+    case kCondGE:
+    case kCondLT: {
+      UseScratchRegisterScope temps(codegen->GetVIXLAssembler());
+
+      __ Cmp(LowRegisterFrom(left), LowRegisterFrom(right));
+      __ Sbcs(temps.Acquire(), HighRegisterFrom(left), HighRegisterFrom(right));
+      ret = ARMCondition(cond);
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unreachable";
+      UNREACHABLE();
+  }
+
+  return ret;
+}
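
A plain-C++ model of the CMP/SBCS pair used in the kCondGE/kCondLT cases (illustrative only; assumes arithmetic right shift on signed types): the low-word compare produces a borrow, and subtracting it from the high words yields a value whose sign matches the full 64-bit signed comparison.

    #include <cassert>
    #include <cstdint>

    bool SignedLessThan(int64_t a, int64_t b) {
      uint32_t a_lo = static_cast<uint32_t>(a);
      uint32_t b_lo = static_cast<uint32_t>(b);
      int64_t a_hi = a >> 32;  // high words as signed values
      int64_t b_hi = b >> 32;
      int64_t borrow = (a_lo < b_lo) ? 1 : 0;  // CMP a_lo, b_lo: carry clear => borrow
      return (a_hi - b_hi - borrow) < 0;       // SBCS high words; hardware tests N != V
    }

    int main() {
      assert(SignedLessThan(-1, 0));
      assert(SignedLessThan(0x100000000LL, 0x100000001LL));
      assert(!SignedLessThan(5, 5));
    }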
+
+static vixl32::Condition GenerateTest(HInstruction* instruction,
+                                      Location loc,
+                                      bool invert,
+                                      CodeGeneratorARMVIXL* codegen) {
+  DCHECK(!instruction->IsConstant());
+
+  vixl32::Condition ret = invert ? eq : ne;
+
+  if (IsBooleanValueOrMaterializedCondition(instruction)) {
+    __ Cmp(RegisterFrom(loc), 0);
+  } else {
+    HCondition* const condition = instruction->AsCondition();
+    const Primitive::Type type = condition->GetLeft()->GetType();
+    const IfCondition cond = invert ? condition->GetOppositeCondition() : condition->GetCondition();
+
+    if (type == Primitive::kPrimLong) {
+      ret = condition->GetLocations()->InAt(1).IsConstant()
+          ? GenerateLongTestConstant(condition, invert, codegen)
+          : GenerateLongTest(condition, invert, codegen);
+    } else if (Primitive::IsFloatingPointType(type)) {
+      GenerateVcmp(condition, codegen);
+      __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
+      ret = ARMFPCondition(cond, condition->IsGtBias());
+    } else {
+      DCHECK(Primitive::IsIntegralType(type) || type == Primitive::kPrimNot) << type;
+      __ Cmp(InputRegisterAt(condition, 0), InputOperandAt(condition, 1));
+      ret = ARMCondition(cond);
+    }
+  }
+
+  return ret;
+}
+
+static bool CanGenerateTest(HInstruction* condition, ArmVIXLAssembler* assembler) {
+  if (!IsBooleanValueOrMaterializedCondition(condition)) {
+    const HCondition* const cond = condition->AsCondition();
+
+    if (cond->GetLeft()->GetType() == Primitive::kPrimLong) {
+      const LocationSummary* const locations = cond->GetLocations();
+      const IfCondition c = cond->GetCondition();
+
+      if (locations->InAt(1).IsConstant()) {
+        const int64_t value = Int64ConstantFrom(locations->InAt(1));
+
+        if (c < kCondLT || c > kCondGE) {
+          // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
+          // we check that the least significant half of the first input is in a low
+          // register (the other half is read outside an IT block), and that the
+          // constant fits in an 8-bit unsigned integer, so that a 16-bit CMP encoding
+          // can be used.
+          if (!LowRegisterFrom(locations->InAt(0)).IsLow() || !IsUint<8>(Low32Bits(value))) {
+            return false;
+          }
+        // TODO(VIXL): The rest of the checks are there to keep the backend in sync with
+        // the previous one, but are not strictly necessary.
+        } else if (c == kCondLE || c == kCondGT) {
+          if (value < std::numeric_limits<int64_t>::max() &&
+              !assembler->ShifterOperandCanHold(SBC, High32Bits(value + 1), kCcSet)) {
+            return false;
+          }
+        } else if (!assembler->ShifterOperandCanHold(SBC, High32Bits(value), kCcSet)) {
+          return false;
+        }
+      }
+    }
+  }
+
+  return true;
+}
+
+static bool CanEncodeConstantAs8BitImmediate(HConstant* constant) {
+  const Primitive::Type type = constant->GetType();
+  bool ret = false;
+
+  DCHECK(Primitive::IsIntegralType(type) || type == Primitive::kPrimNot) << type;
+
+  if (type == Primitive::kPrimLong) {
+    const uint64_t value = Uint64ConstantFrom(constant);
+
+    ret = IsUint<8>(Low32Bits(value)) && IsUint<8>(High32Bits(value));
+  } else {
+    ret = IsUint<8>(Int32ConstantFrom(constant));
+  }
+
+  return ret;
+}
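
Restated as standalone C++ (illustrative only), the predicate above accepts a long constant exactly when each 32-bit half fits the 0-255 immediate range of a 16-bit T32 MOV:

    #include <cassert>
    #include <cstdint>

    bool HalfFitsUint8(uint32_t v) { return v <= 0xFFu; }

    bool LongEncodableAs8Bit(uint64_t v) {
      return HalfFitsUint8(static_cast<uint32_t>(v)) &&
             HalfFitsUint8(static_cast<uint32_t>(v >> 32));
    }

    int main() {
      assert(LongEncodableAs8Bit(0x000000AB000000CDull));
      assert(!LongEncodableAs8Bit(0x0000010000000000ull));  // high half exceeds 0xFF
    }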
+
+static Location Arm8BitEncodableConstantOrRegister(HInstruction* constant) {
+  DCHECK(!Primitive::IsFloatingPointType(constant->GetType()));
+
+  if (constant->IsConstant() && CanEncodeConstantAs8BitImmediate(constant->AsConstant())) {
+    return Location::ConstantLocation(constant->AsConstant());
+  }
+
+  return Location::RequiresRegister();
+}
+
+static bool CanGenerateConditionalMove(const Location& out, const Location& src) {
+  // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
+  // we check that we are not dealing with floating-point output (there is no
+  // 16-bit VMOV encoding).
+  if (!out.IsRegister() && !out.IsRegisterPair()) {
+    return false;
+  }
+
+  // For constants, we also check that the output is in one or two low registers,
+  // and that the constants fit in an 8-bit unsigned integer, so that a 16-bit
+  // MOV encoding can be used.
+  if (src.IsConstant()) {
+    if (!CanEncodeConstantAs8BitImmediate(src.GetConstant())) {
+      return false;
+    }
+
+    if (out.IsRegister()) {
+      if (!RegisterFrom(out).IsLow()) {
+        return false;
+      }
+    } else {
+      DCHECK(out.IsRegisterPair());
+
+      if (!HighRegisterFrom(out).IsLow()) {
+        return false;
+      }
+    }
+  }
+
+  return true;
+}
+
 #undef __
 
+vixl32::Label* CodeGeneratorARMVIXL::GetFinalLabel(HInstruction* instruction,
+                                                   vixl32::Label* final_label) {
+  DCHECK(!instruction->IsControlFlow() && !instruction->IsSuspendCheck());
+
+  const HBasicBlock* const block = instruction->GetBlock();
+  const HLoopInformation* const info = block->GetLoopInformation();
+  HInstruction* const next = instruction->GetNext();
+
+  // Avoid a branch to a branch.
+  if (next->IsGoto() && (info == nullptr ||
+                         !info->IsBackEdge(*block) ||
+                         !info->HasSuspendCheck())) {
+    final_label = GetLabelOf(next->AsGoto()->GetSuccessor());
+  }
+
+  return final_label;
+}
+
 CodeGeneratorARMVIXL::CodeGeneratorARMVIXL(HGraph* graph,
                                            const ArmInstructionSetFeatures& isa_features,
                                            const CompilerOptions& compiler_options,
@@ -1291,23 +1994,16 @@
                                graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
-      boot_image_address_patches_(std::less<uint32_t>(),
-                                  graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       jit_string_patches_(StringReferenceValueComparator(),
                           graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       jit_class_patches_(TypeReferenceValueComparator(),
                          graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
   // Always save the LR register to mimic Quick.
   AddAllocatedRegister(Location::RegisterLocation(LR));
-  // Give d14 and d15 as scratch registers to VIXL.
-  // They are removed from the register allocator in `SetupBlockedRegisters()`.
-  // TODO(VIXL): We need two scratch D registers for `EmitSwap` when swapping two double stack
-  // slots. If that is sufficiently rare, and we have pressure on FP registers, we could instead
-  // spill in `EmitSwap`. But if we actually are guaranteed to have 32 D registers, we could give
-  // d30 and d31 to VIXL to avoid removing registers from the allocator. If that is the case, we may
-  // also want to investigate giving those 14 other D registers to the allocator.
-  GetVIXLAssembler()->GetScratchVRegisterList()->Combine(d14);
-  GetVIXLAssembler()->GetScratchVRegisterList()->Combine(d15);
+  // Give D30 and D31 as scratch registers to VIXL. The register allocator only works on
+  // S0-S31, which alias to D0-D15.
+  GetVIXLAssembler()->GetScratchVRegisterList()->Combine(d31);
+  GetVIXLAssembler()->GetScratchVRegisterList()->Combine(d30);
 }
 
 void JumpTableARMVIXL::EmitTable(CodeGeneratorARMVIXL* codegen) {
@@ -1373,13 +2069,6 @@
   // Reserve temp register.
   blocked_core_registers_[IP] = true;
 
-  // Registers s28-s31 (d14-d15) are left to VIXL for scratch registers.
-  // (They are given to the `MacroAssembler` in `CodeGeneratorARMVIXL::CodeGeneratorARMVIXL`.)
-  blocked_fpu_registers_[28] = true;
-  blocked_fpu_registers_[29] = true;
-  blocked_fpu_registers_[30] = true;
-  blocked_fpu_registers_[31] = true;
-
   if (GetGraph()->IsDebuggable()) {
     // Stubs do not save callee-save floating point registers. If the graph
     // is debuggable, we need to deal with these registers differently. For
@@ -1755,43 +2444,6 @@
 void InstructionCodeGeneratorARMVIXL::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
 }
 
-void InstructionCodeGeneratorARMVIXL::GenerateVcmp(HInstruction* instruction) {
-  Primitive::Type type = instruction->InputAt(0)->GetType();
-  Location lhs_loc = instruction->GetLocations()->InAt(0);
-  Location rhs_loc = instruction->GetLocations()->InAt(1);
-  if (rhs_loc.IsConstant()) {
-    // 0.0 is the only immediate that can be encoded directly in
-    // a VCMP instruction.
-    //
-    // Both the JLS (section 15.20.1) and the JVMS (section 6.5)
-    // specify that in a floating-point comparison, positive zero
-    // and negative zero are considered equal, so we can use the
-    // literal 0.0 for both cases here.
-    //
-    // Note however that some methods (Float.equal, Float.compare,
-    // Float.compareTo, Double.equal, Double.compare,
-    // Double.compareTo, Math.max, Math.min, StrictMath.max,
-    // StrictMath.min) consider 0.0 to be (strictly) greater than
-    // -0.0. So if we ever translate calls to these methods into a
-    // HCompare instruction, we must handle the -0.0 case with
-    // care here.
-    DCHECK(rhs_loc.GetConstant()->IsArithmeticZero());
-    if (type == Primitive::kPrimFloat) {
-      __ Vcmp(F32, InputSRegisterAt(instruction, 0), 0.0);
-    } else {
-      DCHECK_EQ(type, Primitive::kPrimDouble);
-      __ Vcmp(F64, DRegisterFrom(lhs_loc), 0.0);
-    }
-  } else {
-    if (type == Primitive::kPrimFloat) {
-      __ Vcmp(InputSRegisterAt(instruction, 0), InputSRegisterAt(instruction, 1));
-    } else {
-      DCHECK_EQ(type, Primitive::kPrimDouble);
-      __ Vcmp(DRegisterFrom(lhs_loc), DRegisterFrom(rhs_loc));
-    }
-  }
-}
-
 void InstructionCodeGeneratorARMVIXL::GenerateFPJumps(HCondition* cond,
                                                       vixl32::Label* true_label,
                                                       vixl32::Label* false_label ATTRIBUTE_UNUSED) {
@@ -1900,7 +2552,7 @@
       break;
     case Primitive::kPrimFloat:
     case Primitive::kPrimDouble:
-      GenerateVcmp(condition);
+      GenerateVcmp(condition, codegen_);
       GenerateFPJumps(condition, true_target, false_target);
       break;
     default:
@@ -1977,20 +2629,29 @@
       return;
     }
 
-    LocationSummary* locations = cond->GetLocations();
-    DCHECK(locations->InAt(0).IsRegister());
-    vixl32::Register left = InputRegisterAt(cond, 0);
-    Location right = locations->InAt(1);
-    if (right.IsRegister()) {
-      __ Cmp(left, InputRegisterAt(cond, 1));
-    } else {
-      DCHECK(right.IsConstant());
-      __ Cmp(left, CodeGenerator::GetInt32ValueOf(right.GetConstant()));
-    }
+    vixl32::Label* non_fallthrough_target;
+    vixl32::Condition arm_cond = vixl32::Condition::None();
+    const vixl32::Register left = InputRegisterAt(cond, 0);
+    const Operand right = InputOperandAt(cond, 1);
+
     if (true_target == nullptr) {
-      __ B(ARMCondition(condition->GetOppositeCondition()), false_target);
+      arm_cond = ARMCondition(condition->GetOppositeCondition());
+      non_fallthrough_target = false_target;
     } else {
-      __ B(ARMCondition(condition->GetCondition()), true_target);
+      arm_cond = ARMCondition(condition->GetCondition());
+      non_fallthrough_target = true_target;
+    }
+
+    if (right.IsImmediate() && right.GetImmediate() == 0 && (arm_cond.Is(ne) || arm_cond.Is(eq))) {
+      if (arm_cond.Is(eq)) {
+        __ CompareAndBranchIfZero(left, non_fallthrough_target);
+      } else {
+        DCHECK(arm_cond.Is(ne));
+        __ CompareAndBranchIfNonZero(left, non_fallthrough_target);
+      }
+    } else {
+      __ Cmp(left, right);
+      __ B(arm_cond, non_fallthrough_target);
     }
   }
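
A sketch of the branch selection introduced above (illustrative only): a comparison against an immediate zero under eq or ne maps onto CBZ/CBNZ, replacing the CMP plus conditional-branch pair.

    #include <cassert>

    enum class Branch { kCbz, kCbnz, kCmpThenB };

    Branch SelectBranch(bool rhs_is_zero_imm, bool cond_is_eq, bool cond_is_ne) {
      if (rhs_is_zero_imm && cond_is_eq) return Branch::kCbz;    // CompareAndBranchIfZero
      if (rhs_is_zero_imm && cond_is_ne) return Branch::kCbnz;   // CompareAndBranchIfNonZero
      return Branch::kCmpThenB;                                  // Cmp + B(cond)
    }

    int main() {
      assert(SelectBranch(true, true, false) == Branch::kCbz);
      assert(SelectBranch(false, false, true) == Branch::kCmpThenB);
    }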
 
@@ -2051,29 +2712,135 @@
 
 void LocationsBuilderARMVIXL::VisitSelect(HSelect* select) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(select);
-  if (Primitive::IsFloatingPointType(select->GetType())) {
+  const bool is_floating_point = Primitive::IsFloatingPointType(select->GetType());
+
+  if (is_floating_point) {
     locations->SetInAt(0, Location::RequiresFpuRegister());
-    locations->SetInAt(1, Location::RequiresFpuRegister());
+    locations->SetInAt(1, Location::FpuRegisterOrConstant(select->GetTrueValue()));
   } else {
     locations->SetInAt(0, Location::RequiresRegister());
-    locations->SetInAt(1, Location::RequiresRegister());
+    locations->SetInAt(1, Arm8BitEncodableConstantOrRegister(select->GetTrueValue()));
   }
+
   if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
-    locations->SetInAt(2, Location::RequiresRegister());
+    locations->SetInAt(2, Location::RegisterOrConstant(select->GetCondition()));
+    // The code generator handles overlap with the values, but not with the condition.
+    locations->SetOut(Location::SameAsFirstInput());
+  } else if (is_floating_point) {
+    locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+  } else {
+    if (!locations->InAt(1).IsConstant()) {
+      locations->SetInAt(0, Arm8BitEncodableConstantOrRegister(select->GetFalseValue()));
+    }
+
+    locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
   }
-  locations->SetOut(Location::SameAsFirstInput());
 }
 
 void InstructionCodeGeneratorARMVIXL::VisitSelect(HSelect* select) {
-  LocationSummary* locations = select->GetLocations();
-  vixl32::Label false_target;
-  GenerateTestAndBranch(select,
-                        /* condition_input_index */ 2,
-                        /* true_target */ nullptr,
-                        &false_target,
-                        /* far_target */ false);
-  codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType());
-  __ Bind(&false_target);
+  HInstruction* const condition = select->GetCondition();
+  const LocationSummary* const locations = select->GetLocations();
+  const Primitive::Type type = select->GetType();
+  const Location first = locations->InAt(0);
+  const Location out = locations->Out();
+  const Location second = locations->InAt(1);
+  Location src;
+
+  if (condition->IsIntConstant()) {
+    if (condition->AsIntConstant()->IsFalse()) {
+      src = first;
+    } else {
+      src = second;
+    }
+
+    codegen_->MoveLocation(out, src, type);
+    return;
+  }
+
+  if (!Primitive::IsFloatingPointType(type) &&
+      CanGenerateTest(condition, codegen_->GetAssembler())) {
+    bool invert = false;
+
+    if (out.Equals(second)) {
+      src = first;
+      invert = true;
+    } else if (out.Equals(first)) {
+      src = second;
+    } else if (second.IsConstant()) {
+      DCHECK(CanEncodeConstantAs8BitImmediate(second.GetConstant()));
+      src = second;
+    } else if (first.IsConstant()) {
+      DCHECK(CanEncodeConstantAs8BitImmediate(first.GetConstant()));
+      src = first;
+      invert = true;
+    } else {
+      src = second;
+    }
+
+    if (CanGenerateConditionalMove(out, src)) {
+      if (!out.Equals(first) && !out.Equals(second)) {
+        codegen_->MoveLocation(out, src.Equals(first) ? second : first, type);
+      }
+
+      const vixl32::Condition cond = GenerateTest(condition, locations->InAt(2), invert, codegen_);
+      const size_t instr_count = out.IsRegisterPair() ? 4 : 2;
+      ExactAssemblyScope guard(GetVIXLAssembler(),
+                               instr_count * vixl32::k16BitT32InstructionSizeInBytes,
+                               CodeBufferCheckScope::kExactSize);
+
+      if (out.IsRegister()) {
+        __ it(cond);
+        __ mov(cond, RegisterFrom(out), OperandFrom(src, type));
+      } else {
+        DCHECK(out.IsRegisterPair());
+
+        Operand operand_high(0);
+        Operand operand_low(0);
+
+        if (src.IsConstant()) {
+          const int64_t value = Int64ConstantFrom(src);
+
+          operand_high = High32Bits(value);
+          operand_low = Low32Bits(value);
+        } else {
+          DCHECK(src.IsRegisterPair());
+          operand_high = HighRegisterFrom(src);
+          operand_low = LowRegisterFrom(src);
+        }
+
+        __ it(cond);
+        __ mov(cond, LowRegisterFrom(out), operand_low);
+        __ it(cond);
+        __ mov(cond, HighRegisterFrom(out), operand_high);
+      }
+
+      return;
+    }
+  }
+
+  vixl32::Label* false_target = nullptr;
+  vixl32::Label* true_target = nullptr;
+  vixl32::Label select_end;
+  vixl32::Label* const target = codegen_->GetFinalLabel(select, &select_end);
+
+  if (out.Equals(second)) {
+    true_target = target;
+    src = first;
+  } else {
+    false_target = target;
+    src = second;
+
+    if (!out.Equals(first)) {
+      codegen_->MoveLocation(out, first, type);
+    }
+  }
+
+  GenerateTestAndBranch(select, 2, true_target, false_target, /* far_target */ false);
+  codegen_->MoveLocation(out, src, type);
+
+  if (select_end.IsReferenced()) {
+    __ Bind(&select_end);
+  }
 }
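
A behavioral sketch of the select lowering above (plain C++, illustrative only; HSelect's inputs are {false value, true value, condition}): one input ends up in `out` unconditionally and the other overwrites it under a predicated move, with `invert` flipping the tested condition when `out` already aliases the true value.

    #include <cassert>
    #include <cstdint>

    int32_t Select(bool cond, int32_t false_value, int32_t true_value) {
      int32_t out = false_value;  // unconditional move (or value already in place)
      if (cond) {                 // IT block: mov<cond> out, true_value
        out = true_value;
      }
      return out;
    }

    int main() {
      assert(Select(true, 1, 2) == 2);
      assert(Select(false, 1, 2) == 1);
    }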
 
 void LocationsBuilderARMVIXL::VisitNativeDebugInfo(HNativeDebugInfo* info) {
@@ -2151,7 +2918,7 @@
       break;
     case Primitive::kPrimFloat:
     case Primitive::kPrimDouble:
-      GenerateVcmp(cond);
+      GenerateVcmp(cond, codegen_);
       GenerateFPJumps(cond, &true_label, &false_label);
       break;
   }
@@ -4166,7 +4933,7 @@
     case Primitive::kPrimFloat:
     case Primitive::kPrimDouble: {
       __ Mov(out, 0);
-      GenerateVcmp(compare);
+      GenerateVcmp(compare, codegen_);
       // To branch on the FP compare result we transfer FPSCR to APSR (encoded as PC in VMRS).
       __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
       less_cond = ARMFPCondition(kCondLT, compare->IsGtBias());
@@ -4536,17 +5303,24 @@
     return true;
   }
   Opcode neg_opcode = kNoOperand;
+  uint32_t neg_value = 0;
   switch (opcode) {
-    case AND: neg_opcode = BIC; value = ~value; break;
-    case ORR: neg_opcode = ORN; value = ~value; break;
-    case ADD: neg_opcode = SUB; value = -value; break;
-    case ADC: neg_opcode = SBC; value = ~value; break;
-    case SUB: neg_opcode = ADD; value = -value; break;
-    case SBC: neg_opcode = ADC; value = ~value; break;
+    case AND: neg_opcode = BIC; neg_value = ~value; break;
+    case ORR: neg_opcode = ORN; neg_value = ~value; break;
+    case ADD: neg_opcode = SUB; neg_value = -value; break;
+    case ADC: neg_opcode = SBC; neg_value = ~value; break;
+    case SUB: neg_opcode = ADD; neg_value = -value; break;
+    case SBC: neg_opcode = ADC; neg_value = ~value; break;
+    case MOV: neg_opcode = MVN; neg_value = ~value; break;
     default:
       return false;
   }
-  return assembler->ShifterOperandCanHold(neg_opcode, value, set_cc);
+
+  if (assembler->ShifterOperandCanHold(neg_opcode, neg_value, set_cc)) {
+    return true;
+  }
+
+  return opcode == AND && IsPowerOfTwo(value + 1);
 }
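
The new AND special case accepts masks of contiguous low set bits, which can be emitted as a single bitfield extract (UBFX) even when the immediate itself is not encodable. A sketch (illustrative only; IsPowerOfTwo is modeled here with the usual x != 0 && (x & (x - 1)) == 0 definition, an assumption):

    #include <cassert>
    #include <cstdint>

    bool IsPowerOfTwo32(uint32_t x) { return x != 0u && (x & (x - 1u)) == 0u; }

    // value + 1 being a power of two means value is 0x1, 0x3, ..., 0x7FFFFFFF:
    // a run of set bits starting at bit 0, extractable with one UBFX.
    bool AndFoldableToBitfieldExtract(uint32_t value) {
      return IsPowerOfTwo32(value + 1u);
    }

    int main() {
      assert(AndFoldableToBitfieldExtract(0x7Fu));   // low 7 bits
      assert(!AndFoldableToBitfieldExtract(0xF0u));  // not contiguous from bit 0
    }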
 
 void InstructionCodeGeneratorARMVIXL::HandleFieldGet(HInstruction* instruction,
@@ -5484,20 +6258,56 @@
   caller_saves.Add(LocationFrom(calling_convention.GetRegisterAt(0)));
   caller_saves.Add(LocationFrom(calling_convention.GetRegisterAt(1)));
   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves);
-  locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RequiresRegister());
+
+  HInstruction* index = instruction->InputAt(0);
+  HInstruction* length = instruction->InputAt(1);
+  // If both index and length are constants, we can check the bounds statically. But if at
+  // least one of them is not encodable, ArmEncodableConstantOrRegister would create
+  // Location::RequiresRegister(), which is not desired here. Instead, we create constant
+  // locations directly.
+  bool both_const = index->IsConstant() && length->IsConstant();
+  locations->SetInAt(0, both_const
+      ? Location::ConstantLocation(index->AsConstant())
+      : ArmEncodableConstantOrRegister(index, CMP));
+  locations->SetInAt(1, both_const
+      ? Location::ConstantLocation(length->AsConstant())
+      : ArmEncodableConstantOrRegister(length, CMP));
 }
 
 void InstructionCodeGeneratorARMVIXL::VisitBoundsCheck(HBoundsCheck* instruction) {
-  SlowPathCodeARMVIXL* slow_path =
-      new (GetGraph()->GetArena()) BoundsCheckSlowPathARMVIXL(instruction);
-  codegen_->AddSlowPath(slow_path);
+  LocationSummary* locations = instruction->GetLocations();
+  Location index_loc = locations->InAt(0);
+  Location length_loc = locations->InAt(1);
 
-  vixl32::Register index = InputRegisterAt(instruction, 0);
-  vixl32::Register length = InputRegisterAt(instruction, 1);
+  if (length_loc.IsConstant()) {
+    int32_t length = Int32ConstantFrom(length_loc);
+    if (index_loc.IsConstant()) {
+      // BCE will remove the bounds check if we are guaranteed to pass.
+      int32_t index = Int32ConstantFrom(index_loc);
+      if (index < 0 || index >= length) {
+        SlowPathCodeARMVIXL* slow_path =
+            new (GetGraph()->GetArena()) BoundsCheckSlowPathARMVIXL(instruction);
+        codegen_->AddSlowPath(slow_path);
+        __ B(slow_path->GetEntryLabel());
+      } else {
+        // Some optimization after BCE may have generated this, and we should not
+        // generate a bounds check for a range that is statically known to be valid.
+      }
+      return;
+    }
 
-  __ Cmp(index, length);
-  __ B(hs, slow_path->GetEntryLabel());
+    SlowPathCodeARMVIXL* slow_path =
+        new (GetGraph()->GetArena()) BoundsCheckSlowPathARMVIXL(instruction);
+    __ Cmp(RegisterFrom(index_loc), length);
+    codegen_->AddSlowPath(slow_path);
+    __ B(hs, slow_path->GetEntryLabel());
+  } else {
+    SlowPathCodeARMVIXL* slow_path =
+        new (GetGraph()->GetArena()) BoundsCheckSlowPathARMVIXL(instruction);
+    __ Cmp(RegisterFrom(length_loc), InputOperandAt(instruction, 0));
+    codegen_->AddSlowPath(slow_path);
+    __ B(ls, slow_path->GetEntryLabel());
+  }
 }
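
Depending on which inputs are constant, the bounds check above takes one of roughly three shapes (a sketch; the unsigned comparisons fold the index < 0 case into index >= length):

    @ both constant:        nothing (statically in range), or an
    @                       unconditional b slow_path
    @ length constant:      cmp index, #length
    @                       bhs slow_path      @ index >= length
    @ length in a register: cmp length, <index operand>
    @                       bls slow_path      @ length <= index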
 
 void CodeGeneratorARMVIXL::MarkGCCard(vixl32::Register temp,
@@ -5727,13 +6537,16 @@
 void ParallelMoveResolverARMVIXL::Exchange(int mem1, int mem2) {
   // TODO(VIXL32): Double check the performance of this implementation.
   UseScratchRegisterScope temps(GetAssembler()->GetVIXLAssembler());
-  vixl32::SRegister temp_1 = temps.AcquireS();
-  vixl32::SRegister temp_2 = temps.AcquireS();
+  vixl32::Register temp1 = temps.Acquire();
+  ScratchRegisterScope ensure_scratch(
+      this, temp1.GetCode(), r0.GetCode(), codegen_->GetNumberOfCoreRegisters());
+  vixl32::Register temp2(ensure_scratch.GetRegister());
 
-  __ Vldr(temp_1, MemOperand(sp, mem1));
-  __ Vldr(temp_2, MemOperand(sp, mem2));
-  __ Vstr(temp_1, MemOperand(sp, mem2));
-  __ Vstr(temp_2, MemOperand(sp, mem1));
+  int stack_offset = ensure_scratch.IsSpilled() ? kArmWordSize : 0;
+  GetAssembler()->LoadFromOffset(kLoadWord, temp1, sp, mem1 + stack_offset);
+  GetAssembler()->LoadFromOffset(kLoadWord, temp2, sp, mem2 + stack_offset);
+  GetAssembler()->StoreToOffset(kStoreWord, temp1, sp, mem2 + stack_offset);
+  GetAssembler()->StoreToOffset(kStoreWord, temp2, sp, mem1 + stack_offset);
 }
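
The `stack_offset` adjustment accounts for ScratchRegisterScope spilling: when no second core register is free, one is pushed (see SpillScratch below), which moves `sp` down by one word, so the `sp`-relative slots being swapped sit one word further away. A sketch of the spilled case:

    @ push {rX}                      @ SpillScratch(rX)
    @ ldr temp1, [sp, #mem1 + 4]
    @ ldr temp2, [sp, #mem2 + 4]
    @ str temp1, [sp, #mem2 + 4]
    @ str temp2, [sp, #mem1 + 4]
    @ pop {rX}                       @ RestoreScratch(rX)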
 
 void ParallelMoveResolverARMVIXL::EmitSwap(size_t index) {
@@ -5756,7 +6569,7 @@
   } else if (source.IsStackSlot() && destination.IsStackSlot()) {
     Exchange(source.GetStackIndex(), destination.GetStackIndex());
   } else if (source.IsFpuRegister() && destination.IsFpuRegister()) {
-    vixl32::SRegister temp = temps.AcquireS();
+    vixl32::Register temp = temps.Acquire();
     __ Vmov(temp, SRegisterFrom(source));
     __ Vmov(SRegisterFrom(source), SRegisterFrom(destination));
     __ Vmov(SRegisterFrom(destination), temp);
@@ -5815,12 +6628,12 @@
   }
 }
 
-void ParallelMoveResolverARMVIXL::SpillScratch(int reg ATTRIBUTE_UNUSED) {
-  TODO_VIXL32(FATAL);
+void ParallelMoveResolverARMVIXL::SpillScratch(int reg) {
+  __ Push(vixl32::Register(reg));
 }
 
-void ParallelMoveResolverARMVIXL::RestoreScratch(int reg ATTRIBUTE_UNUSED) {
-  TODO_VIXL32(FATAL);
+void ParallelMoveResolverARMVIXL::RestoreScratch(int reg) {
+  __ Pop(vixl32::Register(reg));
 }
 
 HLoadClass::LoadKind CodeGeneratorARMVIXL::GetSupportedLoadClassKind(
@@ -6781,6 +7594,60 @@
   }
 }
 
+void LocationsBuilderARMVIXL::VisitDataProcWithShifterOp(
+    HDataProcWithShifterOp* instruction) {
+  DCHECK(instruction->GetType() == Primitive::kPrimInt ||
+         instruction->GetType() == Primitive::kPrimLong);
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+  const bool overlap = instruction->GetType() == Primitive::kPrimLong &&
+                       HDataProcWithShifterOp::IsExtensionOp(instruction->GetOpKind());
+
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(),
+                    overlap ? Location::kOutputOverlap : Location::kNoOutputOverlap);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitDataProcWithShifterOp(
+    HDataProcWithShifterOp* instruction) {
+  const LocationSummary* const locations = instruction->GetLocations();
+  const HInstruction::InstructionKind kind = instruction->GetInstrKind();
+  const HDataProcWithShifterOp::OpKind op_kind = instruction->GetOpKind();
+
+  if (instruction->GetType() == Primitive::kPrimInt) {
+    DCHECK(!HDataProcWithShifterOp::IsExtensionOp(op_kind));
+
+    const vixl32::Register second = instruction->InputAt(1)->GetType() == Primitive::kPrimLong
+        ? LowRegisterFrom(locations->InAt(1))
+        : InputRegisterAt(instruction, 1);
+
+    GenerateDataProcInstruction(kind,
+                                OutputRegister(instruction),
+                                InputRegisterAt(instruction, 0),
+                                Operand(second,
+                                        ShiftFromOpKind(op_kind),
+                                        instruction->GetShiftAmount()),
+                                codegen_);
+  } else {
+    DCHECK_EQ(instruction->GetType(), Primitive::kPrimLong);
+
+    if (HDataProcWithShifterOp::IsExtensionOp(op_kind)) {
+      const vixl32::Register second = InputRegisterAt(instruction, 1);
+
+      DCHECK(!LowRegisterFrom(locations->Out()).Is(second));
+      GenerateDataProc(kind,
+                       locations->Out(),
+                       locations->InAt(0),
+                       second,
+                       Operand(second, ShiftType::ASR, 31),
+                       codegen_);
+    } else {
+      GenerateLongDataProc(instruction, codegen_);
+    }
+  }
+}
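
For the long extension-op path above, the 32-bit operand supplies the low half directly and its sign bits (ASR #31) supply the high half; this is also why the output must not overlap `second` (the low result is written before the high half reads it). A sketch for a long ADD with a sign-extended int operand:

    @ adds out_lo, in_lo, second
    @ adc  out_hi, in_hi, second, asr #31   @ high word = sign extension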
+
 // TODO(VIXL): Remove optimizations in the helper when they are implemented in vixl.
 void InstructionCodeGeneratorARMVIXL::GenerateAndConst(vixl32::Register out,
                                                        vixl32::Register first,
@@ -6797,10 +7664,12 @@
     return;
   }
   if (GetAssembler()->ShifterOperandCanHold(AND, value)) {
-  __ And(out, first, value);
+    __ And(out, first, value);
+  } else if (GetAssembler()->ShifterOperandCanHold(BIC, ~value)) {
+    __ Bic(out, first, ~value);
   } else {
-    DCHECK(GetAssembler()->ShifterOperandCanHold(BIC, ~value));
-  __ Bic(out, first, ~value);
+    DCHECK(IsPowerOfTwo(value + 1));
+    __ Ubfx(out, first, 0, WhichPowerOf2(value + 1));
   }
 }
 
@@ -7019,14 +7888,35 @@
     DCHECK(kEmitCompilerReadBarrier);
     if (kUseBakerReadBarrier) {
       // Fast path implementation of art::ReadBarrier::BarrierForRoot when
-      // Baker's read barrier are used:
+      // Baker's read barriers are used.
       //
-      //   root = obj.field;
+      // Note that we do not actually check the value of
+      // `GetIsGcMarking()` to decide whether to mark the loaded GC
+      // root or not.  Instead, we load into `temp` the read barrier
+      // mark entry point corresponding to register `root`. If `temp`
+      // is null, it means that `GetIsGcMarking()` is false, and vice
+      // versa.
+      //
       //   temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
-      //   if (temp != null) {
-      //     root = temp(root)
+      //   GcRoot<mirror::Object> root = *(obj+offset);  // Original reference load.
+      //   if (temp != nullptr) {  // <=> Thread::Current()->GetIsGcMarking()
+      //     // Slow path.
+      //     root = temp(root);  // root = ReadBarrier::Mark(root);  // Runtime entry point call.
       //   }
 
+      // Slow path marking the GC root `root`. The entrypoint will already be loaded in `temp`.
+      Location temp = LocationFrom(lr);
+      SlowPathCodeARMVIXL* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARMVIXL(
+              instruction, root, /* entrypoint */ temp);
+      codegen_->AddSlowPath(slow_path);
+
+      // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
+      const int32_t entry_point_offset =
+          CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(root.reg());
+      // Loading the entrypoint does not require a load acquire since it is only changed when
+      // threads are suspended or running a checkpoint.
+      GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(temp), tr, entry_point_offset);
+
       // /* GcRoot<mirror::Object> */ root = *(obj + offset)
       GetAssembler()->LoadFromOffset(kLoadWord, root_reg, obj, offset);
       static_assert(
@@ -7037,21 +7927,6 @@
                     "art::mirror::CompressedReference<mirror::Object> and int32_t "
                     "have different sizes.");
 
-      // Slow path marking the GC root `root`.
-      Location temp = LocationFrom(lr);
-      SlowPathCodeARMVIXL* slow_path =
-          new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARMVIXL(
-              instruction,
-              root,
-              /*entrypoint*/ temp);
-      codegen_->AddSlowPath(slow_path);
-
-      // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
-      const int32_t entry_point_offset =
-          CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(root.reg());
-      // Loading the entrypoint does not require a load acquire since it is only changed when
-      // threads are suspended or running a checkpoint.
-      GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(temp), tr, entry_point_offset);
      // The entrypoint is null when the GC is not marking, which saves us one load compared to
      // checking GetIsGcMarking.
       __ CompareAndBranchIfNonZero(RegisterFrom(temp), slow_path->GetEntryLabel());
@@ -7122,55 +7997,114 @@
   DCHECK(kEmitCompilerReadBarrier);
   DCHECK(kUseBakerReadBarrier);
 
-  // In slow path based read barriers, the read barrier call is
-  // inserted after the original load. However, in fast path based
-  // Baker's read barriers, we need to perform the load of
-  // mirror::Object::monitor_ *before* the original reference load.
-  // This load-load ordering is required by the read barrier.
-  // The fast path/slow path (for Baker's algorithm) should look like:
+  // Query `art::Thread::Current()->GetIsGcMarking()` to decide
+  // whether we need to enter the slow path to mark the reference.
+  // Then, in the slow path, check the gray bit in the lock word of
+  // the reference's holder (`obj`) to decide whether to mark `ref` or
+  // not.
   //
-  //   uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
-  //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
-  //   HeapReference<Object> ref = *src;  // Original reference load.
-  //   bool is_gray = (rb_state == ReadBarrier::GrayState());
-  //   if (is_gray) {
-  //     ref = ReadBarrier::Mark(ref);  // Performed by runtime entrypoint slow path.
+  // Note that we do not actually check the value of `GetIsGcMarking()`;
+  // instead, we load into `temp3` the read barrier mark entry point
+  // corresponding to register `ref`. If `temp3` is null, it means
+  // that `GetIsGcMarking()` is false, and vice versa.
+  //
+  //   temp3 = Thread::Current()->pReadBarrierMarkReg ## ref.reg()
+  //   if (temp3 != nullptr) {  // <=> Thread::Current()->GetIsGcMarking()
+  //     // Slow path.
+  //     uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
+  //     lfence;  // Load fence or artificial data dependency to prevent load-load reordering
+  //     HeapReference<mirror::Object> ref = *src;  // Original reference load.
+  //     bool is_gray = (rb_state == ReadBarrier::GrayState());
+  //     if (is_gray) {
+  //       ref = temp3(ref);  // ref = ReadBarrier::Mark(ref);  // Runtime entry point call.
+  //     }
+  //   } else {
+  //     HeapReference<mirror::Object> ref = *src;  // Original reference load.
   //   }
-  //
-  // Note: the original implementation in ReadBarrier::Barrier is
-  // slightly more complex as it performs additional checks that we do
-  // not do here for performance reasons.
 
-  vixl32::Register ref_reg = RegisterFrom(ref);
   vixl32::Register temp_reg = RegisterFrom(temp);
-  uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
 
-  // /* int32_t */ monitor = obj->monitor_
-  GetAssembler()->LoadFromOffset(kLoadWord, temp_reg, obj, monitor_offset);
-  if (needs_null_check) {
-    MaybeRecordImplicitNullCheck(instruction);
+  // Slow path marking the object `ref` when the GC is marking. The
+  // entrypoint will already be loaded in `temp3`.
+  Location temp3 = LocationFrom(lr);
+  SlowPathCodeARMVIXL* slow_path;
+  if (always_update_field) {
+    DCHECK(temp2 != nullptr);
+    // LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL
+    // only supports address of the form `obj + field_offset`, where
+    // `obj` is a register and `field_offset` is a register pair (of
+    // which only the lower half is used). Thus `offset` is expected to
+    // be zero and `scale_factor` to be TIMES_1 in this code path.
+    DCHECK_EQ(offset, 0u);
+    DCHECK_EQ(scale_factor, ScaleFactor::TIMES_1);
+    Location field_offset = index;
+    slow_path =
+        new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL(
+            instruction,
+            ref,
+            obj,
+            offset,
+            /* index */ field_offset,
+            scale_factor,
+            needs_null_check,
+            temp_reg,
+            *temp2,
+            /* entrypoint */ temp3);
+  } else {
+    slow_path = new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierSlowPathARMVIXL(
+        instruction,
+        ref,
+        obj,
+        offset,
+        index,
+        scale_factor,
+        needs_null_check,
+        temp_reg,
+        /* entrypoint */ temp3);
   }
-  // /* LockWord */ lock_word = LockWord(monitor)
-  static_assert(sizeof(LockWord) == sizeof(int32_t),
-                "art::LockWord and int32_t have different sizes.");
+  AddSlowPath(slow_path);
 
-  // Introduce a dependency on the lock_word including the rb_state,
-  // which shall prevent load-load reordering without using
-  // a memory barrier (which would be more expensive).
-  // `obj` is unchanged by this operation, but its value now depends
-  // on `temp_reg`.
-  __ Add(obj, obj, Operand(temp_reg, ShiftType::LSR, 32));
+  // temp3 = Thread::Current()->pReadBarrierMarkReg ## ref.reg()
+  const int32_t entry_point_offset =
+      CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref.reg());
+  // Loading the entrypoint does not require a load acquire since it is only changed when
+  // threads are suspended or running a checkpoint.
+  GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(temp3), tr, entry_point_offset);
+  // The entrypoint is null when the GC is not marking, which saves us one load compared to
+  // checking GetIsGcMarking.
+  __ CompareAndBranchIfNonZero(RegisterFrom(temp3), slow_path->GetEntryLabel());
+  // Fast path: just load the reference.
+  GenerateRawReferenceLoad(instruction, ref, obj, offset, index, scale_factor, needs_null_check);
+  __ Bind(slow_path->GetExitLabel());
+}
 
-  // The actual reference load.
+void CodeGeneratorARMVIXL::GenerateRawReferenceLoad(HInstruction* instruction,
+                                                    Location ref,
+                                                    vixl::aarch32::Register obj,
+                                                    uint32_t offset,
+                                                    Location index,
+                                                    ScaleFactor scale_factor,
+                                                    bool needs_null_check) {
+  Primitive::Type type = Primitive::kPrimNot;
+  vixl32::Register ref_reg = RegisterFrom(ref, type);
+
+  // If needed, vixl::EmissionCheckScope guards are used to ensure
+  // that no pools are emitted between the load (macro) instruction
+  // and MaybeRecordImplicitNullCheck.
+
   if (index.IsValid()) {
     // Load types involving an "index": ArrayGet,
     // UnsafeGetObject/UnsafeGetObjectVolatile and UnsafeCASObject
     // intrinsics.
-    // /* HeapReference<Object> */ ref = *(obj + offset + (index << scale_factor))
+    // /* HeapReference<mirror::Object> */ ref = *(obj + offset + (index << scale_factor))
     if (index.IsConstant()) {
       size_t computed_offset =
           (Int32ConstantFrom(index) << scale_factor) + offset;
+      vixl::EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
       GetAssembler()->LoadFromOffset(kLoadWord, ref_reg, obj, computed_offset);
+      if (needs_null_check) {
+        MaybeRecordImplicitNullCheck(instruction);
+      }
     } else {
       // Handle the special case of the
       // UnsafeGetObject/UnsafeGetObjectVolatile and UnsafeCASObject
@@ -7180,46 +8114,27 @@
           ? LowRegisterFrom(index)
           : RegisterFrom(index);
       UseScratchRegisterScope temps(GetVIXLAssembler());
-      const vixl32::Register temp3 = temps.Acquire();
-      __ Add(temp3, obj, Operand(index_reg, ShiftType::LSL, scale_factor));
-      GetAssembler()->LoadFromOffset(kLoadWord, ref_reg, temp3, offset);
+      vixl32::Register temp = temps.Acquire();
+      __ Add(temp, obj, Operand(index_reg, ShiftType::LSL, scale_factor));
+      {
+        vixl::EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
+        GetAssembler()->LoadFromOffset(kLoadWord, ref_reg, temp, offset);
+        if (needs_null_check) {
+          MaybeRecordImplicitNullCheck(instruction);
+        }
+      }
     }
   } else {
-    // /* HeapReference<Object> */ ref = *(obj + offset)
+    // /* HeapReference<mirror::Object> */ ref = *(obj + offset)
+    vixl::EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
     GetAssembler()->LoadFromOffset(kLoadWord, ref_reg, obj, offset);
+    if (needs_null_check) {
+      MaybeRecordImplicitNullCheck(instruction);
+    }
   }
 
   // Object* ref = ref_addr->AsMirrorPtr()
   GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
-
-  // Slow path marking the object `ref` when it is gray.
-  SlowPathCodeARMVIXL* slow_path;
-  if (always_update_field) {
-    DCHECK(temp2 != nullptr);
-    // ReadBarrierMarkAndUpdateFieldSlowPathARMVIXL only supports address
-    // of the form `obj + field_offset`, where `obj` is a register and
-    // `field_offset` is a register pair (of which only the lower half
-    // is used). Thus `offset` and `scale_factor` above are expected
-    // to be null in this code path.
-    DCHECK_EQ(offset, 0u);
-    DCHECK_EQ(scale_factor, ScaleFactor::TIMES_1);
-    slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkAndUpdateFieldSlowPathARMVIXL(
-        instruction, ref, obj, /* field_offset */ index, temp_reg, *temp2);
-  } else {
-    slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARMVIXL(instruction, ref);
-  }
-  AddSlowPath(slow_path);
-
-  // if (rb_state == ReadBarrier::GrayState())
-  //   ref = ReadBarrier::Mark(ref);
-  // Given the numeric representation, it's enough to check the low bit of the
-  // rb_state. We do that by shifting the bit out of the lock word with LSRS
-  // which can be a 16-bit instruction unlike the TST immediate.
-  static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
-  static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
-  __ Lsrs(temp_reg, temp_reg, LockWord::kReadBarrierStateShift + 1);
-  __ B(cs, slow_path->GetEntryLabel());  // Carry flag is the last bit shifted out by LSRS.
-  __ Bind(slow_path->GetExitLabel());
 }
 
 void CodeGeneratorARMVIXL::GenerateReadBarrierSlow(HInstruction* instruction,
@@ -7494,9 +8409,7 @@
 }
 
 VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateBootImageAddressLiteral(uint32_t address) {
-  bool needs_patch = GetCompilerOptions().GetIncludePatchInformation();
-  Uint32ToLiteralMap* map = needs_patch ? &boot_image_address_patches_ : &uint32_literals_;
-  return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address), map);
+  return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address), &uint32_literals_);
 }
 
 VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateDexCacheAddressLiteral(uint32_t address) {
@@ -7556,8 +8469,7 @@
       /* MOVW+MOVT for each entry */ 2u * pc_relative_string_patches_.size() +
       boot_image_type_patches_.size() +
       /* MOVW+MOVT for each entry */ 2u * pc_relative_type_patches_.size() +
-      /* MOVW+MOVT for each entry */ 2u * type_bss_entry_patches_.size() +
-      boot_image_address_patches_.size();
+      /* MOVW+MOVT for each entry */ 2u * type_bss_entry_patches_.size();
   linker_patches->reserve(size);
   EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_,
                                                                linker_patches);
@@ -7591,13 +8503,6 @@
                                                      target_type.dex_file,
                                                      target_type.type_index.index_));
   }
-  for (const auto& entry : boot_image_address_patches_) {
-    DCHECK(GetCompilerOptions().GetIncludePatchInformation());
-    VIXLUInt32Literal* literal = entry.second;
-    DCHECK(literal->IsBound());
-    uint32_t literal_offset = literal->GetLocation();
-    linker_patches->push_back(LinkerPatch::RecordPosition(literal_offset));
-  }
   DCHECK_EQ(size, linker_patches->size());
 }
 
diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h
index 3f52c72..781027a 100644
--- a/compiler/optimizing/code_generator_arm_vixl.h
+++ b/compiler/optimizing/code_generator_arm_vixl.h
@@ -45,6 +45,11 @@
 namespace art {
 namespace arm {
 
+// This constant is used as an approximate margin within which the emission of veneer and
+// literal pools must be blocked.
+static constexpr int kMaxMacroInstructionSizeInBytes =
+    15 * vixl::aarch32::kMaxInstructionSizeInBytes;
+
 static const vixl::aarch32::Register kParameterCoreRegistersVIXL[] = {
     vixl::aarch32::r1,
     vixl::aarch32::r2,
@@ -396,7 +401,6 @@
   void GenerateCompareTestAndBranch(HCondition* condition,
                                     vixl::aarch32::Label* true_target,
                                     vixl::aarch32::Label* false_target);
-  void GenerateVcmp(HInstruction* instruction);
   void GenerateFPJumps(HCondition* cond,
                        vixl::aarch32::Label* true_label,
                        vixl::aarch32::Label* false_label);
@@ -505,6 +509,8 @@
     return &(block_labels_[block->GetBlockId()]);
   }
 
+  vixl32::Label* GetFinalLabel(HInstruction* instruction, vixl32::Label* final_label);
+
   void Initialize() OVERRIDE {
     block_labels_.resize(GetGraph()->GetBlocks().size());
   }
@@ -625,6 +631,15 @@
                                                  bool always_update_field = false,
                                                  vixl::aarch32::Register* temp2 = nullptr);
 
+  // Generate a heap reference load (with no read barrier).
+  void GenerateRawReferenceLoad(HInstruction* instruction,
+                                Location ref,
+                                vixl::aarch32::Register obj,
+                                uint32_t offset,
+                                Location index,
+                                ScaleFactor scale_factor,
+                                bool needs_null_check);
+
   // Generate a read barrier for a heap reference within `instruction`
   // using a slow path.
   //
@@ -738,8 +753,6 @@
   ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_;
   // PC-relative type patch info for kBssEntry.
   ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_;
-  // Deduplication map for patchable boot image addresses.
-  Uint32ToLiteralMap boot_image_address_patches_;
 
   // Patches for string literals in JIT compiled code.
   StringToLiteralMap jit_string_patches_;
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index c9dde7c..5f02a52 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -391,7 +391,8 @@
 
 class TypeCheckSlowPathMIPS : public SlowPathCodeMIPS {
  public:
-  explicit TypeCheckSlowPathMIPS(HInstruction* instruction) : SlowPathCodeMIPS(instruction) {}
+  explicit TypeCheckSlowPathMIPS(HInstruction* instruction, bool is_fatal)
+      : SlowPathCodeMIPS(instruction), is_fatal_(is_fatal) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = instruction_->GetLocations();
@@ -401,7 +402,9 @@
     CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen);
 
     __ Bind(GetEntryLabel());
-    SaveLiveRegisters(codegen, locations);
+    if (!is_fatal_) {
+      SaveLiveRegisters(codegen, locations);
+    }
 
     // We're moving two locations to locations that could overlap, so we need a parallel
     // move resolver.
@@ -424,13 +427,19 @@
       CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
     }
 
-    RestoreLiveRegisters(codegen, locations);
-    __ B(GetExitLabel());
+    if (!is_fatal_) {
+      RestoreLiveRegisters(codegen, locations);
+      __ B(GetExitLabel());
+    }
   }
 
   const char* GetDescription() const OVERRIDE { return "TypeCheckSlowPathMIPS"; }
 
+  bool IsFatal() const OVERRIDE { return is_fatal_; }
+
  private:
+  const bool is_fatal_;
+
   DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathMIPS);
 };
 
@@ -482,8 +491,6 @@
                                graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
-      boot_image_address_patches_(std::less<uint32_t>(),
-                                  graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       jit_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       jit_class_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       clobbered_ra_(false) {
@@ -1026,8 +1033,7 @@
       pc_relative_type_patches_.size() +
       type_bss_entry_patches_.size() +
       boot_image_string_patches_.size() +
-      boot_image_type_patches_.size() +
-      boot_image_address_patches_.size();
+      boot_image_type_patches_.size();
   linker_patches->reserve(size);
   EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_,
                                                                linker_patches);
@@ -1061,13 +1067,6 @@
                                                      target_type.dex_file,
                                                      target_type.type_index.index_));
   }
-  for (const auto& entry : boot_image_address_patches_) {
-    DCHECK(GetCompilerOptions().GetIncludePatchInformation());
-    Literal* literal = entry.second;
-    DCHECK(literal->GetLabel()->IsBound());
-    uint32_t literal_offset = __ GetLabelLocation(literal->GetLabel());
-    linker_patches->push_back(LinkerPatch::RecordPosition(literal_offset));
-  }
   DCHECK_EQ(size, linker_patches->size());
 }
 
@@ -1125,9 +1124,7 @@
 }
 
 Literal* CodeGeneratorMIPS::DeduplicateBootImageAddressLiteral(uint32_t address) {
-  bool needs_patch = GetCompilerOptions().GetIncludePatchInformation();
-  Uint32ToLiteralMap* map = needs_patch ? &boot_image_address_patches_ : &uint32_literals_;
-  return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address), map);
+  return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address), &uint32_literals_);
 }
 
 void CodeGeneratorMIPS::EmitPcRelativeAddressPlaceholderHigh(PcRelativePatchInfo* info,
@@ -1899,9 +1896,9 @@
   }
 }
 
-auto InstructionCodeGeneratorMIPS::GetImplicitNullChecker(HInstruction* instruction) {
-  auto null_checker = [this, instruction]() {
-    this->codegen_->MaybeRecordImplicitNullCheck(instruction);
+static auto GetImplicitNullChecker(HInstruction* instruction, CodeGeneratorMIPS* codegen) {
+  auto null_checker = [codegen, instruction]() {
+    codegen->MaybeRecordImplicitNullCheck(instruction);
   };
   return null_checker;
 }
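
Making the null checker a free function taking the codegen lets it be shared without the `auto` member-function declaration (removed from the header below). The returned lambda is handed to the assembler's load/store helpers, which invoke it right after emitting the memory instruction, so the recorded PC is that of the faulting access. An illustrative use, with names as in this file:

    auto null_checker = GetImplicitNullChecker(instruction, codegen_);
    // The assembler calls null_checker() immediately after emitting the
    // load, so MaybeRecordImplicitNullCheck sees the correct PC.
    __ LoadFromOffset(kLoadWord, dst, obj, offset, null_checker);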
@@ -1911,7 +1908,7 @@
   Register obj = locations->InAt(0).AsRegister<Register>();
   Location index = locations->InAt(1);
   uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
-  auto null_checker = GetImplicitNullChecker(instruction);
+  auto null_checker = GetImplicitNullChecker(instruction, codegen_);
 
   Primitive::Type type = instruction->GetType();
   const bool maybe_compressed_char_at = mirror::kUseStringCompression &&
@@ -2073,6 +2070,11 @@
       LOG(FATAL) << "Unreachable type " << instruction->GetType();
       UNREACHABLE();
   }
+
+  if (type == Primitive::kPrimNot) {
+    Register out = locations->Out().AsRegister<Register>();
+    __ MaybeUnpoisonHeapReference(out);
+  }
 }
 
 void LocationsBuilderMIPS::VisitArrayLength(HArrayLength* instruction) {
@@ -2143,7 +2145,7 @@
   bool needs_runtime_call = locations->WillCall();
   bool needs_write_barrier =
       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
-  auto null_checker = GetImplicitNullChecker(instruction);
+  auto null_checker = GetImplicitNullChecker(instruction, codegen_);
   Register base_reg = index.IsConstant() ? obj : TMP;
 
   switch (value_type) {
@@ -2200,7 +2202,31 @@
           DCHECK(!needs_write_barrier);
         } else {
           Register value = value_location.AsRegister<Register>();
-          __ StoreToOffset(kStoreWord, value, base_reg, data_offset, null_checker);
+          if (kPoisonHeapReferences && needs_write_barrier) {
+            // Note that in the case where `value` is a null reference,
+            // we do not enter this block, as a null reference does not
+            // need poisoning.
+            DCHECK_EQ(value_type, Primitive::kPrimNot);
+            // Use Sw() instead of StoreToOffset() in order to be able to
+            // hold the poisoned reference in AT and thus avoid allocating
+            // yet another temporary register.
+            if (index.IsConstant()) {
+              if (!IsInt<16>(static_cast<int32_t>(data_offset))) {
+                int16_t low = Low16Bits(data_offset);
+                uint32_t high = data_offset - low;
+                __ Addiu32(TMP, obj, high);
+                base_reg = TMP;
+                data_offset = low;
+              }
+            } else {
+              DCHECK(IsInt<16>(static_cast<int32_t>(data_offset)));
+            }
+            __ PoisonHeapReference(AT, value);
+            __ Sw(AT, base_reg, data_offset);
+            null_checker();
+          } else {
+            __ StoreToOffset(kStoreWord, value, base_reg, data_offset, null_checker);
+          }
           if (needs_write_barrier) {
             DCHECK_EQ(value_type, Primitive::kPrimNot);
             codegen_->MarkGCCard(obj, value, instruction->GetValueCanBeNull());
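
Heap reference poisoning stores the two's-complement negation of the reference; a minimal sketch of the block above (negation leaves null unchanged, which is why null stores skip poisoning):

    // AT = 0 - value;            // PoisonHeapReference(AT, value)
    // *(base + offset) = AT;     // Sw(AT, base_reg, data_offset)
    // ...on the load side, MaybeUnpoisonHeapReference negates again,
    // recovering the original reference.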
@@ -2208,6 +2234,8 @@
         }
       } else {
         DCHECK_EQ(value_type, Primitive::kPrimNot);
+        // Note: if heap poisoning is enabled, pAputObject takes care
+        // of poisoning the reference.
         codegen_->InvokeRuntime(kQuickAputObject, instruction, instruction->GetDexPc());
         CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
       }
@@ -2300,29 +2328,178 @@
 }
 
 void LocationsBuilderMIPS::VisitCheckCast(HCheckCast* instruction) {
-  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
-      instruction,
-      LocationSummary::kCallOnSlowPath);
+  LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
+  bool throws_into_catch = instruction->CanThrowIntoCatchBlock();
+
+  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+  switch (type_check_kind) {
+    case TypeCheckKind::kExactCheck:
+    case TypeCheckKind::kAbstractClassCheck:
+    case TypeCheckKind::kClassHierarchyCheck:
+    case TypeCheckKind::kArrayObjectCheck:
+      call_kind = throws_into_catch
+          ? LocationSummary::kCallOnSlowPath
+          : LocationSummary::kNoCall;  // In fact, call on a fatal (non-returning) slow path.
+      break;
+    case TypeCheckKind::kArrayCheck:
+    case TypeCheckKind::kUnresolvedCheck:
+    case TypeCheckKind::kInterfaceCheck:
+      call_kind = LocationSummary::kCallOnSlowPath;
+      break;
+  }
+
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RequiresRegister());
-  // Note that TypeCheckSlowPathMIPS uses this register too.
   locations->AddTemp(Location::RequiresRegister());
 }
 
 void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) {
+  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
   LocationSummary* locations = instruction->GetLocations();
   Register obj = locations->InAt(0).AsRegister<Register>();
   Register cls = locations->InAt(1).AsRegister<Register>();
-  Register obj_cls = locations->GetTemp(0).AsRegister<Register>();
+  Register temp = locations->GetTemp(0).AsRegister<Register>();
+  const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+  const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
+  const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
+  const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
+  const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
+  const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
+  const uint32_t object_array_data_offset =
+      mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
+  MipsLabel done;
 
-  SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathMIPS(instruction);
+  // Always false for read barriers: the paths below avoid read barriers for performance and
+  // code size reasons, which can produce false negatives, and those cases must be able to
+  // enter a non-fatal slow path and reach the entrypoint.
+  bool is_type_check_slow_path_fatal = false;
+  if (!kEmitCompilerReadBarrier) {
+    is_type_check_slow_path_fatal =
+        (type_check_kind == TypeCheckKind::kExactCheck ||
+         type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+         type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+         type_check_kind == TypeCheckKind::kArrayObjectCheck) &&
+        !instruction->CanThrowIntoCatchBlock();
+  }
+  SlowPathCodeMIPS* slow_path =
+      new (GetGraph()->GetArena()) TypeCheckSlowPathMIPS(instruction,
+                                                         is_type_check_slow_path_fatal);
   codegen_->AddSlowPath(slow_path);
 
-  // TODO: avoid this check if we know obj is not null.
-  __ Beqz(obj, slow_path->GetExitLabel());
-  // Compare the class of `obj` with `cls`.
-  __ LoadFromOffset(kLoadWord, obj_cls, obj, mirror::Object::ClassOffset().Int32Value());
-  __ Bne(obj_cls, cls, slow_path->GetEntryLabel());
+  // Avoid this check if we know `obj` is not null.
+  if (instruction->MustDoNullCheck()) {
+    __ Beqz(obj, &done);
+  }
+
+  switch (type_check_kind) {
+    case TypeCheckKind::kExactCheck:
+    case TypeCheckKind::kArrayCheck: {
+      // /* HeapReference<Class> */ temp = obj->klass_
+      __ LoadFromOffset(kLoadWord, temp, obj, class_offset);
+      __ MaybeUnpoisonHeapReference(temp);
+      // Jump to slow path for throwing the exception or doing a
+      // more involved array check.
+      __ Bne(temp, cls, slow_path->GetEntryLabel());
+      break;
+    }
+
+    case TypeCheckKind::kAbstractClassCheck: {
+      // /* HeapReference<Class> */ temp = obj->klass_
+      __ LoadFromOffset(kLoadWord, temp, obj, class_offset);
+      __ MaybeUnpoisonHeapReference(temp);
+      // If the class is abstract, we eagerly fetch the super class of the
+      // object to avoid doing a comparison we know will fail.
+      MipsLabel loop;
+      __ Bind(&loop);
+      // /* HeapReference<Class> */ temp = temp->super_class_
+      __ LoadFromOffset(kLoadWord, temp, temp, super_offset);
+      __ MaybeUnpoisonHeapReference(temp);
+      // If the class reference currently in `temp` is null, jump to the slow path to throw the
+      // exception.
+      __ Beqz(temp, slow_path->GetEntryLabel());
+      // Otherwise, compare the classes.
+      __ Bne(temp, cls, &loop);
+      break;
+    }
+
+    case TypeCheckKind::kClassHierarchyCheck: {
+      // /* HeapReference<Class> */ temp = obj->klass_
+      __ LoadFromOffset(kLoadWord, temp, obj, class_offset);
+      __ MaybeUnpoisonHeapReference(temp);
+      // Walk over the class hierarchy to find a match.
+      MipsLabel loop;
+      __ Bind(&loop);
+      __ Beq(temp, cls, &done);
+      // /* HeapReference<Class> */ temp = temp->super_class_
+      __ LoadFromOffset(kLoadWord, temp, temp, super_offset);
+      __ MaybeUnpoisonHeapReference(temp);
+      // If the class reference currently in `temp` is null, jump to the slow path to throw the
+      // exception. Otherwise, jump to the beginning of the loop.
+      __ Bnez(temp, &loop);
+      __ B(slow_path->GetEntryLabel());
+      break;
+    }
+
+    case TypeCheckKind::kArrayObjectCheck: {
+      // /* HeapReference<Class> */ temp = obj->klass_
+      __ LoadFromOffset(kLoadWord, temp, obj, class_offset);
+      __ MaybeUnpoisonHeapReference(temp);
+      // Do an exact check.
+      __ Beq(temp, cls, &done);
+      // Otherwise, we need to check that the object's class is a non-primitive array.
+      // /* HeapReference<Class> */ temp = temp->component_type_
+      __ LoadFromOffset(kLoadWord, temp, temp, component_offset);
+      __ MaybeUnpoisonHeapReference(temp);
+      // If the component type is null, jump to the slow path to throw the exception.
+      __ Beqz(temp, slow_path->GetEntryLabel());
+      // Otherwise, the object is indeed an array, further check that this component
+      // type is not a primitive type.
+      __ LoadFromOffset(kLoadUnsignedHalfword, temp, temp, primitive_offset);
+      static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+      __ Bnez(temp, slow_path->GetEntryLabel());
+      break;
+    }
+
+    case TypeCheckKind::kUnresolvedCheck:
+      // We always go into the type check slow path for the unresolved check case.
+      // We cannot directly call the CheckCast runtime entry point
+      // without resorting to a type checking slow path here (i.e. by
+      // calling InvokeRuntime directly), as it would require assigning
+      // fixed registers for the inputs of this HCheckCast
+      // instruction (following the runtime calling convention), which
+      // might be cluttered by the potential first read barrier
+      // emission at the beginning of this method.
+      __ B(slow_path->GetEntryLabel());
+      break;
+
+    case TypeCheckKind::kInterfaceCheck: {
+      // Avoid read barriers to improve the performance of the fast path. We cannot get false
+      // positives by doing this.
+      // /* HeapReference<Class> */ temp = obj->klass_
+      __ LoadFromOffset(kLoadWord, temp, obj, class_offset);
+      __ MaybeUnpoisonHeapReference(temp);
+      // /* HeapReference<Class> */ temp = temp->iftable_
+      __ LoadFromOffset(kLoadWord, temp, temp, iftable_offset);
+      __ MaybeUnpoisonHeapReference(temp);
+      // Iftable is never null.
+      __ Lw(TMP, temp, array_length_offset);
+      // Loop through the iftable and check if any class matches.
+      MipsLabel loop;
+      __ Bind(&loop);
+      __ Addiu(temp, temp, 2 * kHeapReferenceSize);  // Possibly in delay slot on R2.
+      __ Beqz(TMP, slow_path->GetEntryLabel());
+      __ Lw(AT, temp, object_array_data_offset - 2 * kHeapReferenceSize);
+      __ MaybeUnpoisonHeapReference(AT);
+      // Go to next interface.
+      __ Addiu(TMP, TMP, -2);
+      // Compare the classes and continue the loop if they do not match.
+      __ Bne(AT, cls, &loop);
+      break;
+    }
+  }
+
+  __ Bind(&done);
   __ Bind(slow_path->GetExitLabel());
 }
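
The interface-check loop walks the IfTable, which is laid out as a flat object array of (interface class, method array) pairs; hence the stride of 2 * kHeapReferenceSize and the decrement of the remaining length by 2. A C-like sketch of the assembly above:

    // int32_t remaining = iftable->length;         // Lw TMP, ...
    // for (;;) {
    //   data += 2 * kHeapReferenceSize;            // advance to the next pair
    //   if (remaining == 0) goto slow_path;        // table exhausted
    //   klass = *(data - 2 * kHeapReferenceSize);  // interface class slot
    //   remaining -= 2;
    //   if (klass == cls) break;                   // match: cast succeeds
    // }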
 
@@ -4891,7 +5068,7 @@
   LoadOperandType load_type = kLoadUnsignedByte;
   bool is_volatile = field_info.IsVolatile();
   uint32_t offset = field_info.GetFieldOffset().Uint32Value();
-  auto null_checker = GetImplicitNullChecker(instruction);
+  auto null_checker = GetImplicitNullChecker(instruction, codegen_);
 
   switch (type) {
     case Primitive::kPrimBoolean:
@@ -4958,6 +5135,9 @@
         dst = locations->Out().AsRegister<Register>();
       }
       __ LoadFromOffset(load_type, dst, obj, offset, null_checker);
+      if (type == Primitive::kPrimNot) {
+        __ MaybeUnpoisonHeapReference(dst);
+      }
     } else {
       DCHECK(locations->Out().IsFpuRegister());
       FRegister dst = locations->Out().AsFpuRegister<FRegister>();
@@ -5016,7 +5196,8 @@
   StoreOperandType store_type = kStoreByte;
   bool is_volatile = field_info.IsVolatile();
   uint32_t offset = field_info.GetFieldOffset().Uint32Value();
-  auto null_checker = GetImplicitNullChecker(instruction);
+  bool needs_write_barrier = CodeGenerator::StoreNeedsWriteBarrier(type, instruction->InputAt(1));
+  auto null_checker = GetImplicitNullChecker(instruction, codegen_);
 
   switch (type) {
     case Primitive::kPrimBoolean:
@@ -5089,7 +5270,16 @@
       } else {
         src = value_location.AsRegister<Register>();
       }
-      __ StoreToOffset(store_type, src, obj, offset, null_checker);
+      if (kPoisonHeapReferences && needs_write_barrier) {
+        // Note that in the case where `value` is a null reference,
+        // we do not enter this block, as a null reference does not
+        // need poisoning.
+        DCHECK_EQ(type, Primitive::kPrimNot);
+        __ PoisonHeapReference(TMP, src);
+        __ StoreToOffset(store_type, TMP, obj, offset, null_checker);
+      } else {
+        __ StoreToOffset(store_type, src, obj, offset, null_checker);
+      }
     } else {
       FRegister src = value_location.AsFpuRegister<FRegister>();
       if (type == Primitive::kPrimFloat) {
@@ -5101,7 +5291,7 @@
   }
 
   // TODO: memory barriers?
-  if (CodeGenerator::StoreNeedsWriteBarrier(type, instruction->InputAt(1))) {
+  if (needs_write_barrier) {
     Register src = value_location.AsRegister<Register>();
     codegen_->MarkGCCard(obj, src, value_can_be_null);
   }
@@ -5148,8 +5338,22 @@
 }
 
 void LocationsBuilderMIPS::VisitInstanceOf(HInstanceOf* instruction) {
-  LocationSummary::CallKind call_kind =
-      instruction->IsExactCheck() ? LocationSummary::kNoCall : LocationSummary::kCallOnSlowPath;
+  LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
+  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+  switch (type_check_kind) {
+    case TypeCheckKind::kExactCheck:
+    case TypeCheckKind::kAbstractClassCheck:
+    case TypeCheckKind::kClassHierarchyCheck:
+    case TypeCheckKind::kArrayObjectCheck:
+      call_kind = LocationSummary::kNoCall;
+      break;
+    case TypeCheckKind::kArrayCheck:
+    case TypeCheckKind::kUnresolvedCheck:
+    case TypeCheckKind::kInterfaceCheck:
+      call_kind = LocationSummary::kCallOnSlowPath;
+      break;
+  }
+
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RequiresRegister());
@@ -5159,35 +5363,143 @@
 }
 
 void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) {
+  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
   LocationSummary* locations = instruction->GetLocations();
   Register obj = locations->InAt(0).AsRegister<Register>();
   Register cls = locations->InAt(1).AsRegister<Register>();
   Register out = locations->Out().AsRegister<Register>();
-
+  uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+  uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
+  uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
+  uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
   MipsLabel done;
+  SlowPathCodeMIPS* slow_path = nullptr;
 
   // Return 0 if `obj` is null.
-  // TODO: Avoid this check if we know `obj` is not null.
-  __ Move(out, ZERO);
-  __ Beqz(obj, &done);
+  // Avoid this check if we know `obj` is not null.
+  if (instruction->MustDoNullCheck()) {
+    __ Move(out, ZERO);
+    __ Beqz(obj, &done);
+  }
 
-  // Compare the class of `obj` with `cls`.
-  __ LoadFromOffset(kLoadWord, out, obj, mirror::Object::ClassOffset().Int32Value());
-  if (instruction->IsExactCheck()) {
-    // Classes must be equal for the instanceof to succeed.
-    __ Xor(out, out, cls);
-    __ Sltiu(out, out, 1);
-  } else {
-    // If the classes are not equal, we go into a slow path.
-    DCHECK(locations->OnlyCallsOnSlowPath());
-    SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathMIPS(instruction);
-    codegen_->AddSlowPath(slow_path);
-    __ Bne(out, cls, slow_path->GetEntryLabel());
-    __ LoadConst32(out, 1);
-    __ Bind(slow_path->GetExitLabel());
+  switch (type_check_kind) {
+    case TypeCheckKind::kExactCheck: {
+      // /* HeapReference<Class> */ out = obj->klass_
+      __ LoadFromOffset(kLoadWord, out, obj, class_offset);
+      __ MaybeUnpoisonHeapReference(out);
+      // Classes must be equal for the instanceof to succeed.
+      __ Xor(out, out, cls);
+      __ Sltiu(out, out, 1);
+      break;
+    }
+
+    case TypeCheckKind::kAbstractClassCheck: {
+      // /* HeapReference<Class> */ out = obj->klass_
+      __ LoadFromOffset(kLoadWord, out, obj, class_offset);
+      __ MaybeUnpoisonHeapReference(out);
+      // If the class is abstract, we eagerly fetch the super class of the
+      // object to avoid doing a comparison we know will fail.
+      MipsLabel loop;
+      __ Bind(&loop);
+      // /* HeapReference<Class> */ out = out->super_class_
+      __ LoadFromOffset(kLoadWord, out, out, super_offset);
+      __ MaybeUnpoisonHeapReference(out);
+      // If `out` is null, we use it for the result, and jump to `done`.
+      __ Beqz(out, &done);
+      __ Bne(out, cls, &loop);
+      __ LoadConst32(out, 1);
+      break;
+    }
+
+    case TypeCheckKind::kClassHierarchyCheck: {
+      // /* HeapReference<Class> */ out = obj->klass_
+      __ LoadFromOffset(kLoadWord, out, obj, class_offset);
+      __ MaybeUnpoisonHeapReference(out);
+      // Walk over the class hierarchy to find a match.
+      MipsLabel loop, success;
+      __ Bind(&loop);
+      __ Beq(out, cls, &success);
+      // /* HeapReference<Class> */ out = out->super_class_
+      __ LoadFromOffset(kLoadWord, out, out, super_offset);
+      __ MaybeUnpoisonHeapReference(out);
+      __ Bnez(out, &loop);
+      // If `out` is null, we use it for the result, and jump to `done`.
+      __ B(&done);
+      __ Bind(&success);
+      __ LoadConst32(out, 1);
+      break;
+    }
+
+    case TypeCheckKind::kArrayObjectCheck: {
+      // /* HeapReference<Class> */ out = obj->klass_
+      __ LoadFromOffset(kLoadWord, out, obj, class_offset);
+      __ MaybeUnpoisonHeapReference(out);
+      // Do an exact check.
+      MipsLabel success;
+      __ Beq(out, cls, &success);
+      // Otherwise, we need to check that the object's class is a non-primitive array.
+      // /* HeapReference<Class> */ out = out->component_type_
+      __ LoadFromOffset(kLoadWord, out, out, component_offset);
+      __ MaybeUnpoisonHeapReference(out);
+      // If `out` is null, we use it for the result, and jump to `done`.
+      __ Beqz(out, &done);
+      __ LoadFromOffset(kLoadUnsignedHalfword, out, out, primitive_offset);
+      static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+      __ Sltiu(out, out, 1);
+      __ B(&done);
+      __ Bind(&success);
+      __ LoadConst32(out, 1);
+      break;
+    }
+
+    case TypeCheckKind::kArrayCheck: {
+      // No read barrier since the slow path will retry upon failure.
+      // /* HeapReference<Class> */ out = obj->klass_
+      __ LoadFromOffset(kLoadWord, out, obj, class_offset);
+      __ MaybeUnpoisonHeapReference(out);
+      DCHECK(locations->OnlyCallsOnSlowPath());
+      slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathMIPS(instruction,
+                                                                     /* is_fatal */ false);
+      codegen_->AddSlowPath(slow_path);
+      __ Bne(out, cls, slow_path->GetEntryLabel());
+      __ LoadConst32(out, 1);
+      break;
+    }
+
+    case TypeCheckKind::kUnresolvedCheck:
+    case TypeCheckKind::kInterfaceCheck: {
+      // Note that we indeed only call on slow path, but we always go
+      // into the slow path for the unresolved and interface check
+      // cases.
+      //
+      // We cannot directly call the InstanceofNonTrivial runtime
+      // entry point without resorting to a type checking slow path
+      // here (i.e. by calling InvokeRuntime directly), as it would
+      // require assigning fixed registers for the inputs of this
+      // HInstanceOf instruction (following the runtime calling
+      // convention), which might be cluttered by the potential first
+      // read barrier emission at the beginning of this method.
+      //
+      // TODO: Introduce a new runtime entry point taking the object
+      // to test (instead of its class) as argument, and let it deal
+      // with the read barrier issues. This will let us refactor this
+      // case of the `switch` code as it was previously (with a direct
+      // call to the runtime not using a type checking slow path).
+      // This should also be beneficial for the other cases above.
+      DCHECK(locations->OnlyCallsOnSlowPath());
+      slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathMIPS(instruction,
+                                                                     /* is_fatal */ false);
+      codegen_->AddSlowPath(slow_path);
+      __ B(slow_path->GetEntryLabel());
+      break;
+    }
   }
 
   __ Bind(&done);
+
+  if (slow_path != nullptr) {
+    __ Bind(slow_path->GetExitLabel());
+  }
 }
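
MIPS has no condition flags, so the exact-check cases materialize class equality arithmetically rather than with a compare-and-set; the xor/sltiu pair above is the standard idiom:

    # xor   out, out, cls    # out == 0 iff the two classes are equal
    # sltiu out, out, 1      # out = (out < 1), i.e. (out == 0)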
 
 void LocationsBuilderMIPS::VisitIntConstant(HIntConstant* constant) {
@@ -5239,6 +5551,14 @@
     __ LoadFromOffset(kLoadWord, temp, receiver.AsRegister<Register>(), class_offset);
   }
   codegen_->MaybeRecordImplicitNullCheck(invoke);
+  // Instead of simply (possibly) unpoisoning `temp` here, we should
+  // emit a read barrier for the previous class reference load.
+  // However this is not required in practice, as this is an
+  // intermediate/temporary reference and because the current
+  // concurrent copying collector keeps the from-space memory
+  // intact/accessible until the end of the marking phase (a future
+  // collector may not).
+  __ MaybeUnpoisonHeapReference(temp);
   __ LoadFromOffset(kLoadWord, temp, temp,
       mirror::Class::ImtPtrOffset(kMipsPointerSize).Uint32Value());
   uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
@@ -5562,6 +5882,14 @@
   // temp = object->GetClass();
   __ LoadFromOffset(kLoadWord, temp, receiver, class_offset);
   MaybeRecordImplicitNullCheck(invoke);
+  // Instead of simply (possibly) unpoisoning `temp` here, we should
+  // emit a read barrier for the previous class reference load.
+  // However this is not required in practice, as this is an
+  // intermediate/temporary reference and because the current
+  // concurrent copying collector keeps the from-space memory
+  // intact/accessible until the end of the marking phase (a future
+  // collector may not).
+  __ MaybeUnpoisonHeapReference(temp);
   // temp = temp->GetMethodAt(method_offset);
   __ LoadFromOffset(kLoadWord, temp, temp, method_offset);
   // T9 = temp->GetEntryPoint();
@@ -5692,7 +6020,7 @@
           codegen_->NewTypeBssEntryPatch(cls->GetDexFile(), cls->GetTypeIndex());
       bool reordering = __ SetReorder(false);
       codegen_->EmitPcRelativeAddressPlaceholderHigh(info, out, base_or_current_method_reg);
-      __ LoadFromOffset(kLoadWord, out, out, /* placeholder */ 0x5678);
+      GenerateGcRootFieldLoad(cls, out_loc, out, /* placeholder */ 0x5678);
       __ SetReorder(reordering);
       generate_null_check = true;
       break;
@@ -5837,7 +6165,7 @@
           codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex());
       bool reordering = __ SetReorder(false);
       codegen_->EmitPcRelativeAddressPlaceholderHigh(info, out, base_or_current_method_reg);
-      __ LoadFromOffset(kLoadWord, out, out, /* placeholder */ 0x5678);
+      GenerateGcRootFieldLoad(load, out_loc, out, /* placeholder */ 0x5678);
       __ SetReorder(reordering);
       SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS(load);
       codegen_->AddSlowPath(slow_path);
@@ -6059,6 +6387,8 @@
 }
 
 void InstructionCodeGeneratorMIPS::VisitNewArray(HNewArray* instruction) {
+  // Note: if heap poisoning is enabled, the entry point takes care
+  // of poisoning the reference.
   codegen_->InvokeRuntime(kQuickAllocArrayResolved, instruction, instruction->GetDexPc());
   CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
 }
@@ -6076,6 +6406,8 @@
 }
 
 void InstructionCodeGeneratorMIPS::VisitNewInstance(HNewInstance* instruction) {
+  // Note: if heap poisoning is enabled, the entry point takes care
+  // of poisoning the reference.
   if (instruction->IsStringAlloc()) {
     // String is allocated through StringFactory. Call NewEmptyString entry point.
     Register temp = instruction->GetLocations()->GetTemp(0).AsRegister<Register>();
diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h
index 47eba50..98fee24 100644
--- a/compiler/optimizing/code_generator_mips.h
+++ b/compiler/optimizing/code_generator_mips.h
@@ -297,7 +297,6 @@
   void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction);
   void GenerateDivRemIntegral(HBinaryOperation* instruction);
   void HandleGoto(HInstruction* got, HBasicBlock* successor);
-  auto GetImplicitNullChecker(HInstruction* instruction);
   void GenPackedSwitchWithCompares(Register value_reg,
                                    int32_t lower_bound,
                                    uint32_t num_entries,
@@ -536,8 +535,6 @@
   ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_;
   // PC-relative type patch info for kBssEntry.
   ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_;
-  // Deduplication map for patchable boot image addresses.
-  Uint32ToLiteralMap boot_image_address_patches_;
   // Patches for string root accesses in JIT compiled code.
   ArenaDeque<JitPatchInfo> jit_string_patches_;
   // Patches for class root accesses in JIT compiled code.
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 5be0da4..5246dbc 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -336,7 +336,8 @@
 
 class TypeCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 {
  public:
-  explicit TypeCheckSlowPathMIPS64(HInstruction* instruction) : SlowPathCodeMIPS64(instruction) {}
+  explicit TypeCheckSlowPathMIPS64(HInstruction* instruction, bool is_fatal)
+      : SlowPathCodeMIPS64(instruction), is_fatal_(is_fatal) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = instruction_->GetLocations();
@@ -347,7 +348,9 @@
     CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen);
 
     __ Bind(GetEntryLabel());
-    SaveLiveRegisters(codegen, locations);
+    if (!is_fatal_) {
+      SaveLiveRegisters(codegen, locations);
+    }
 
     // We're moving two locations to locations that could overlap, so we need a parallel
     // move resolver.
@@ -370,13 +373,19 @@
       CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
     }
 
-    RestoreLiveRegisters(codegen, locations);
-    __ Bc(GetExitLabel());
+    if (!is_fatal_) {
+      RestoreLiveRegisters(codegen, locations);
+      __ Bc(GetExitLabel());
+    }
   }
 
   const char* GetDescription() const OVERRIDE { return "TypeCheckSlowPathMIPS64"; }
 
+  bool IsFatal() const OVERRIDE { return is_fatal_; }
+
  private:
+  const bool is_fatal_;
+
   DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathMIPS64);
 };
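The is_fatal_ flag encodes a simple invariant: a fatal type-check slow path throws and never returns to compiled code, so saving and restoring live registers around the runtime call would be wasted work. A compilable sketch of that control shape (the names here are illustrative, not ART APIs):

    #include <functional>

    // Illustrative only: a fatal slow path skips the register save/restore
    // bracketing because control never comes back.
    void EmitTypeCheckSlowPathModel(bool is_fatal,
                                    const std::function<void()>& save_live_registers,
                                    const std::function<void()>& call_runtime,
                                    const std::function<void()>& restore_and_branch_back) {
      if (!is_fatal) {
        save_live_registers();
      }
      call_runtime();  // On a fatal path this throws and never returns.
      if (!is_fatal) {
        restore_and_branch_back();
      }
    }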
 
@@ -430,8 +439,6 @@
                                graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
-      boot_image_address_patches_(std::less<uint32_t>(),
-                                  graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       jit_string_patches_(StringReferenceValueComparator(),
                           graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       jit_class_patches_(TypeReferenceValueComparator(),
@@ -937,8 +944,7 @@
       pc_relative_type_patches_.size() +
       type_bss_entry_patches_.size() +
       boot_image_string_patches_.size() +
-      boot_image_type_patches_.size() +
-      boot_image_address_patches_.size();
+      boot_image_type_patches_.size();
   linker_patches->reserve(size);
   EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_,
                                                                linker_patches);
@@ -972,13 +978,6 @@
                                                      target_type.dex_file,
                                                      target_type.type_index.index_));
   }
-  for (const auto& entry : boot_image_address_patches_) {
-    DCHECK(GetCompilerOptions().GetIncludePatchInformation());
-    Literal* literal = entry.second;
-    DCHECK(literal->GetLabel()->IsBound());
-    uint32_t literal_offset = __ GetLabelLocation(literal->GetLabel());
-    linker_patches->push_back(LinkerPatch::RecordPosition(literal_offset));
-  }
   DCHECK_EQ(size, linker_patches->size());
 }
 
@@ -1042,9 +1041,7 @@
 }
 
 Literal* CodeGeneratorMIPS64::DeduplicateBootImageAddressLiteral(uint64_t address) {
-  bool needs_patch = GetCompilerOptions().GetIncludePatchInformation();
-  Uint32ToLiteralMap* map = needs_patch ? &boot_image_address_patches_ : &uint32_literals_;
-  return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address), map);
+  return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address), &uint32_literals_);
 }
 
 void CodeGeneratorMIPS64::EmitPcRelativeAddressPlaceholderHigh(PcRelativePatchInfo* info,
@@ -1483,11 +1480,19 @@
   }
 }
 
+static auto GetImplicitNullChecker(HInstruction* instruction, CodeGeneratorMIPS64* codegen) {
+  auto null_checker = [codegen, instruction]() {
+    codegen->MaybeRecordImplicitNullCheck(instruction);
+  };
+  return null_checker;
+}
+
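The null_checker lambda lets each memory access record its own implicit null check, so the fault PC maps back to the right HInstruction even when a load or store macro expands to several instructions. A minimal model of the assumed contract (emit_load is a stand-in, not an ART API):

    #include <functional>

    // Sketch: the assembler invokes the callback right after emitting the
    // access that can fault on a null base register.
    void LoadFromOffsetModel(const std::function<void()>& emit_load,
                             const std::function<void()>& null_checker) {
      emit_load();     // The potentially faulting memory access.
      null_checker();  // Records the dex pc of `instruction` at this exact PC.
    }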
 void InstructionCodeGeneratorMIPS64::VisitArrayGet(HArrayGet* instruction) {
   LocationSummary* locations = instruction->GetLocations();
   GpuRegister obj = locations->InAt(0).AsRegister<GpuRegister>();
   Location index = locations->InAt(1);
   uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
+  auto null_checker = GetImplicitNullChecker(instruction, codegen_);
 
   Primitive::Type type = instruction->GetType();
   const bool maybe_compressed_char_at = mirror::kUseStringCompression &&
@@ -1498,10 +1503,10 @@
       if (index.IsConstant()) {
         size_t offset =
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset;
-        __ LoadFromOffset(kLoadUnsignedByte, out, obj, offset);
+        __ LoadFromOffset(kLoadUnsignedByte, out, obj, offset, null_checker);
       } else {
         __ Daddu(TMP, obj, index.AsRegister<GpuRegister>());
-        __ LoadFromOffset(kLoadUnsignedByte, out, TMP, data_offset);
+        __ LoadFromOffset(kLoadUnsignedByte, out, TMP, data_offset, null_checker);
       }
       break;
     }
@@ -1511,10 +1516,10 @@
       if (index.IsConstant()) {
         size_t offset =
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset;
-        __ LoadFromOffset(kLoadSignedByte, out, obj, offset);
+        __ LoadFromOffset(kLoadSignedByte, out, obj, offset, null_checker);
       } else {
         __ Daddu(TMP, obj, index.AsRegister<GpuRegister>());
-        __ LoadFromOffset(kLoadSignedByte, out, TMP, data_offset);
+        __ LoadFromOffset(kLoadSignedByte, out, TMP, data_offset, null_checker);
       }
       break;
     }
@@ -1524,11 +1529,11 @@
       if (index.IsConstant()) {
         size_t offset =
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset;
-        __ LoadFromOffset(kLoadSignedHalfword, out, obj, offset);
+        __ LoadFromOffset(kLoadSignedHalfword, out, obj, offset, null_checker);
       } else {
         __ Dsll(TMP, index.AsRegister<GpuRegister>(), TIMES_2);
         __ Daddu(TMP, obj, TMP);
-        __ LoadFromOffset(kLoadSignedHalfword, out, TMP, data_offset);
+        __ LoadFromOffset(kLoadSignedHalfword, out, TMP, data_offset, null_checker);
       }
       break;
     }
@@ -1537,8 +1542,7 @@
       GpuRegister out = locations->Out().AsRegister<GpuRegister>();
       if (maybe_compressed_char_at) {
         uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
-        __ LoadFromOffset(kLoadWord, TMP, obj, count_offset);
-        codegen_->MaybeRecordImplicitNullCheck(instruction);
+        __ LoadFromOffset(kLoadWord, TMP, obj, count_offset, null_checker);
         __ Dext(TMP, TMP, 0, 1);
         static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
                       "Expecting 0=compressed, 1=uncompressed");
@@ -1563,7 +1567,8 @@
           __ LoadFromOffset(kLoadUnsignedHalfword,
                             out,
                             obj,
-                            data_offset + (const_index << TIMES_2));
+                            data_offset + (const_index << TIMES_2),
+                            null_checker);
         }
       } else {
         GpuRegister index_reg = index.AsRegister<GpuRegister>();
@@ -1581,7 +1586,7 @@
         } else {
           __ Dsll(TMP, index_reg, TIMES_2);
           __ Daddu(TMP, obj, TMP);
-          __ LoadFromOffset(kLoadUnsignedHalfword, out, TMP, data_offset);
+          __ LoadFromOffset(kLoadUnsignedHalfword, out, TMP, data_offset, null_checker);
         }
       }
       break;
@@ -1595,11 +1600,11 @@
       if (index.IsConstant()) {
         size_t offset =
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
-        __ LoadFromOffset(load_type, out, obj, offset);
+        __ LoadFromOffset(load_type, out, obj, offset, null_checker);
       } else {
         __ Dsll(TMP, index.AsRegister<GpuRegister>(), TIMES_4);
         __ Daddu(TMP, obj, TMP);
-        __ LoadFromOffset(load_type, out, TMP, data_offset);
+        __ LoadFromOffset(load_type, out, TMP, data_offset, null_checker);
       }
       break;
     }
@@ -1609,11 +1614,11 @@
       if (index.IsConstant()) {
         size_t offset =
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
-        __ LoadFromOffset(kLoadDoubleword, out, obj, offset);
+        __ LoadFromOffset(kLoadDoubleword, out, obj, offset, null_checker);
       } else {
         __ Dsll(TMP, index.AsRegister<GpuRegister>(), TIMES_8);
         __ Daddu(TMP, obj, TMP);
-        __ LoadFromOffset(kLoadDoubleword, out, TMP, data_offset);
+        __ LoadFromOffset(kLoadDoubleword, out, TMP, data_offset, null_checker);
       }
       break;
     }
@@ -1623,11 +1628,11 @@
       if (index.IsConstant()) {
         size_t offset =
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
-        __ LoadFpuFromOffset(kLoadWord, out, obj, offset);
+        __ LoadFpuFromOffset(kLoadWord, out, obj, offset, null_checker);
       } else {
         __ Dsll(TMP, index.AsRegister<GpuRegister>(), TIMES_4);
         __ Daddu(TMP, obj, TMP);
-        __ LoadFpuFromOffset(kLoadWord, out, TMP, data_offset);
+        __ LoadFpuFromOffset(kLoadWord, out, TMP, data_offset, null_checker);
       }
       break;
     }
@@ -1637,11 +1642,11 @@
       if (index.IsConstant()) {
         size_t offset =
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
-        __ LoadFpuFromOffset(kLoadDoubleword, out, obj, offset);
+        __ LoadFpuFromOffset(kLoadDoubleword, out, obj, offset, null_checker);
       } else {
         __ Dsll(TMP, index.AsRegister<GpuRegister>(), TIMES_8);
         __ Daddu(TMP, obj, TMP);
-        __ LoadFpuFromOffset(kLoadDoubleword, out, TMP, data_offset);
+        __ LoadFpuFromOffset(kLoadDoubleword, out, TMP, data_offset, null_checker);
       }
       break;
     }
@@ -1650,8 +1655,10 @@
       LOG(FATAL) << "Unreachable type " << instruction->GetType();
       UNREACHABLE();
   }
-  if (!maybe_compressed_char_at) {
-    codegen_->MaybeRecordImplicitNullCheck(instruction);
+
+  if (type == Primitive::kPrimNot) {
+    GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+    __ MaybeUnpoisonHeapReference(out);
   }
 }
 
@@ -1674,6 +1681,25 @@
   }
 }
 
+Location LocationsBuilderMIPS64::RegisterOrZeroConstant(HInstruction* instruction) {
+  return (instruction->IsConstant() && instruction->AsConstant()->IsZeroBitPattern())
+      ? Location::ConstantLocation(instruction->AsConstant())
+      : Location::RequiresRegister();
+}
+
+Location LocationsBuilderMIPS64::FpuRegisterOrConstantForStore(HInstruction* instruction) {
+  // We can store 0.0 directly (from the ZERO register) without loading it into an FPU register.
+  // We can store a non-zero float or double constant without first loading it into the FPU,
+  // but we should only prefer this if the constant has a single use.
+  if (instruction->IsConstant() &&
+      (instruction->AsConstant()->IsZeroBitPattern() ||
+       instruction->GetUses().HasExactlyOneElement())) {
+    return Location::ConstantLocation(instruction->AsConstant());
+  }
+  // Otherwise require an FPU register for the constant.
+  return Location::RequiresFpuRegister();
+}
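IsZeroBitPattern() gates both helpers above, and it is a raw-bits test, which matters for floating point: -0.0 has the sign bit set and therefore still needs a register. A standalone analogue, under that assumption:

    #include <cstdint>
    #include <cstring>

    // Raw-bits zero test: +0.0 qualifies for the ZERO-register store, -0.0 does not.
    bool IsZeroBitPattern64(double v) {
      uint64_t bits;
      std::memcpy(&bits, &v, sizeof(bits));
      return bits == 0;
    }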
+
 void LocationsBuilderMIPS64::VisitArraySet(HArraySet* instruction) {
   bool needs_runtime_call = instruction->NeedsTypeCheck();
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
@@ -1688,9 +1714,9 @@
     locations->SetInAt(0, Location::RequiresRegister());
     locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
     if (Primitive::IsFloatingPointType(instruction->InputAt(2)->GetType())) {
-      locations->SetInAt(2, Location::RequiresFpuRegister());
+      locations->SetInAt(2, FpuRegisterOrConstantForStore(instruction->InputAt(2)));
     } else {
-      locations->SetInAt(2, Location::RequiresRegister());
+      locations->SetInAt(2, RegisterOrZeroConstant(instruction->InputAt(2)));
     }
   }
 }
@@ -1699,23 +1725,29 @@
   LocationSummary* locations = instruction->GetLocations();
   GpuRegister obj = locations->InAt(0).AsRegister<GpuRegister>();
   Location index = locations->InAt(1);
+  Location value_location = locations->InAt(2);
   Primitive::Type value_type = instruction->GetComponentType();
   bool needs_runtime_call = locations->WillCall();
   bool needs_write_barrier =
       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
+  auto null_checker = GetImplicitNullChecker(instruction, codegen_);
+  GpuRegister base_reg = index.IsConstant() ? obj : TMP;
 
   switch (value_type) {
     case Primitive::kPrimBoolean:
     case Primitive::kPrimByte: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
-      GpuRegister value = locations->InAt(2).AsRegister<GpuRegister>();
       if (index.IsConstant()) {
-        size_t offset =
-            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset;
-        __ StoreToOffset(kStoreByte, value, obj, offset);
+        data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1;
       } else {
-        __ Daddu(TMP, obj, index.AsRegister<GpuRegister>());
-        __ StoreToOffset(kStoreByte, value, TMP, data_offset);
+        __ Daddu(base_reg, obj, index.AsRegister<GpuRegister>());
+      }
+      if (value_location.IsConstant()) {
+        int32_t value = CodeGenerator::GetInt32ValueOf(value_location.GetConstant());
+        __ StoreConstToOffset(kStoreByte, value, base_reg, data_offset, TMP, null_checker);
+      } else {
+        GpuRegister value = value_location.AsRegister<GpuRegister>();
+        __ StoreToOffset(kStoreByte, value, base_reg, data_offset, null_checker);
       }
       break;
     }
@@ -1723,15 +1755,18 @@
     case Primitive::kPrimShort:
     case Primitive::kPrimChar: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
-      GpuRegister value = locations->InAt(2).AsRegister<GpuRegister>();
       if (index.IsConstant()) {
-        size_t offset =
-            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset;
-        __ StoreToOffset(kStoreHalfword, value, obj, offset);
+        data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2;
       } else {
-        __ Dsll(TMP, index.AsRegister<GpuRegister>(), TIMES_2);
-        __ Daddu(TMP, obj, TMP);
-        __ StoreToOffset(kStoreHalfword, value, TMP, data_offset);
+        __ Dsll(base_reg, index.AsRegister<GpuRegister>(), TIMES_2);
+        __ Daddu(base_reg, obj, base_reg);
+      }
+      if (value_location.IsConstant()) {
+        int32_t value = CodeGenerator::GetInt32ValueOf(value_location.GetConstant());
+        __ StoreConstToOffset(kStoreHalfword, value, base_reg, data_offset, TMP, null_checker);
+      } else {
+        GpuRegister value = value_location.AsRegister<GpuRegister>();
+        __ StoreToOffset(kStoreHalfword, value, base_reg, data_offset, null_checker);
       }
       break;
     }
@@ -1740,24 +1775,62 @@
     case Primitive::kPrimNot: {
       if (!needs_runtime_call) {
         uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
-        GpuRegister value = locations->InAt(2).AsRegister<GpuRegister>();
         if (index.IsConstant()) {
-          size_t offset =
-              (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
-          __ StoreToOffset(kStoreWord, value, obj, offset);
+          data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4;
         } else {
           DCHECK(index.IsRegister()) << index;
-          __ Dsll(TMP, index.AsRegister<GpuRegister>(), TIMES_4);
-          __ Daddu(TMP, obj, TMP);
-          __ StoreToOffset(kStoreWord, value, TMP, data_offset);
+          __ Dsll(base_reg, index.AsRegister<GpuRegister>(), TIMES_4);
+          __ Daddu(base_reg, obj, base_reg);
         }
-        codegen_->MaybeRecordImplicitNullCheck(instruction);
-        if (needs_write_barrier) {
-          DCHECK_EQ(value_type, Primitive::kPrimNot);
-          codegen_->MarkGCCard(obj, value, instruction->GetValueCanBeNull());
+        if (value_location.IsConstant()) {
+          int32_t value = CodeGenerator::GetInt32ValueOf(value_location.GetConstant());
+          __ StoreConstToOffset(kStoreWord, value, base_reg, data_offset, TMP, null_checker);
+          DCHECK(!needs_write_barrier);
+        } else {
+          GpuRegister value = value_location.AsRegister<GpuRegister>();
+          if (kPoisonHeapReferences && needs_write_barrier) {
+            // Note that in the case where `value` is a null reference,
+            // we do not enter this block, as a null reference does not
+            // need poisoning.
+            DCHECK_EQ(value_type, Primitive::kPrimNot);
+            // Use Sw() instead of StoreToOffset() in order to be able to
+            // hold the poisoned reference in AT and thus avoid allocating
+            // yet another temporary register.
+            if (index.IsConstant()) {
+              if (!IsInt<16>(static_cast<int32_t>(data_offset))) {
+                int16_t low16 = Low16Bits(data_offset);
+                // For consistency with StoreToOffset() and such, treat data_offset as int32_t.
+                uint64_t high48 = static_cast<uint64_t>(static_cast<int32_t>(data_offset)) - low16;
+                int16_t upper16 = High16Bits(high48);
+                // Allow the full [-2GB,+2GB) range in case `low16` is negative and needs a
+                // compensatory 64KB added, which may push `high48` above 2GB and require
+                // the dahi instruction.
+                int16_t higher16 = High32Bits(high48) + ((upper16 < 0) ? 1 : 0);
+                __ Daui(TMP, obj, upper16);
+                if (higher16 != 0) {
+                  __ Dahi(TMP, higher16);
+                }
+                base_reg = TMP;
+                data_offset = low16;
+              }
+            } else {
+              DCHECK(IsInt<16>(static_cast<int32_t>(data_offset)));
+            }
+            __ PoisonHeapReference(AT, value);
+            __ Sw(AT, base_reg, data_offset);
+            null_checker();
+          } else {
+            __ StoreToOffset(kStoreWord, value, base_reg, data_offset, null_checker);
+          }
+          if (needs_write_barrier) {
+            DCHECK_EQ(value_type, Primitive::kPrimNot);
+            codegen_->MarkGCCard(obj, value, instruction->GetValueCanBeNull());
+          }
         }
       } else {
         DCHECK_EQ(value_type, Primitive::kPrimNot);
+        // Note: if heap poisoning is enabled, pAputObject takes care
+        // of poisoning the reference.
         codegen_->InvokeRuntime(kQuickAputObject, instruction, instruction->GetDexPc());
         CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
       }
@@ -1766,47 +1839,54 @@
 
     case Primitive::kPrimLong: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
-      GpuRegister value = locations->InAt(2).AsRegister<GpuRegister>();
       if (index.IsConstant()) {
-        size_t offset =
-            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
-        __ StoreToOffset(kStoreDoubleword, value, obj, offset);
+        data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8;
       } else {
-        __ Dsll(TMP, index.AsRegister<GpuRegister>(), TIMES_8);
-        __ Daddu(TMP, obj, TMP);
-        __ StoreToOffset(kStoreDoubleword, value, TMP, data_offset);
+        __ Dsll(base_reg, index.AsRegister<GpuRegister>(), TIMES_8);
+        __ Daddu(base_reg, obj, base_reg);
+      }
+      if (value_location.IsConstant()) {
+        int64_t value = CodeGenerator::GetInt64ValueOf(value_location.GetConstant());
+        __ StoreConstToOffset(kStoreDoubleword, value, base_reg, data_offset, TMP, null_checker);
+      } else {
+        GpuRegister value = value_location.AsRegister<GpuRegister>();
+        __ StoreToOffset(kStoreDoubleword, value, base_reg, data_offset, null_checker);
       }
       break;
     }
 
     case Primitive::kPrimFloat: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
-      FpuRegister value = locations->InAt(2).AsFpuRegister<FpuRegister>();
-      DCHECK(locations->InAt(2).IsFpuRegister());
       if (index.IsConstant()) {
-        size_t offset =
-            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
-        __ StoreFpuToOffset(kStoreWord, value, obj, offset);
+        data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4;
       } else {
-        __ Dsll(TMP, index.AsRegister<GpuRegister>(), TIMES_4);
-        __ Daddu(TMP, obj, TMP);
-        __ StoreFpuToOffset(kStoreWord, value, TMP, data_offset);
+        __ Dsll(base_reg, index.AsRegister<GpuRegister>(), TIMES_4);
+        __ Daddu(base_reg, obj, base_reg);
+      }
+      if (value_location.IsConstant()) {
+        int32_t value = CodeGenerator::GetInt32ValueOf(value_location.GetConstant());
+        __ StoreConstToOffset(kStoreWord, value, base_reg, data_offset, TMP, null_checker);
+      } else {
+        FpuRegister value = value_location.AsFpuRegister<FpuRegister>();
+        __ StoreFpuToOffset(kStoreWord, value, base_reg, data_offset, null_checker);
       }
       break;
     }
 
     case Primitive::kPrimDouble: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
-      FpuRegister value = locations->InAt(2).AsFpuRegister<FpuRegister>();
-      DCHECK(locations->InAt(2).IsFpuRegister());
       if (index.IsConstant()) {
-        size_t offset =
-            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
-        __ StoreFpuToOffset(kStoreDoubleword, value, obj, offset);
+        data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8;
       } else {
-        __ Dsll(TMP, index.AsRegister<GpuRegister>(), TIMES_8);
-        __ Daddu(TMP, obj, TMP);
-        __ StoreFpuToOffset(kStoreDoubleword, value, TMP, data_offset);
+        __ Dsll(base_reg, index.AsRegister<GpuRegister>(), TIMES_8);
+        __ Daddu(base_reg, obj, base_reg);
+      }
+      if (value_location.IsConstant()) {
+        int64_t value = CodeGenerator::GetInt64ValueOf(value_location.GetConstant());
+        __ StoreConstToOffset(kStoreDoubleword, value, base_reg, data_offset, TMP, null_checker);
+      } else {
+        FpuRegister value = value_location.AsFpuRegister<FpuRegister>();
+        __ StoreFpuToOffset(kStoreDoubleword, value, base_reg, data_offset, null_checker);
       }
       break;
     }
@@ -1815,11 +1895,6 @@
       LOG(FATAL) << "Unreachable type " << instruction->GetType();
       UNREACHABLE();
   }
-
-  // Ints and objects are handled in the switch.
-  if (value_type != Primitive::kPrimInt && value_type != Primitive::kPrimNot) {
-    codegen_->MaybeRecordImplicitNullCheck(instruction);
-  }
 }
 
 void LocationsBuilderMIPS64::VisitBoundsCheck(HBoundsCheck* instruction) {
@@ -1848,30 +1923,178 @@
 }
 
 void LocationsBuilderMIPS64::VisitCheckCast(HCheckCast* instruction) {
-  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
-      instruction,
-      LocationSummary::kCallOnSlowPath);
+  LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
+  bool throws_into_catch = instruction->CanThrowIntoCatchBlock();
+
+  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+  switch (type_check_kind) {
+    case TypeCheckKind::kExactCheck:
+    case TypeCheckKind::kAbstractClassCheck:
+    case TypeCheckKind::kClassHierarchyCheck:
+    case TypeCheckKind::kArrayObjectCheck:
+      call_kind = throws_into_catch
+          ? LocationSummary::kCallOnSlowPath
+          : LocationSummary::kNoCall;  // In fact, call on a fatal (non-returning) slow path.
+      break;
+    case TypeCheckKind::kArrayCheck:
+    case TypeCheckKind::kUnresolvedCheck:
+    case TypeCheckKind::kInterfaceCheck:
+      call_kind = LocationSummary::kCallOnSlowPath;
+      break;
+  }
+
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RequiresRegister());
-  // Note that TypeCheckSlowPathMIPS64 uses this register too.
   locations->AddTemp(Location::RequiresRegister());
 }
 
 void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) {
+  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
   LocationSummary* locations = instruction->GetLocations();
   GpuRegister obj = locations->InAt(0).AsRegister<GpuRegister>();
   GpuRegister cls = locations->InAt(1).AsRegister<GpuRegister>();
-  GpuRegister obj_cls = locations->GetTemp(0).AsRegister<GpuRegister>();
+  GpuRegister temp = locations->GetTemp(0).AsRegister<GpuRegister>();
+  const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+  const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
+  const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
+  const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
+  const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
+  const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
+  const uint32_t object_array_data_offset =
+      mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
+  Mips64Label done;
 
+  // Always false when read barriers are emitted: the fast paths below avoid read barriers
+  // (for performance and code size reasons), which can produce false negatives, so we may
+  // need to reach the entrypoint for those non-fatal cases and return to compiled code.
+  bool is_type_check_slow_path_fatal = false;
+  if (!kEmitCompilerReadBarrier) {
+    is_type_check_slow_path_fatal =
+        (type_check_kind == TypeCheckKind::kExactCheck ||
+         type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+         type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+         type_check_kind == TypeCheckKind::kArrayObjectCheck) &&
+        !instruction->CanThrowIntoCatchBlock();
+  }
   SlowPathCodeMIPS64* slow_path =
-      new (GetGraph()->GetArena()) TypeCheckSlowPathMIPS64(instruction);
+      new (GetGraph()->GetArena()) TypeCheckSlowPathMIPS64(instruction,
+                                                           is_type_check_slow_path_fatal);
   codegen_->AddSlowPath(slow_path);
 
-  // TODO: avoid this check if we know obj is not null.
-  __ Beqzc(obj, slow_path->GetExitLabel());
-  // Compare the class of `obj` with `cls`.
-  __ LoadFromOffset(kLoadUnsignedWord, obj_cls, obj, mirror::Object::ClassOffset().Int32Value());
-  __ Bnec(obj_cls, cls, slow_path->GetEntryLabel());
+  // Avoid this check if we know `obj` is not null.
+  if (instruction->MustDoNullCheck()) {
+    __ Beqzc(obj, &done);
+  }
+
+  switch (type_check_kind) {
+    case TypeCheckKind::kExactCheck:
+    case TypeCheckKind::kArrayCheck: {
+      // /* HeapReference<Class> */ temp = obj->klass_
+      __ LoadFromOffset(kLoadUnsignedWord, temp, obj, class_offset);
+      __ MaybeUnpoisonHeapReference(temp);
+      // Jump to slow path for throwing the exception or doing a
+      // more involved array check.
+      __ Bnec(temp, cls, slow_path->GetEntryLabel());
+      break;
+    }
+
+    case TypeCheckKind::kAbstractClassCheck: {
+      // /* HeapReference<Class> */ temp = obj->klass_
+      __ LoadFromOffset(kLoadUnsignedWord, temp, obj, class_offset);
+      __ MaybeUnpoisonHeapReference(temp);
+      // If the class is abstract, we eagerly fetch the super class of the
+      // object to avoid doing a comparison we know will fail.
+      Mips64Label loop;
+      __ Bind(&loop);
+      // /* HeapReference<Class> */ temp = temp->super_class_
+      __ LoadFromOffset(kLoadUnsignedWord, temp, temp, super_offset);
+      __ MaybeUnpoisonHeapReference(temp);
+      // If the class reference currently in `temp` is null, jump to the slow path to throw the
+      // exception.
+      __ Beqzc(temp, slow_path->GetEntryLabel());
+      // Otherwise, compare the classes.
+      __ Bnec(temp, cls, &loop);
+      break;
+    }
+
+    case TypeCheckKind::kClassHierarchyCheck: {
+      // /* HeapReference<Class> */ temp = obj->klass_
+      __ LoadFromOffset(kLoadUnsignedWord, temp, obj, class_offset);
+      __ MaybeUnpoisonHeapReference(temp);
+      // Walk over the class hierarchy to find a match.
+      Mips64Label loop;
+      __ Bind(&loop);
+      __ Beqc(temp, cls, &done);
+      // /* HeapReference<Class> */ temp = temp->super_class_
+      __ LoadFromOffset(kLoadUnsignedWord, temp, temp, super_offset);
+      __ MaybeUnpoisonHeapReference(temp);
+      // If the class reference currently in `temp` is null, jump to the slow path to throw the
+      // exception. Otherwise, jump to the beginning of the loop.
+      __ Bnezc(temp, &loop);
+      __ Bc(slow_path->GetEntryLabel());
+      break;
+    }
+
+    case TypeCheckKind::kArrayObjectCheck: {
+      // /* HeapReference<Class> */ temp = obj->klass_
+      __ LoadFromOffset(kLoadUnsignedWord, temp, obj, class_offset);
+      __ MaybeUnpoisonHeapReference(temp);
+      // Do an exact check.
+      __ Beqc(temp, cls, &done);
+      // Otherwise, we need to check that the object's class is a non-primitive array.
+      // /* HeapReference<Class> */ temp = temp->component_type_
+      __ LoadFromOffset(kLoadUnsignedWord, temp, temp, component_offset);
+      __ MaybeUnpoisonHeapReference(temp);
+      // If the component type is null, jump to the slow path to throw the exception.
+      __ Beqzc(temp, slow_path->GetEntryLabel());
+      // Otherwise, the object is indeed an array, further check that this component
+      // type is not a primitive type.
+      __ LoadFromOffset(kLoadUnsignedHalfword, temp, temp, primitive_offset);
+      static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+      __ Bnezc(temp, slow_path->GetEntryLabel());
+      break;
+    }
+
+    case TypeCheckKind::kUnresolvedCheck:
+      // We always go into the type check slow path for the unresolved check case.
+      // We cannot directly call the CheckCast runtime entry point
+      // without resorting to a type checking slow path here (i.e. by
+      // calling InvokeRuntime directly), as it would require assigning
+      // fixed registers for the inputs of this HCheckCast instruction
+      // (following the runtime calling convention), which might be
+      // cluttered by the potential first read barrier emission at the
+      // beginning of this method.
+      __ Bc(slow_path->GetEntryLabel());
+      break;
+
+    case TypeCheckKind::kInterfaceCheck: {
+      // Avoid read barriers to improve the performance of the fast path. We cannot get false
+      // positives by doing this.
+      // /* HeapReference<Class> */ temp = obj->klass_
+      __ LoadFromOffset(kLoadUnsignedWord, temp, obj, class_offset);
+      __ MaybeUnpoisonHeapReference(temp);
+      // /* HeapReference<Class> */ temp = temp->iftable_
+      __ LoadFromOffset(kLoadUnsignedWord, temp, temp, iftable_offset);
+      __ MaybeUnpoisonHeapReference(temp);
+      // Iftable is never null.
+      __ Lw(TMP, temp, array_length_offset);
+      // Loop through the iftable and check if any class matches.
+      Mips64Label loop;
+      __ Bind(&loop);
+      __ Beqzc(TMP, slow_path->GetEntryLabel());
+      __ Lwu(AT, temp, object_array_data_offset);
+      __ MaybeUnpoisonHeapReference(AT);
+      // Go to the next interface entry (each entry spans two references: the class and its
+      // method array).
+      __ Daddiu(temp, temp, 2 * kHeapReferenceSize);
+      __ Addiu(TMP, TMP, -2);
+      // Compare the classes and continue the loop if they do not match.
+      __ Bnec(AT, cls, &loop);
+      break;
+    }
+  }
+
+  __ Bind(&done);
   __ Bind(slow_path->GetExitLabel());
 }
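As a reading aid, the kClassHierarchyCheck fast path above is the branch-level form of a plain superclass walk. A hedged C++ rendition of what the emitted code computes (the struct layout follows the `obj->klass_` / `temp->super_class_` comments; this is not the runtime's mirror API):

    // Sketch: a null superclass ends the walk and routes to the slow path,
    // which throws; a match falls through to `done`.
    struct Class { Class* super_class_; };
    struct Object { Class* klass_; };

    bool ClassHierarchyCheckPasses(Object* obj, Class* cls) {
      for (Class* k = obj->klass_; k != nullptr; k = k->super_class_) {
        if (k == cls) {
          return true;   // Beqc to `done`.
        }
      }
      return false;      // Bc to the slow path entry.
    }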
 
@@ -3086,6 +3309,9 @@
   LocationSummary* locations = instruction->GetLocations();
   GpuRegister obj = locations->InAt(0).AsRegister<GpuRegister>();
   LoadOperandType load_type = kLoadUnsignedByte;
+  uint32_t offset = field_info.GetFieldOffset().Uint32Value();
+  auto null_checker = GetImplicitNullChecker(instruction, codegen_);
+
   switch (type) {
     case Primitive::kPrimBoolean:
       load_type = kLoadUnsignedByte;
@@ -3117,15 +3343,18 @@
   if (!Primitive::IsFloatingPointType(type)) {
     DCHECK(locations->Out().IsRegister());
     GpuRegister dst = locations->Out().AsRegister<GpuRegister>();
-    __ LoadFromOffset(load_type, dst, obj, field_info.GetFieldOffset().Uint32Value());
+    __ LoadFromOffset(load_type, dst, obj, offset, null_checker);
   } else {
     DCHECK(locations->Out().IsFpuRegister());
     FpuRegister dst = locations->Out().AsFpuRegister<FpuRegister>();
-    __ LoadFpuFromOffset(load_type, dst, obj, field_info.GetFieldOffset().Uint32Value());
+    __ LoadFpuFromOffset(load_type, dst, obj, offset, null_checker);
   }
-
-  codegen_->MaybeRecordImplicitNullCheck(instruction);
   // TODO: memory barrier?
+
+  if (type == Primitive::kPrimNot) {
+    GpuRegister dst = locations->Out().AsRegister<GpuRegister>();
+    __ MaybeUnpoisonHeapReference(dst);
+  }
 }
 
 void LocationsBuilderMIPS64::HandleFieldSet(HInstruction* instruction,
@@ -3134,9 +3363,9 @@
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
   locations->SetInAt(0, Location::RequiresRegister());
   if (Primitive::IsFloatingPointType(instruction->InputAt(1)->GetType())) {
-    locations->SetInAt(1, Location::RequiresFpuRegister());
+    locations->SetInAt(1, FpuRegisterOrConstantForStore(instruction->InputAt(1)));
   } else {
-    locations->SetInAt(1, Location::RequiresRegister());
+    locations->SetInAt(1, RegisterOrZeroConstant(instruction->InputAt(1)));
   }
 }
 
@@ -3146,7 +3375,12 @@
   Primitive::Type type = field_info.GetFieldType();
   LocationSummary* locations = instruction->GetLocations();
   GpuRegister obj = locations->InAt(0).AsRegister<GpuRegister>();
+  Location value_location = locations->InAt(1);
   StoreOperandType store_type = kStoreByte;
+  uint32_t offset = field_info.GetFieldOffset().Uint32Value();
+  bool needs_write_barrier = CodeGenerator::StoreNeedsWriteBarrier(type, instruction->InputAt(1));
+  auto null_checker = GetImplicitNullChecker(instruction, codegen_);
+
   switch (type) {
     case Primitive::kPrimBoolean:
     case Primitive::kPrimByte:
@@ -3169,21 +3403,34 @@
       LOG(FATAL) << "Unreachable type " << type;
       UNREACHABLE();
   }
-  if (!Primitive::IsFloatingPointType(type)) {
-    DCHECK(locations->InAt(1).IsRegister());
-    GpuRegister src = locations->InAt(1).AsRegister<GpuRegister>();
-    __ StoreToOffset(store_type, src, obj, field_info.GetFieldOffset().Uint32Value());
-  } else {
-    DCHECK(locations->InAt(1).IsFpuRegister());
-    FpuRegister src = locations->InAt(1).AsFpuRegister<FpuRegister>();
-    __ StoreFpuToOffset(store_type, src, obj, field_info.GetFieldOffset().Uint32Value());
-  }
 
-  codegen_->MaybeRecordImplicitNullCheck(instruction);
+  if (value_location.IsConstant()) {
+    int64_t value = CodeGenerator::GetInt64ValueOf(value_location.GetConstant());
+    __ StoreConstToOffset(store_type, value, obj, offset, TMP, null_checker);
+  } else {
+    if (!Primitive::IsFloatingPointType(type)) {
+      DCHECK(value_location.IsRegister());
+      GpuRegister src = value_location.AsRegister<GpuRegister>();
+      if (kPoisonHeapReferences && needs_write_barrier) {
+        // Note that in the case where `value` is a null reference,
+        // we do not enter this block, as a null reference does not
+        // need poisoning.
+        DCHECK_EQ(type, Primitive::kPrimNot);
+        __ PoisonHeapReference(TMP, src);
+        __ StoreToOffset(store_type, TMP, obj, offset, null_checker);
+      } else {
+        __ StoreToOffset(store_type, src, obj, offset, null_checker);
+      }
+    } else {
+      DCHECK(value_location.IsFpuRegister());
+      FpuRegister src = value_location.AsFpuRegister<FpuRegister>();
+      __ StoreFpuToOffset(store_type, src, obj, offset, null_checker);
+    }
+  }
   // TODO: memory barriers?
-  if (CodeGenerator::StoreNeedsWriteBarrier(type, instruction->InputAt(1))) {
-    DCHECK(locations->InAt(1).IsRegister());
-    GpuRegister src = locations->InAt(1).AsRegister<GpuRegister>();
+  if (needs_write_barrier) {
+    DCHECK(value_location.IsRegister());
+    GpuRegister src = value_location.AsRegister<GpuRegister>();
     codegen_->MarkGCCard(obj, src, value_can_be_null);
   }
 }
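The MarkGCCard() calls above implement the write barrier: after a reference is stored into an object, the card covering that object is dirtied so the GC can find cross-space pointers without scanning the whole heap. A conceptual sketch (the shift and dirty value are illustrative placeholders; the real constants live in gc::accounting::CardTable):

    #include <cstdint>

    // Illustrative card marking: one card-table byte covers 2^card_shift bytes
    // of heap, and dirtying is a single indexed byte store.
    void MarkCard(uint8_t* biased_card_table, const void* written_obj,
                  unsigned card_shift, uint8_t dirty_value) {
      uintptr_t addr = reinterpret_cast<uintptr_t>(written_obj);
      biased_card_table[addr >> card_shift] = dirty_value;
    }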
@@ -3222,8 +3469,22 @@
 }
 
 void LocationsBuilderMIPS64::VisitInstanceOf(HInstanceOf* instruction) {
-  LocationSummary::CallKind call_kind =
-      instruction->IsExactCheck() ? LocationSummary::kNoCall : LocationSummary::kCallOnSlowPath;
+  LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
+  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+  switch (type_check_kind) {
+    case TypeCheckKind::kExactCheck:
+    case TypeCheckKind::kAbstractClassCheck:
+    case TypeCheckKind::kClassHierarchyCheck:
+    case TypeCheckKind::kArrayObjectCheck:
+      call_kind = LocationSummary::kNoCall;
+      break;
+    case TypeCheckKind::kArrayCheck:
+    case TypeCheckKind::kUnresolvedCheck:
+    case TypeCheckKind::kInterfaceCheck:
+      call_kind = LocationSummary::kCallOnSlowPath;
+      break;
+  }
+
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RequiresRegister());
@@ -3233,36 +3494,143 @@
 }
 
 void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) {
+  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
   LocationSummary* locations = instruction->GetLocations();
   GpuRegister obj = locations->InAt(0).AsRegister<GpuRegister>();
   GpuRegister cls = locations->InAt(1).AsRegister<GpuRegister>();
   GpuRegister out = locations->Out().AsRegister<GpuRegister>();
-
+  uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+  uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
+  uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
+  uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
   Mips64Label done;
+  SlowPathCodeMIPS64* slow_path = nullptr;
 
   // Return 0 if `obj` is null.
-  // TODO: Avoid this check if we know `obj` is not null.
-  __ Move(out, ZERO);
-  __ Beqzc(obj, &done);
+  // Avoid this check if we know `obj` is not null.
+  if (instruction->MustDoNullCheck()) {
+    __ Move(out, ZERO);
+    __ Beqzc(obj, &done);
+  }
 
-  // Compare the class of `obj` with `cls`.
-  __ LoadFromOffset(kLoadUnsignedWord, out, obj, mirror::Object::ClassOffset().Int32Value());
-  if (instruction->IsExactCheck()) {
-    // Classes must be equal for the instanceof to succeed.
-    __ Xor(out, out, cls);
-    __ Sltiu(out, out, 1);
-  } else {
-    // If the classes are not equal, we go into a slow path.
-    DCHECK(locations->OnlyCallsOnSlowPath());
-    SlowPathCodeMIPS64* slow_path =
-        new (GetGraph()->GetArena()) TypeCheckSlowPathMIPS64(instruction);
-    codegen_->AddSlowPath(slow_path);
-    __ Bnec(out, cls, slow_path->GetEntryLabel());
-    __ LoadConst32(out, 1);
-    __ Bind(slow_path->GetExitLabel());
+  switch (type_check_kind) {
+    case TypeCheckKind::kExactCheck: {
+      // /* HeapReference<Class> */ out = obj->klass_
+      __ LoadFromOffset(kLoadUnsignedWord, out, obj, class_offset);
+      __ MaybeUnpoisonHeapReference(out);
+      // Classes must be equal for the instanceof to succeed.
+      __ Xor(out, out, cls);
+      __ Sltiu(out, out, 1);
+      break;
+    }
+
+    case TypeCheckKind::kAbstractClassCheck: {
+      // /* HeapReference<Class> */ out = obj->klass_
+      __ LoadFromOffset(kLoadUnsignedWord, out, obj, class_offset);
+      __ MaybeUnpoisonHeapReference(out);
+      // If the class is abstract, we eagerly fetch the super class of the
+      // object to avoid doing a comparison we know will fail.
+      Mips64Label loop;
+      __ Bind(&loop);
+      // /* HeapReference<Class> */ out = out->super_class_
+      __ LoadFromOffset(kLoadUnsignedWord, out, out, super_offset);
+      __ MaybeUnpoisonHeapReference(out);
+      // If `out` is null, we use it for the result, and jump to `done`.
+      __ Beqzc(out, &done);
+      __ Bnec(out, cls, &loop);
+      __ LoadConst32(out, 1);
+      break;
+    }
+
+    case TypeCheckKind::kClassHierarchyCheck: {
+      // /* HeapReference<Class> */ out = obj->klass_
+      __ LoadFromOffset(kLoadUnsignedWord, out, obj, class_offset);
+      __ MaybeUnpoisonHeapReference(out);
+      // Walk over the class hierarchy to find a match.
+      Mips64Label loop, success;
+      __ Bind(&loop);
+      __ Beqc(out, cls, &success);
+      // /* HeapReference<Class> */ out = out->super_class_
+      __ LoadFromOffset(kLoadUnsignedWord, out, out, super_offset);
+      __ MaybeUnpoisonHeapReference(out);
+      __ Bnezc(out, &loop);
+      // If `out` is null, we use it for the result, and jump to `done`.
+      __ Bc(&done);
+      __ Bind(&success);
+      __ LoadConst32(out, 1);
+      break;
+    }
+
+    case TypeCheckKind::kArrayObjectCheck: {
+      // /* HeapReference<Class> */ out = obj->klass_
+      __ LoadFromOffset(kLoadUnsignedWord, out, obj, class_offset);
+      __ MaybeUnpoisonHeapReference(out);
+      // Do an exact check.
+      Mips64Label success;
+      __ Beqc(out, cls, &success);
+      // Otherwise, we need to check that the object's class is a non-primitive array.
+      // /* HeapReference<Class> */ out = out->component_type_
+      __ LoadFromOffset(kLoadUnsignedWord, out, out, component_offset);
+      __ MaybeUnpoisonHeapReference(out);
+      // If `out` is null, we use it for the result, and jump to `done`.
+      __ Beqzc(out, &done);
+      __ LoadFromOffset(kLoadUnsignedHalfword, out, out, primitive_offset);
+      static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+      __ Sltiu(out, out, 1);
+      __ Bc(&done);
+      __ Bind(&success);
+      __ LoadConst32(out, 1);
+      break;
+    }
+
+    case TypeCheckKind::kArrayCheck: {
+      // No read barrier since the slow path will retry upon failure.
+      // /* HeapReference<Class> */ out = obj->klass_
+      __ LoadFromOffset(kLoadUnsignedWord, out, obj, class_offset);
+      __ MaybeUnpoisonHeapReference(out);
+      DCHECK(locations->OnlyCallsOnSlowPath());
+      slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathMIPS64(instruction,
+                                                                       /* is_fatal */ false);
+      codegen_->AddSlowPath(slow_path);
+      __ Bnec(out, cls, slow_path->GetEntryLabel());
+      __ LoadConst32(out, 1);
+      break;
+    }
+
+    case TypeCheckKind::kUnresolvedCheck:
+    case TypeCheckKind::kInterfaceCheck: {
+      // Note that we indeed only call on the slow path, but we always
+      // go into it for the unresolved and interface check cases.
+      //
+      // We cannot directly call the InstanceofNonTrivial runtime
+      // entry point without resorting to a type checking slow path
+      // here (i.e. by calling InvokeRuntime directly), as it would
+      // require assigning fixed registers for the inputs of this
+      // HInstanceOf instruction (following the runtime calling
+      // convention), which might be cluttered by the potential first
+      // read barrier emission at the beginning of this method.
+      //
+      // TODO: Introduce a new runtime entry point taking the object
+      // to test (instead of its class) as argument, and let it deal
+      // with the read barrier issues. This will let us refactor this
+      // case of the `switch` code as it was previously (with a direct
+      // call to the runtime not using a type checking slow path).
+      // This should also be beneficial for the other cases above.
+      DCHECK(locations->OnlyCallsOnSlowPath());
+      slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathMIPS64(instruction,
+                                                                       /* is_fatal */ false);
+      codegen_->AddSlowPath(slow_path);
+      __ Bc(slow_path->GetEntryLabel());
+      break;
+    }
   }
 
   __ Bind(&done);
+
+  if (slow_path != nullptr) {
+    __ Bind(slow_path->GetExitLabel());
+  }
 }
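The Xor/Sltiu pair in the kExactCheck case materializes the instanceof result without a branch. A small check of the identity it relies on (plain C++, not ART code):

    #include <cassert>
    #include <cstdint>

    // x ^ y == 0 iff x == y, and an unsigned compare against 1 tests for zero,
    // which is exactly what Sltiu(out, out, 1) computes.
    uint64_t InstanceOfExact(uint64_t klass, uint64_t cls) {
      uint64_t out = klass ^ cls;
      return (out < 1) ? 1 : 0;
    }

    int main() {
      assert(InstanceOfExact(0x1000, 0x1000) == 1);
      assert(InstanceOfExact(0x1000, 0x2000) == 0);
      return 0;
    }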
 
 void LocationsBuilderMIPS64::VisitIntConstant(HIntConstant* constant) {
@@ -3325,6 +3693,14 @@
     __ LoadFromOffset(kLoadUnsignedWord, temp, receiver.AsRegister<GpuRegister>(), class_offset);
   }
   codegen_->MaybeRecordImplicitNullCheck(invoke);
+  // Instead of simply (possibly) unpoisoning `temp` here, we should
+  // emit a read barrier for the previous class reference load.
+  // However this is not required in practice, as this is an
+  // intermediate/temporary reference and because the current
+  // concurrent copying collector keeps the from-space memory
+  // intact/accessible until the end of the marking phase (though
+  // future collectors may not).
+  __ MaybeUnpoisonHeapReference(temp);
   __ LoadFromOffset(kLoadDoubleword, temp, temp,
       mirror::Class::ImtPtrOffset(kMips64PointerSize).Uint32Value());
   uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
@@ -3567,6 +3943,14 @@
   // temp = object->GetClass();
   __ LoadFromOffset(kLoadUnsignedWord, temp, receiver, class_offset);
   MaybeRecordImplicitNullCheck(invoke);
+  // Instead of simply (possibly) unpoisoning `temp` here, we should
+  // emit a read barrier for the previous class reference load.
+  // However this is not required in practice, as this is an
+  // intermediate/temporary reference and because the current
+  // concurrent copying collector keeps the from-space memory
+  // intact/accessible until the end of the marking phase (though
+  // future collectors may not).
+  __ MaybeUnpoisonHeapReference(temp);
   // temp = temp->GetMethodAt(method_offset);
   __ LoadFromOffset(kLoadDoubleword, temp, temp, method_offset);
   // T9 = temp->GetEntryPoint();
@@ -3666,8 +4050,8 @@
     case HLoadClass::LoadKind::kBssEntry: {
       CodeGeneratorMIPS64::PcRelativePatchInfo* info =
           codegen_->NewTypeBssEntryPatch(cls->GetDexFile(), cls->GetTypeIndex());
-      codegen_->EmitPcRelativeAddressPlaceholderHigh(info, AT);
-      __ Lwu(out, AT, /* placeholder */ 0x5678);
+      codegen_->EmitPcRelativeAddressPlaceholderHigh(info, out);
+      GenerateGcRootFieldLoad(cls, out_loc, out, /* placeholder */ 0x5678);
       generate_null_check = true;
       break;
     }
@@ -3773,8 +4157,8 @@
       DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
       CodeGeneratorMIPS64::PcRelativePatchInfo* info =
           codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex());
-      codegen_->EmitPcRelativeAddressPlaceholderHigh(info, AT);
-      __ Lwu(out, AT, /* placeholder */ 0x5678);
+      codegen_->EmitPcRelativeAddressPlaceholderHigh(info, out);
+      GenerateGcRootFieldLoad(load, out_loc, out, /* placeholder */ 0x5678);
       SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS64(load);
       codegen_->AddSlowPath(slow_path);
       __ Beqzc(out, slow_path->GetEntryLabel());
@@ -3944,6 +4328,8 @@
 }
 
 void InstructionCodeGeneratorMIPS64::VisitNewArray(HNewArray* instruction) {
+  // Note: if heap poisoning is enabled, the entry point takes care
+  // of poisoning the reference.
   codegen_->InvokeRuntime(kQuickAllocArrayResolved, instruction, instruction->GetDexPc());
   CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
 }
@@ -3961,6 +4347,8 @@
 }
 
 void InstructionCodeGeneratorMIPS64::VisitNewInstance(HNewInstance* instruction) {
+  // Note: if heap poisoning is enabled, the entry point takes care
+  // of poisoning the reference.
   if (instruction->IsStringAlloc()) {
     // String is allocated through StringFactory. Call NewEmptyString entry point.
     GpuRegister temp = instruction->GetLocations()->GetTemp(0).AsRegister<GpuRegister>();
@@ -4722,12 +5110,34 @@
   }
 }
 
-void LocationsBuilderMIPS64::VisitClassTableGet(HClassTableGet*) {
-  UNIMPLEMENTED(FATAL) << "ClassTableGet is unimplemented on mips64";
+void LocationsBuilderMIPS64::VisitClassTableGet(HClassTableGet* instruction) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister());
 }
 
-void InstructionCodeGeneratorMIPS64::VisitClassTableGet(HClassTableGet*) {
-  UNIMPLEMENTED(FATAL) << "ClassTableGet is unimplemented on mips64";
+void InstructionCodeGeneratorMIPS64::VisitClassTableGet(HClassTableGet* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
+    uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
+        instruction->GetIndex(), kMips64PointerSize).SizeValue();
+    __ LoadFromOffset(kLoadDoubleword,
+                      locations->Out().AsRegister<GpuRegister>(),
+                      locations->InAt(0).AsRegister<GpuRegister>(),
+                      method_offset);
+  } else {
+    uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
+        instruction->GetIndex(), kMips64PointerSize));
+    __ LoadFromOffset(kLoadDoubleword,
+                      locations->Out().AsRegister<GpuRegister>(),
+                      locations->InAt(0).AsRegister<GpuRegister>(),
+                      mirror::Class::ImtPtrOffset(kMips64PointerSize).Uint32Value());
+    __ LoadFromOffset(kLoadDoubleword,
+                      locations->Out().AsRegister<GpuRegister>(),
+                      locations->Out().AsRegister<GpuRegister>(),
+                      method_offset);
+  }
 }
 
 }  // namespace mips64
diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h
index 26cc7dc..6040dc9 100644
--- a/compiler/optimizing/code_generator_mips64.h
+++ b/compiler/optimizing/code_generator_mips64.h
@@ -189,6 +189,8 @@
   void HandleShift(HBinaryOperation* operation);
   void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info);
   void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
+  Location RegisterOrZeroConstant(HInstruction* instruction);
+  Location FpuRegisterOrConstantForStore(HInstruction* instruction);
 
   InvokeDexCallingConventionVisitorMIPS64 parameter_visitor_;
 
@@ -492,8 +494,6 @@
   ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_;
   // PC-relative type patch info for kBssEntry.
   ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_;
-  // Deduplication map for patchable boot image addresses.
-  Uint32ToLiteralMap boot_image_address_patches_;
   // Patches for string root accesses in JIT compiled code.
   StringToLiteralMap jit_string_patches_;
   // Patches for class root accesses in JIT compiled code.
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index b779aed..0b50619 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -1015,7 +1015,6 @@
       assembler_(graph->GetArena()),
       isa_features_(isa_features),
       pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
-      simple_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       boot_image_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
@@ -4603,13 +4602,6 @@
       temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86PointerSize).Int32Value()));
 }
 
-void CodeGeneratorX86::RecordSimplePatch() {
-  if (GetCompilerOptions().GetIncludePatchInformation()) {
-    simple_patches_.emplace_back();
-    __ Bind(&simple_patches_.back());
-  }
-}
-
 void CodeGeneratorX86::RecordBootStringPatch(HLoadString* load_string) {
   DCHECK(GetCompilerOptions().IsBootImage());
   HX86ComputeBaseMethodAddress* address = nullptr;
@@ -4682,17 +4674,12 @@
   DCHECK(linker_patches->empty());
   size_t size =
       pc_relative_dex_cache_patches_.size() +
-      simple_patches_.size() +
       string_patches_.size() +
       boot_image_type_patches_.size() +
       type_bss_entry_patches_.size();
   linker_patches->reserve(size);
   EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_,
                                                                linker_patches);
-  for (const Label& label : simple_patches_) {
-    uint32_t literal_offset = label.Position() - kLabelPositionToLiteralOffsetAdjustment;
-    linker_patches->push_back(LinkerPatch::RecordPosition(literal_offset));
-  }
   if (!GetCompilerOptions().IsBootImage()) {
     DCHECK(boot_image_type_patches_.empty());
     EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(string_patches_, linker_patches);
@@ -6154,7 +6141,6 @@
           reinterpret_cast<uintptr_t>(cls->GetClass().Get()));
       DCHECK_NE(address, 0u);
       __ movl(out, Immediate(address));
-      codegen_->RecordSimplePatch();
       break;
     }
     case HLoadClass::LoadKind::kBssEntry: {
@@ -6311,7 +6297,6 @@
           reinterpret_cast<uintptr_t>(load->GetString().Get()));
       DCHECK_NE(address, 0u);
       __ movl(out, Immediate(address));
-      codegen_->RecordSimplePatch();
       return;  // No dex cache slow path.
     }
     case HLoadString::LoadKind::kBssEntry: {
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 5360dc9..65ee383 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -412,7 +412,6 @@
   // Generate a call to a virtual method.
   void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE;
 
-  void RecordSimplePatch();
   void RecordBootStringPatch(HLoadString* load_string);
   void RecordBootTypePatch(HLoadClass* load_class);
   Label* NewTypeBssEntryPatch(HLoadClass* load_class);
@@ -633,8 +632,6 @@
 
   // PC-relative DexCache access info.
   ArenaDeque<X86PcRelativePatchInfo> pc_relative_dex_cache_patches_;
-  // Patch locations for patchoat where the linker doesn't do any other work.
-  ArenaDeque<Label> simple_patches_;
   // String patch locations; type depends on configuration (app .bss or boot image PIC/non-PIC).
   ArenaDeque<X86PcRelativePatchInfo> string_patches_;
   // Type patch locations for boot image; type depends on configuration (boot image PIC/non-PIC).
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 179bf6d..644fcee 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -1070,13 +1070,6 @@
       kX86_64PointerSize).SizeValue()));
 }
 
-void CodeGeneratorX86_64::RecordSimplePatch() {
-  if (GetCompilerOptions().GetIncludePatchInformation()) {
-    simple_patches_.emplace_back();
-    __ Bind(&simple_patches_.back());
-  }
-}
-
 void CodeGeneratorX86_64::RecordBootStringPatch(HLoadString* load_string) {
   DCHECK(GetCompilerOptions().IsBootImage());
   string_patches_.emplace_back(load_string->GetDexFile(), load_string->GetStringIndex().index_);
@@ -1126,17 +1119,12 @@
   DCHECK(linker_patches->empty());
   size_t size =
       pc_relative_dex_cache_patches_.size() +
-      simple_patches_.size() +
       string_patches_.size() +
       boot_image_type_patches_.size() +
       type_bss_entry_patches_.size();
   linker_patches->reserve(size);
   EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_,
                                                                linker_patches);
-  for (const Label& label : simple_patches_) {
-    uint32_t literal_offset = label.Position() - kLabelPositionToLiteralOffsetAdjustment;
-    linker_patches->push_back(LinkerPatch::RecordPosition(literal_offset));
-  }
   if (!GetCompilerOptions().IsBootImage()) {
     DCHECK(boot_image_type_patches_.empty());
     EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(string_patches_, linker_patches);
@@ -1227,7 +1215,6 @@
         isa_features_(isa_features),
         constant_area_start_(0),
         pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
-        simple_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
         string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
         boot_image_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
         type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
@@ -5545,7 +5532,6 @@
           reinterpret_cast<uintptr_t>(cls->GetClass().Get()));
       DCHECK_NE(address, 0u);
       __ movl(out, Immediate(address));  // Zero-extended.
-      codegen_->RecordSimplePatch();
       break;
     }
     case HLoadClass::LoadKind::kBssEntry: {
@@ -5681,7 +5667,6 @@
           reinterpret_cast<uintptr_t>(load->GetString().Get()));
       DCHECK_NE(address, 0u);
       __ movl(out, Immediate(address));  // Zero-extended.
-      codegen_->RecordSimplePatch();
       return;  // No dex cache slow path.
     }
     case HLoadString::LoadKind::kBssEntry: {
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 3a83731..376c3ce 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -406,7 +406,6 @@
   void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) OVERRIDE;
   void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE;
 
-  void RecordSimplePatch();
   void RecordBootStringPatch(HLoadString* load_string);
   void RecordBootTypePatch(HLoadClass* load_class);
   Label* NewTypeBssEntryPatch(HLoadClass* load_class);
@@ -602,8 +601,6 @@
 
   // PC-relative DexCache access info.
   ArenaDeque<PatchInfo<Label>> pc_relative_dex_cache_patches_;
-  // Patch locations for patchoat where the linker doesn't do any other work.
-  ArenaDeque<Label> simple_patches_;
   // String patch locations; type depends on configuration (app .bss or boot image PIC).
   ArenaDeque<PatchInfo<Label>> string_patches_;
   // Type patch locations for boot image (always PIC).
diff --git a/compiler/optimizing/code_sinking.cc b/compiler/optimizing/code_sinking.cc
new file mode 100644
index 0000000..dc3d378
--- /dev/null
+++ b/compiler/optimizing/code_sinking.cc
@@ -0,0 +1,403 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "code_sinking.h"
+
+#include "common_dominator.h"
+#include "nodes.h"
+
+namespace art {
+
+void CodeSinking::Run() {
+  HBasicBlock* exit = graph_->GetExitBlock();
+  if (exit == nullptr) {
+    // Infinite loop, just bail.
+    return;
+  }
+  // TODO(ngeoffray): we do not profile branches yet, so use throw instructions
+  // as an indicator of an uncommon branch.
+  for (HBasicBlock* exit_predecessor : exit->GetPredecessors()) {
+    if (exit_predecessor->GetLastInstruction()->IsThrow()) {
+      SinkCodeToUncommonBranch(exit_predecessor);
+    }
+  }
+}
+
+static bool IsInterestingInstruction(HInstruction* instruction) {
+  // Instructions from the entry block (for example constants) are never interesting to move.
+  if (instruction->GetBlock() == instruction->GetBlock()->GetGraph()->GetEntryBlock()) {
+    return false;
+  }
+  // We want to move moveable instructions that cannot throw, as well as
+  // heap stores and allocations.
+
+  // Volatile stores cannot be moved.
+  if (instruction->IsInstanceFieldSet()) {
+    if (instruction->AsInstanceFieldSet()->IsVolatile()) {
+      return false;
+    }
+  }
+
+  // Check allocations first, as they can throw, but it is safe to move them.
+  if (instruction->IsNewInstance() || instruction->IsNewArray()) {
+    return true;
+  }
+
+  // All other instructions that can throw cannot be moved.
+  if (instruction->CanThrow()) {
+    return false;
+  }
+
+  // We can only store into local allocations. Other heap references can
+  // be escaping. Note that allocations can escape too, but we only move
+  // allocations if their users can move too, or are in the list of
+  // post dominated blocks.
+  if (instruction->IsInstanceFieldSet()) {
+    if (!instruction->InputAt(0)->IsNewInstance()) {
+      return false;
+    }
+  }
+
+  if (instruction->IsArraySet()) {
+    if (!instruction->InputAt(0)->IsNewArray()) {
+      return false;
+    }
+  }
+
+  // Heap accesses cannot go past instructions that have memory side effects, which
+  // we are not tracking here. Note that the load/store elimination optimization
+  // runs before this optimization, and should have removed interesting ones.
+  // In theory, we could handle loads of local allocations, but this is currently
+  // hard to test, as LSE removes them.
+  if (instruction->IsStaticFieldGet() ||
+      instruction->IsInstanceFieldGet() ||
+      instruction->IsArrayGet()) {
+    return false;
+  }
+
+  if (instruction->IsInstanceFieldSet() ||
+      instruction->IsArraySet() ||
+      instruction->CanBeMoved()) {
+    return true;
+  }
+  return false;
+}
+
+static void AddInstruction(HInstruction* instruction,
+                           const ArenaBitVector& processed_instructions,
+                           const ArenaBitVector& discard_blocks,
+                           ArenaVector<HInstruction*>* worklist) {
+  // Add to the work list if the instruction is not in the list of blocks
+  // to discard, hasn't already been processed, and is of interest.
+  if (!discard_blocks.IsBitSet(instruction->GetBlock()->GetBlockId()) &&
+      !processed_instructions.IsBitSet(instruction->GetId()) &&
+      IsInterestingInstruction(instruction)) {
+    worklist->push_back(instruction);
+  }
+}
+
+static void AddInputs(HInstruction* instruction,
+                      const ArenaBitVector& processed_instructions,
+                      const ArenaBitVector& discard_blocks,
+                      ArenaVector<HInstruction*>* worklist) {
+  for (HInstruction* input : instruction->GetInputs()) {
+    AddInstruction(input, processed_instructions, discard_blocks, worklist);
+  }
+}
+
+static void AddInputs(HBasicBlock* block,
+                      const ArenaBitVector& processed_instructions,
+                      const ArenaBitVector& discard_blocks,
+                      ArenaVector<HInstruction*>* worklist) {
+  for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
+    AddInputs(it.Current(), processed_instructions, discard_blocks, worklist);
+  }
+  for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
+    AddInputs(it.Current(), processed_instructions, discard_blocks, worklist);
+  }
+}
+
+static bool ShouldFilterUse(HInstruction* instruction,
+                            HInstruction* user,
+                            const ArenaBitVector& post_dominated) {
+  if (instruction->IsNewInstance()) {
+    return user->IsInstanceFieldSet() &&
+        (user->InputAt(0) == instruction) &&
+        !post_dominated.IsBitSet(user->GetBlock()->GetBlockId());
+  } else if (instruction->IsNewArray()) {
+    return user->IsArraySet() &&
+        (user->InputAt(0) == instruction) &&
+        !post_dominated.IsBitSet(user->GetBlock()->GetBlockId());
+  }
+  return false;
+}
+
+
+// Find the ideal position for moving `instruction`. If `filter` is true,
+// we filter out stores into that instruction, which are processed
+// first in step (3) of the sinking algorithm.
+// This method is tailored to the sinking algorithm, unlike
+// the generic HInstruction::MoveBeforeFirstUserAndOutOfLoops.
+static HInstruction* FindIdealPosition(HInstruction* instruction,
+                                       const ArenaBitVector& post_dominated,
+                                       bool filter = false) {
+  DCHECK(!instruction->IsPhi());  // Makes no sense for Phi.
+
+  // Find the target block.
+  CommonDominator finder(/* start_block */ nullptr);
+  for (const HUseListNode<HInstruction*>& use : instruction->GetUses()) {
+    HInstruction* user = use.GetUser();
+    if (!(filter && ShouldFilterUse(instruction, user, post_dominated))) {
+      finder.Update(user->IsPhi()
+          ? user->GetBlock()->GetPredecessors()[use.GetIndex()]
+          : user->GetBlock());
+    }
+  }
+  for (const HUseListNode<HEnvironment*>& use : instruction->GetEnvUses()) {
+    DCHECK(!use.GetUser()->GetHolder()->IsPhi());
+    DCHECK(!filter || !ShouldFilterUse(instruction, use.GetUser()->GetHolder(), post_dominated));
+    finder.Update(use.GetUser()->GetHolder()->GetBlock());
+  }
+  HBasicBlock* target_block = finder.Get();
+  if (target_block == nullptr) {
+    // No user we can go next to? Likely an LSE or DCE limitation.
+    return nullptr;
+  }
+
+  // Move to the first dominator not in a loop, if we can.
+  while (target_block->IsInLoop()) {
+    if (!post_dominated.IsBitSet(target_block->GetDominator()->GetBlockId())) {
+      break;
+    }
+    target_block = target_block->GetDominator();
+    DCHECK(target_block != nullptr);
+  }
+
+  // Find insertion position. No need to filter anymore, as we have found a
+  // target block.
+  HInstruction* insert_pos = nullptr;
+  for (const HUseListNode<HInstruction*>& use : instruction->GetUses()) {
+    if (use.GetUser()->GetBlock() == target_block &&
+        (insert_pos == nullptr || use.GetUser()->StrictlyDominates(insert_pos))) {
+      insert_pos = use.GetUser();
+    }
+  }
+  for (const HUseListNode<HEnvironment*>& use : instruction->GetEnvUses()) {
+    HInstruction* user = use.GetUser()->GetHolder();
+    if (user->GetBlock() == target_block &&
+        (insert_pos == nullptr || user->StrictlyDominates(insert_pos))) {
+      insert_pos = user;
+    }
+  }
+  if (insert_pos == nullptr) {
+    // No user in `target_block`, insert before the control flow instruction.
+    insert_pos = target_block->GetLastInstruction();
+    DCHECK(insert_pos->IsControlFlow());
+    // Avoid splitting HCondition from HIf to prevent unnecessary materialization.
+    if (insert_pos->IsIf()) {
+      HInstruction* if_input = insert_pos->AsIf()->InputAt(0);
+      if (if_input == insert_pos->GetPrevious()) {
+        insert_pos = if_input;
+      }
+    }
+  }
+  DCHECK(!insert_pos->IsPhi());
+  return insert_pos;
+}
+
+
+void CodeSinking::SinkCodeToUncommonBranch(HBasicBlock* end_block) {
+  // Local allocator to discard data structures created below at the end of
+  // this optimization.
+  ArenaAllocator allocator(graph_->GetArena()->GetArenaPool());
+
+  size_t number_of_instructions = graph_->GetCurrentInstructionId();
+  ArenaVector<HInstruction*> worklist(allocator.Adapter(kArenaAllocMisc));
+  ArenaBitVector processed_instructions(&allocator, number_of_instructions, /* expandable */ false);
+  ArenaBitVector post_dominated(&allocator, graph_->GetBlocks().size(), /* expandable */ false);
+  ArenaBitVector instructions_that_can_move(
+      &allocator, number_of_instructions, /* expandable */ false);
+  ArenaVector<HInstruction*> move_in_order(allocator.Adapter(kArenaAllocMisc));
+
+  // Step (1): Visit post order to get a subset of blocks post dominated by `end_block`.
+  // TODO(ngeoffray): Getting the full set of post-dominated blocks should be done by
+  // computing the post dominator tree, but that could be too time consuming. Also,
+  // we should start the analysis from blocks dominated by an uncommon branch, but we
+  // don't profile branches yet.
+  bool found_block = false;
+  for (HBasicBlock* block : graph_->GetPostOrder()) {
+    if (block == end_block) {
+      found_block = true;
+      post_dominated.SetBit(block->GetBlockId());
+    } else if (found_block) {
+      bool is_post_dominated = true;
+      if (block->GetSuccessors().empty()) {
+        // We currently bail for loops.
+        is_post_dominated = false;
+      } else {
+        for (HBasicBlock* successor : block->GetSuccessors()) {
+          if (!post_dominated.IsBitSet(successor->GetBlockId())) {
+            is_post_dominated = false;
+            break;
+          }
+        }
+      }
+      if (is_post_dominated) {
+        post_dominated.SetBit(block->GetBlockId());
+      }
+    }
+  }
+
+  // Now that we have found a subset of post-dominated blocks, add to the worklist all inputs
+  // of instructions in these blocks that are not themselves in these blocks.
+  // Also find the common dominator of the found post dominated blocks, to help filtering
+  // out un-movable uses in step (2).
+  CommonDominator finder(end_block);
+  for (size_t i = 0, e = graph_->GetBlocks().size(); i < e; ++i) {
+    if (post_dominated.IsBitSet(i)) {
+      finder.Update(graph_->GetBlocks()[i]);
+      AddInputs(graph_->GetBlocks()[i], processed_instructions, post_dominated, &worklist);
+    }
+  }
+  HBasicBlock* common_dominator = finder.Get();
+
+  // Step (2): iterate over the worklist to find sinking candidates.
+  while (!worklist.empty()) {
+    HInstruction* instruction = worklist.back();
+    if (processed_instructions.IsBitSet(instruction->GetId())) {
+      // The instruction has already been processed, continue. This happens
+      // when the instruction is the input/user of multiple instructions.
+      worklist.pop_back();
+      continue;
+    }
+    bool all_users_in_post_dominated_blocks = true;
+    bool can_move = true;
+    // Check users of the instruction.
+    for (const HUseListNode<HInstruction*>& use : instruction->GetUses()) {
+      HInstruction* user = use.GetUser();
+      if (!post_dominated.IsBitSet(user->GetBlock()->GetBlockId()) &&
+          !instructions_that_can_move.IsBitSet(user->GetId())) {
+        all_users_in_post_dominated_blocks = false;
+        // If we've already processed this user, or the user cannot be moved, or
+        // does not dominate the post dominated blocks, bail.
+        // TODO(ngeoffray): The domination check is an approximation. We should
+        // instead check if the dominated blocks post dominate the user's block,
+        // but we do not have post dominance information here.
+        if (processed_instructions.IsBitSet(user->GetId()) ||
+            !IsInterestingInstruction(user) ||
+            !user->GetBlock()->Dominates(common_dominator)) {
+          can_move = false;
+          break;
+        }
+      }
+    }
+
+    // Check environment users of the instruction. Some of these users require
+    // the instruction not to move.
+    if (all_users_in_post_dominated_blocks) {
+      for (const HUseListNode<HEnvironment*>& use : instruction->GetEnvUses()) {
+        HEnvironment* environment = use.GetUser();
+        HInstruction* user = environment->GetHolder();
+        if (!post_dominated.IsBitSet(user->GetBlock()->GetBlockId())) {
+          if (graph_->IsDebuggable() ||
+              user->IsDeoptimize() ||
+              user->CanThrowIntoCatchBlock() ||
+              (user->IsSuspendCheck() && graph_->IsCompilingOsr())) {
+            can_move = false;
+            break;
+          }
+        }
+      }
+    }
+    if (!can_move) {
+      // Instruction cannot be moved, mark it as processed and remove it from the work
+      // list.
+      processed_instructions.SetBit(instruction->GetId());
+      worklist.pop_back();
+    } else if (all_users_in_post_dominated_blocks) {
+      // Instruction is a candidate for being sunk. Mark it as such, remove it from the
+      // work list, and add its inputs to the work list.
+      instructions_that_can_move.SetBit(instruction->GetId());
+      move_in_order.push_back(instruction);
+      processed_instructions.SetBit(instruction->GetId());
+      worklist.pop_back();
+      AddInputs(instruction, processed_instructions, post_dominated, &worklist);
+      // Drop the environment uses that are not in the list of post-dominated blocks.
+      // This helps step (3) of this optimization, when we start moving instructions
+      // closer to their uses.
+      for (const HUseListNode<HEnvironment*>& use : instruction->GetEnvUses()) {
+        HEnvironment* environment = use.GetUser();
+        HInstruction* user = environment->GetHolder();
+        if (!post_dominated.IsBitSet(user->GetBlock()->GetBlockId())) {
+          environment->RemoveAsUserOfInput(use.GetIndex());
+          environment->SetRawEnvAt(use.GetIndex(), nullptr);
+        }
+      }
+    } else {
+      // The information we have on the users was not enough to decide whether the
+      // instruction could be moved.
+      // Add the users to the work list, and keep the instruction in the work list
+      // to process it again once all users have been processed.
+      for (const HUseListNode<HInstruction*>& use : instruction->GetUses()) {
+        AddInstruction(use.GetUser(), processed_instructions, post_dominated, &worklist);
+      }
+    }
+  }
+
+  // Make sure we process instructions in dominated order. This is required for heap
+  // stores.
+  std::sort(move_in_order.begin(), move_in_order.end(), [](HInstruction* a, HInstruction* b) {
+    return b->StrictlyDominates(a);
+  });
+
+  // Step (3): Try to move sinking candidates.
+  for (HInstruction* instruction : move_in_order) {
+    HInstruction* position = nullptr;
+    if (instruction->IsArraySet() || instruction->IsInstanceFieldSet()) {
+      if (!instructions_that_can_move.IsBitSet(instruction->InputAt(0)->GetId())) {
+        // A store can trivially move, but it can safely do so only if the heap
+        // location it stores to can also move.
+        // TODO(ngeoffray): Handle allocation/store cycles by pruning these instructions
+        // from the set and all their inputs.
+        continue;
+      }
+      // Find the position of the instruction we're storing into, filtering out this
+      // store and all other stores to that instruction.
+      position = FindIdealPosition(instruction->InputAt(0), post_dominated, /* filter */ true);
+
+      // The position needs to be dominated by the store, in order for the store to move there.
+      if (position == nullptr || !instruction->GetBlock()->Dominates(position->GetBlock())) {
+        continue;
+      }
+    } else {
+      // Find the ideal position within the post dominated blocks.
+      position = FindIdealPosition(instruction, post_dominated);
+      if (position == nullptr) {
+        continue;
+      }
+    }
+    // Bail if we could not find a position in the post dominated blocks (for example,
+    // if there are multiple users whose common dominator is not in the list of
+    // post dominated blocks).
+    if (!post_dominated.IsBitSet(position->GetBlock()->GetBlockId())) {
+      continue;
+    }
+    MaybeRecordStat(MethodCompilationStat::kInstructionSunk);
+    instruction->MoveBefore(position, /* ensure_safety */ false);
+  }
+}
+
+}  // namespace art
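
For intuition, here is a source-level sketch of the transformation this pass performs. The pass itself operates on ART's HIR, not on source code; the example below is hypothetical C++, not taken from this change:

```cpp
#include <iostream>
#include <stdexcept>
#include <string>

// Before sinking: the message is built on every call, even though it is
// only needed on the rarely taken throwing path.
int before(int value) {
  std::string message = "unexpected value: " + std::to_string(value);
  if (value < 0) {
    throw std::invalid_argument(message);  // Uncommon branch.
  }
  return value * 2;
}

// After sinking: the allocation and the stores into it move into the
// uncommon branch, so the common path no longer pays for them.
int after(int value) {
  if (value < 0) {
    std::string message = "unexpected value: " + std::to_string(value);
    throw std::invalid_argument(message);
  }
  return value * 2;
}

int main() {
  std::cout << after(5) << "\n";  // Common path: no message is ever built.
}
```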
diff --git a/compiler/optimizing/code_sinking.h b/compiler/optimizing/code_sinking.h
new file mode 100644
index 0000000..59cda52
--- /dev/null
+++ b/compiler/optimizing/code_sinking.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_CODE_SINKING_H_
+#define ART_COMPILER_OPTIMIZING_CODE_SINKING_H_
+
+#include "nodes.h"
+#include "optimization.h"
+
+namespace art {
+
+/**
+ * Optimization pass to move instructions into uncommon branches,
+ * when it is safe to do so.
+ */
+class CodeSinking : public HOptimization {
+ public:
+  CodeSinking(HGraph* graph, OptimizingCompilerStats* stats)
+      : HOptimization(graph, kCodeSinkingPassName, stats) {}
+
+  void Run() OVERRIDE;
+
+  static constexpr const char* kCodeSinkingPassName = "code_sinking";
+
+ private:
+  // Try to move code used only by `end_block` and all its post-dominated / dominated
+  // blocks into these blocks.
+  void SinkCodeToUncommonBranch(HBasicBlock* end_block);
+
+  DISALLOW_COPY_AND_ASSIGN(CodeSinking);
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_CODE_SINKING_H_
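
A minimal usage sketch for the pass declared above; `graph` and `stats` are assumed to come from the surrounding compiler driver, which is not part of this change:

```cpp
// Hypothetical call site: construct the pass and run it over a built graph.
CodeSinking code_sinking(graph, stats);
code_sinking.Run();  // Per Run() above, only predecessors ending in HThrow are targeted.
```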
diff --git a/compiler/optimizing/common_arm.h b/compiler/optimizing/common_arm.h
index ecb8687..e184745 100644
--- a/compiler/optimizing/common_arm.h
+++ b/compiler/optimizing/common_arm.h
@@ -17,6 +17,7 @@
 #ifndef ART_COMPILER_OPTIMIZING_COMMON_ARM_H_
 #define ART_COMPILER_OPTIMIZING_COMMON_ARM_H_
 
+#include "instruction_simplifier_shared.h"
 #include "debug/dwarf/register.h"
 #include "locations.h"
 #include "nodes.h"
@@ -29,6 +30,9 @@
 #pragma GCC diagnostic pop
 
 namespace art {
+
+using helpers::HasShifterOperand;
+
 namespace arm {
 namespace helpers {
 
@@ -218,6 +222,14 @@
   return Location::FpuRegisterPairLocation(low.GetCode(), high.GetCode());
 }
 
+inline bool ShifterOperandSupportsExtension(HInstruction* instruction) {
+  DCHECK(HasShifterOperand(instruction, kArm));
+  // TODO: HAdd applied to the other integral types could make use of
+  // the SXTAB, SXTAH, UXTAB and UXTAH instructions.
+  return instruction->GetType() == Primitive::kPrimLong &&
+         (instruction->IsAdd() || instruction->IsSub());
+}
+
 }  // namespace helpers
 }  // namespace arm
 }  // namespace art
diff --git a/compiler/optimizing/common_arm64.h b/compiler/optimizing/common_arm64.h
index 93ea090..d3f431e 100644
--- a/compiler/optimizing/common_arm64.h
+++ b/compiler/optimizing/common_arm64.h
@@ -18,6 +18,7 @@
 #define ART_COMPILER_OPTIMIZING_COMMON_ARM64_H_
 
 #include "code_generator.h"
+#include "instruction_simplifier_shared.h"
 #include "locations.h"
 #include "nodes.h"
 #include "utils/arm64/assembler_arm64.h"
@@ -31,6 +32,10 @@
 #pragma GCC diagnostic pop
 
 namespace art {
+
+using helpers::CanFitInShifterOperand;
+using helpers::HasShifterOperand;
+
 namespace arm64 {
 namespace helpers {
 
@@ -290,11 +295,11 @@
   return true;
 }
 
-inline vixl::aarch64::Shift ShiftFromOpKind(HArm64DataProcWithShifterOp::OpKind op_kind) {
+inline vixl::aarch64::Shift ShiftFromOpKind(HDataProcWithShifterOp::OpKind op_kind) {
   switch (op_kind) {
-    case HArm64DataProcWithShifterOp::kASR: return vixl::aarch64::ASR;
-    case HArm64DataProcWithShifterOp::kLSL: return vixl::aarch64::LSL;
-    case HArm64DataProcWithShifterOp::kLSR: return vixl::aarch64::LSR;
+    case HDataProcWithShifterOp::kASR: return vixl::aarch64::ASR;
+    case HDataProcWithShifterOp::kLSL: return vixl::aarch64::LSL;
+    case HDataProcWithShifterOp::kLSR: return vixl::aarch64::LSR;
     default:
       LOG(FATAL) << "Unexpected op kind " << op_kind;
       UNREACHABLE();
@@ -302,14 +307,14 @@
   }
 }
 
-inline vixl::aarch64::Extend ExtendFromOpKind(HArm64DataProcWithShifterOp::OpKind op_kind) {
+inline vixl::aarch64::Extend ExtendFromOpKind(HDataProcWithShifterOp::OpKind op_kind) {
   switch (op_kind) {
-    case HArm64DataProcWithShifterOp::kUXTB: return vixl::aarch64::UXTB;
-    case HArm64DataProcWithShifterOp::kUXTH: return vixl::aarch64::UXTH;
-    case HArm64DataProcWithShifterOp::kUXTW: return vixl::aarch64::UXTW;
-    case HArm64DataProcWithShifterOp::kSXTB: return vixl::aarch64::SXTB;
-    case HArm64DataProcWithShifterOp::kSXTH: return vixl::aarch64::SXTH;
-    case HArm64DataProcWithShifterOp::kSXTW: return vixl::aarch64::SXTW;
+    case HDataProcWithShifterOp::kUXTB: return vixl::aarch64::UXTB;
+    case HDataProcWithShifterOp::kUXTH: return vixl::aarch64::UXTH;
+    case HDataProcWithShifterOp::kUXTW: return vixl::aarch64::UXTW;
+    case HDataProcWithShifterOp::kSXTB: return vixl::aarch64::SXTB;
+    case HDataProcWithShifterOp::kSXTH: return vixl::aarch64::SXTH;
+    case HDataProcWithShifterOp::kSXTW: return vixl::aarch64::SXTW;
     default:
       LOG(FATAL) << "Unexpected op kind " << op_kind;
       UNREACHABLE();
@@ -317,31 +322,8 @@
   }
 }
 
-inline bool CanFitInShifterOperand(HInstruction* instruction) {
-  if (instruction->IsTypeConversion()) {
-    HTypeConversion* conversion = instruction->AsTypeConversion();
-    Primitive::Type result_type = conversion->GetResultType();
-    Primitive::Type input_type = conversion->GetInputType();
-    // We don't expect to see the same type as input and result.
-    return Primitive::IsIntegralType(result_type) && Primitive::IsIntegralType(input_type) &&
-        (result_type != input_type);
-  } else {
-    return (instruction->IsShl() && instruction->AsShl()->InputAt(1)->IsIntConstant()) ||
-        (instruction->IsShr() && instruction->AsShr()->InputAt(1)->IsIntConstant()) ||
-        (instruction->IsUShr() && instruction->AsUShr()->InputAt(1)->IsIntConstant());
-  }
-}
-
-inline bool HasShifterOperand(HInstruction* instr) {
-  // `neg` instructions are an alias of `sub` using the zero register as the
-  // first register input.
-  bool res = instr->IsAdd() || instr->IsAnd() || instr->IsNeg() ||
-      instr->IsOr() || instr->IsSub() || instr->IsXor();
-  return res;
-}
-
 inline bool ShifterOperandSupportsExtension(HInstruction* instruction) {
-  DCHECK(HasShifterOperand(instruction));
+  DCHECK(HasShifterOperand(instruction, kArm64));
   // Although the `neg` instruction is an alias of the `sub` instruction, `HNeg`
   // does *not* support extension. This is because the `extended register` form
   // of the `sub` instruction interprets the left register with code 31 as the
diff --git a/compiler/optimizing/common_dominator.h b/compiler/optimizing/common_dominator.h
index b459d24..9f012cf 100644
--- a/compiler/optimizing/common_dominator.h
+++ b/compiler/optimizing/common_dominator.h
@@ -36,12 +36,16 @@
   // Create a finder starting with a given block.
   explicit CommonDominator(HBasicBlock* block)
       : dominator_(block), chain_length_(ChainLength(block)) {
-    DCHECK(block != nullptr);
   }
 
   // Update the common dominator with another block.
   void Update(HBasicBlock* block) {
     DCHECK(block != nullptr);
+    if (dominator_ == nullptr) {
+      dominator_ = block;
+      chain_length_ = ChainLength(block);
+      return;
+    }
     HBasicBlock* block2 = dominator_;
     DCHECK(block2 != nullptr);
     if (block == block2) {
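
The nullptr-start support added here is what lets FindIdealPosition in code_sinking.cc seed an empty finder and fold in user blocks one at a time. A sketch of that pattern, where `user_blocks` is a hypothetical collection of HBasicBlock*:

```cpp
CommonDominator finder(/* start_block */ nullptr);
for (HBasicBlock* block : user_blocks) {
  finder.Update(block);  // The first Update() seeds the finder; later ones narrow it.
}
HBasicBlock* common = finder.Get();  // Stays nullptr if no block was ever added.
```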
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index f6fba88..2bf5c53 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -511,12 +511,10 @@
   void VisitBitwiseNegatedRight(HBitwiseNegatedRight* instruction) OVERRIDE {
     StartAttributeStream("kind") << instruction->GetOpKind();
   }
-#endif
 
-#ifdef ART_ENABLE_CODEGEN_arm64
-  void VisitArm64DataProcWithShifterOp(HArm64DataProcWithShifterOp* instruction) OVERRIDE {
+  void VisitDataProcWithShifterOp(HDataProcWithShifterOp* instruction) OVERRIDE {
     StartAttributeStream("kind") << instruction->GetInstrKind() << "+" << instruction->GetOpKind();
-    if (HArm64DataProcWithShifterOp::IsShiftOp(instruction->GetOpKind())) {
+    if (HDataProcWithShifterOp::IsShiftOp(instruction->GetOpKind())) {
       StartAttributeStream("shift") << instruction->GetShiftAmount();
     }
   }
diff --git a/compiler/optimizing/induction_var_range.cc b/compiler/optimizing/induction_var_range.cc
index 5539413..d6513c8 100644
--- a/compiler/optimizing/induction_var_range.cc
+++ b/compiler/optimizing/induction_var_range.cc
@@ -57,21 +57,27 @@
   return false;
 }
 
-/** Returns b^e for b,e >= 1. Sets overflow if arithmetic wrap-around occurred. */
+/** Computes a * b for a,b > 0. Sets overflow if arithmetic wrap-around occurred. */
+static int64_t SafeMul(int64_t a, int64_t b, /*out*/ bool* overflow) {
+  if (a > 0 && b > 0 && a > (std::numeric_limits<int64_t>::max() / b)) {
+    *overflow = true;
+  }
+  return a * b;
+}
+
+/** Returns b^e for b,e > 0. Sets overflow if arithmetic wrap-around occurred. */
 static int64_t IntPow(int64_t b, int64_t e, /*out*/ bool* overflow) {
-  DCHECK_GE(b, 1);
-  DCHECK_GE(e, 1);
+  DCHECK_LT(0, b);
+  DCHECK_LT(0, e);
   int64_t pow = 1;
   while (e) {
     if (e & 1) {
-      int64_t oldpow = pow;
-      pow *= b;
-      if (pow < oldpow) {
-        *overflow = true;
-      }
+      pow = SafeMul(pow, b, overflow);
     }
     e >>= 1;
-    b *= b;
+    if (e) {
+      b = SafeMul(b, b, overflow);
+    }
   }
   return pow;
 }
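
A standalone sketch of the overflow behavior of the new SafeMul/IntPow pair (the bodies mirror the diff above; the main() harness is illustrative only):

```cpp
#include <cstdint>
#include <iostream>
#include <limits>

// Mirrors SafeMul above: flags overflow before multiplying, for a,b > 0.
static int64_t SafeMul(int64_t a, int64_t b, bool* overflow) {
  if (a > 0 && b > 0 && a > (std::numeric_limits<int64_t>::max() / b)) {
    *overflow = true;
  }
  return a * b;
}

// Mirrors IntPow above: square-and-multiply, skipping the final squaring of b.
static int64_t IntPow(int64_t b, int64_t e, bool* overflow) {
  int64_t pow = 1;
  while (e) {
    if (e & 1) {
      pow = SafeMul(pow, b, overflow);
    }
    e >>= 1;
    if (e) {
      b = SafeMul(b, b, overflow);
    }
  }
  return pow;
}

int main() {
  bool overflow = false;
  std::cout << IntPow(2, 62, &overflow) << " overflow=" << overflow << "\n";  // 2^62 fits.
  overflow = false;
  IntPow(2, 63, &overflow);
  std::cout << "overflow=" << overflow << "\n";  // 2^63 wraps: flag is set.
}
```

Note the `if (e)` guard: without it, a final, unused squaring of `b` could wrap (undefined behavior for signed types) or spuriously set the overflow flag, as when computing 2^62 above, where `b` reaches 2^32.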
@@ -377,6 +383,54 @@
   return false;
 }
 
+bool InductionVarRange::IsUnitStride(HInstruction* instruction,
+                                     /*out*/ HInstruction** offset) const {
+  HLoopInformation* loop = nullptr;
+  HInductionVarAnalysis::InductionInfo* info = nullptr;
+  HInductionVarAnalysis::InductionInfo* trip = nullptr;
+  if (HasInductionInfo(instruction, instruction, &loop, &info, &trip)) {
+    if (info->induction_class == HInductionVarAnalysis::kLinear &&
+        info->op_b->operation == HInductionVarAnalysis::kFetch &&
+        !HInductionVarAnalysis::IsNarrowingLinear(info)) {
+      int64_t stride_value = 0;
+      if (IsConstant(info->op_a, kExact, &stride_value) && stride_value == 1) {
+        int64_t off_value = 0;
+        if (IsConstant(info->op_b, kExact, &off_value) && off_value == 0) {
+          *offset = nullptr;
+        } else {
+          *offset = info->op_b->fetch;
+        }
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
+HInstruction* InductionVarRange::GenerateTripCount(HLoopInformation* loop,
+                                                   HGraph* graph,
+                                                   HBasicBlock* block) {
+  HInductionVarAnalysis::InductionInfo *trip =
+      induction_analysis_->LookupInfo(loop, GetLoopControl(loop));
+  if (trip != nullptr && !IsUnsafeTripCount(trip)) {
+    HInstruction* taken_test = nullptr;
+    HInstruction* trip_expr = nullptr;
+    if (IsBodyTripCount(trip)) {
+      if (!GenerateCode(trip->op_b, nullptr, graph, block, &taken_test, false, false)) {
+        return nullptr;
+      }
+    }
+    if (GenerateCode(trip->op_a, nullptr, graph, block, &trip_expr, false, false)) {
+      if (taken_test != nullptr) {
+        HInstruction* zero = graph->GetConstant(trip->type, 0);
+        trip_expr = Insert(
+            block, new (graph->GetArena()) HSelect(taken_test, trip_expr, zero, kNoDexPc));
+      }
+      return trip_expr;
+    }
+  }
+  return nullptr;
+}
+
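A hedged usage sketch of the two new entry points, mirroring the calls exercised in induction_var_range_test.cc later in this change; `range`, `phi`, `loop`, `graph` and `preheader` are assumed to be set up by the caller:

```cpp
HInstruction* offset = nullptr;
if (range.IsUnitStride(phi, &offset)) {
  // `offset` holds the invariant offset, or nullptr when the offset is zero.
  HInstruction* trip_count = range.GenerateTripCount(loop, graph, preheader);
  if (trip_count != nullptr) {
    // When a taken-test is needed, the expression comes back wrapped in an HSelect.
  }
}
```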
 //
 // Private class methods.
 //
@@ -1157,12 +1211,15 @@
     HInstruction* opb = nullptr;
     switch (info->induction_class) {
       case HInductionVarAnalysis::kInvariant:
-        // Invariants (note that even though is_min does not impact code generation for
-        // invariants, some effort is made to keep this parameter consistent).
+        // Invariants (note that since invariants only have other invariants as
+        // sub expressions, viz. no induction, there is no need to adjust is_min).
         switch (info->operation) {
           case HInductionVarAnalysis::kAdd:
-          case HInductionVarAnalysis::kRem:  // no proper is_min for second arg
-          case HInductionVarAnalysis::kXor:  // no proper is_min for second arg
+          case HInductionVarAnalysis::kSub:
+          case HInductionVarAnalysis::kMul:
+          case HInductionVarAnalysis::kDiv:
+          case HInductionVarAnalysis::kRem:
+          case HInductionVarAnalysis::kXor:
           case HInductionVarAnalysis::kLT:
           case HInductionVarAnalysis::kLE:
           case HInductionVarAnalysis::kGT:
@@ -1174,6 +1231,12 @@
                 switch (info->operation) {
                   case HInductionVarAnalysis::kAdd:
                     operation = new (graph->GetArena()) HAdd(type, opa, opb); break;
+                  case HInductionVarAnalysis::kSub:
+                    operation = new (graph->GetArena()) HSub(type, opa, opb); break;
+                  case HInductionVarAnalysis::kMul:
+                    operation = new (graph->GetArena()) HMul(type, opa, opb, kNoDexPc); break;
+                  case HInductionVarAnalysis::kDiv:
+                    operation = new (graph->GetArena()) HDiv(type, opa, opb, kNoDexPc); break;
                   case HInductionVarAnalysis::kRem:
                     operation = new (graph->GetArena()) HRem(type, opa, opb, kNoDexPc); break;
                   case HInductionVarAnalysis::kXor:
@@ -1194,16 +1257,7 @@
               return true;
             }
             break;
-          case HInductionVarAnalysis::kSub:  // second reversed!
-            if (GenerateCode(info->op_a, trip, graph, block, &opa, in_body, is_min) &&
-                GenerateCode(info->op_b, trip, graph, block, &opb, in_body, !is_min)) {
-              if (graph != nullptr) {
-                *result = Insert(block, new (graph->GetArena()) HSub(type, opa, opb));
-              }
-              return true;
-            }
-            break;
-          case HInductionVarAnalysis::kNeg:  // reversed!
+          case HInductionVarAnalysis::kNeg:
             if (GenerateCode(info->op_b, trip, graph, block, &opb, in_body, !is_min)) {
               if (graph != nullptr) {
                 *result = Insert(block, new (graph->GetArena()) HNeg(type, opb));
@@ -1240,9 +1294,9 @@
               }
             }
             break;
-          default:
-            break;
-        }
+          case HInductionVarAnalysis::kNop:
+            LOG(FATAL) << "unexpected invariant nop";
+        }  // switch invariant operation
         break;
       case HInductionVarAnalysis::kLinear: {
         // Linear induction a * i + b, for normalized 0 <= i < TC. For ranges, this should
@@ -1293,7 +1347,7 @@
         }
         break;
       }
-    }
+    }  // switch induction class
   }
   return false;
 }
diff --git a/compiler/optimizing/induction_var_range.h b/compiler/optimizing/induction_var_range.h
index 6c424b7..0858d73 100644
--- a/compiler/optimizing/induction_var_range.h
+++ b/compiler/optimizing/induction_var_range.h
@@ -24,7 +24,8 @@
 /**
  * This class implements range analysis on expressions within loops. It takes the results
  * of induction variable analysis in the constructor and provides a public API to obtain
- * a conservative lower and upper bound value on each instruction in the HIR.
+ * a conservative lower and upper bound value or last value on each instruction in the HIR.
+ * The public API also provides a few general-purpose utility methods related to induction.
  *
  * The range analysis is done with a combination of symbolic and partial integral evaluation
  * of expressions. The analysis avoids complications with wrap-around arithmetic on the integral
@@ -154,6 +155,19 @@
    */
   bool IsFinite(HLoopInformation* loop, /*out*/ int64_t* tc) const;
 
+  /**
+   * Checks if the instruction is a unit-stride induction inside the closest enveloping loop.
+   * Returns the invariant offset on success.
+   */
+  bool IsUnitStride(HInstruction* instruction, /*out*/ HInstruction** offset) const;
+
+  /**
+   * Generates the trip count expression for the given loop. Code is generated in the given
+   * block and graph. The expression is guarded by a taken test if needed. Returns the trip count
+   * expression on success or null otherwise.
+   */
+  HInstruction* GenerateTripCount(HLoopInformation* loop, HGraph* graph, HBasicBlock* block);
+
  private:
   /*
    * Enum used in IsConstant() request.
diff --git a/compiler/optimizing/induction_var_range_test.cc b/compiler/optimizing/induction_var_range_test.cc
index d81817f..fcdf8eb 100644
--- a/compiler/optimizing/induction_var_range_test.cc
+++ b/compiler/optimizing/induction_var_range_test.cc
@@ -48,6 +48,11 @@
     EXPECT_EQ(v1.is_known, v2.is_known);
   }
 
+  void ExpectInt(int32_t value, HInstruction* i) {
+    ASSERT_TRUE(i->IsIntConstant());
+    EXPECT_EQ(value, i->AsIntConstant()->GetValue());
+  }
+
   //
   // Construction methods.
   //
@@ -757,10 +762,20 @@
   // Last value (unsimplified).
   HInstruction* last = range_.GenerateLastValue(phi, graph_, loop_preheader_);
   ASSERT_TRUE(last->IsAdd());
-  ASSERT_TRUE(last->InputAt(0)->IsIntConstant());
-  EXPECT_EQ(1000, last->InputAt(0)->AsIntConstant()->GetValue());
-  ASSERT_TRUE(last->InputAt(1)->IsIntConstant());
-  EXPECT_EQ(0, last->InputAt(1)->AsIntConstant()->GetValue());
+  ExpectInt(1000, last->InputAt(0));
+  ExpectInt(0, last->InputAt(1));
+
+  // Loop logic.
+  int64_t tc = 0;
+  EXPECT_TRUE(range_.IsFinite(loop_header_->GetLoopInformation(), &tc));
+  EXPECT_EQ(1000, tc);
+  HInstruction* offset = nullptr;
+  EXPECT_TRUE(range_.IsUnitStride(phi, &offset));
+  EXPECT_TRUE(offset == nullptr);
+  HInstruction* tce = range_.GenerateTripCount(
+      loop_header_->GetLoopInformation(), graph_, loop_preheader_);
+  ASSERT_TRUE(tce != nullptr);
+  ExpectInt(1000, tce);
 }
 
 TEST_F(InductionVarRangeTest, ConstantTripCountDown) {
@@ -799,15 +814,27 @@
   // Last value (unsimplified).
   HInstruction* last = range_.GenerateLastValue(phi, graph_, loop_preheader_);
   ASSERT_TRUE(last->IsSub());
-  ASSERT_TRUE(last->InputAt(0)->IsIntConstant());
-  EXPECT_EQ(1000, last->InputAt(0)->AsIntConstant()->GetValue());
+  ExpectInt(1000, last->InputAt(0));
   ASSERT_TRUE(last->InputAt(1)->IsNeg());
   last = last->InputAt(1)->InputAt(0);
   ASSERT_TRUE(last->IsSub());
-  ASSERT_TRUE(last->InputAt(0)->IsIntConstant());
-  EXPECT_EQ(0, last->InputAt(0)->AsIntConstant()->GetValue());
-  ASSERT_TRUE(last->InputAt(1)->IsIntConstant());
-  EXPECT_EQ(1000, last->InputAt(1)->AsIntConstant()->GetValue());
+  ExpectInt(0, last->InputAt(0));
+  ExpectInt(1000, last->InputAt(1));
+
+  // Loop logic.
+  int64_t tc = 0;
+  EXPECT_TRUE(range_.IsFinite(loop_header_->GetLoopInformation(), &tc));
+  EXPECT_EQ(1000, tc);
+  HInstruction* offset = nullptr;
+  EXPECT_FALSE(range_.IsUnitStride(phi, &offset));
+  HInstruction* tce = range_.GenerateTripCount(
+      loop_header_->GetLoopInformation(), graph_, loop_preheader_);
+  ASSERT_TRUE(tce != nullptr);
+  ASSERT_TRUE(tce->IsNeg());
+  last = tce->InputAt(0);
+  EXPECT_TRUE(last->IsSub());
+  ExpectInt(0, last->InputAt(0));
+  ExpectInt(1000, last->InputAt(1));
 }
 
 TEST_F(InductionVarRangeTest, SymbolicTripCountUp) {
@@ -851,27 +878,22 @@
   // Verify lower is 0+0.
   ASSERT_TRUE(lower != nullptr);
   ASSERT_TRUE(lower->IsAdd());
-  ASSERT_TRUE(lower->InputAt(0)->IsIntConstant());
-  EXPECT_EQ(0, lower->InputAt(0)->AsIntConstant()->GetValue());
-  ASSERT_TRUE(lower->InputAt(1)->IsIntConstant());
-  EXPECT_EQ(0, lower->InputAt(1)->AsIntConstant()->GetValue());
+  ExpectInt(0, lower->InputAt(0));
+  ExpectInt(0, lower->InputAt(1));
 
   // Verify upper is (V-1)+0.
   ASSERT_TRUE(upper != nullptr);
   ASSERT_TRUE(upper->IsAdd());
   ASSERT_TRUE(upper->InputAt(0)->IsSub());
   EXPECT_TRUE(upper->InputAt(0)->InputAt(0)->IsParameterValue());
-  ASSERT_TRUE(upper->InputAt(0)->InputAt(1)->IsIntConstant());
-  EXPECT_EQ(1, upper->InputAt(0)->InputAt(1)->AsIntConstant()->GetValue());
-  ASSERT_TRUE(upper->InputAt(1)->IsIntConstant());
-  EXPECT_EQ(0, upper->InputAt(1)->AsIntConstant()->GetValue());
+  ExpectInt(1, upper->InputAt(0)->InputAt(1));
+  ExpectInt(0, upper->InputAt(1));
 
   // Verify taken-test is 0<V.
   HInstruction* taken = range_.GenerateTakenTest(increment_, graph_, loop_preheader_);
   ASSERT_TRUE(taken != nullptr);
   ASSERT_TRUE(taken->IsLessThan());
-  ASSERT_TRUE(taken->InputAt(0)->IsIntConstant());
-  EXPECT_EQ(0, taken->InputAt(0)->AsIntConstant()->GetValue());
+  ExpectInt(0, taken->InputAt(0));
   EXPECT_TRUE(taken->InputAt(1)->IsParameterValue());
 
   // Replacement.
@@ -880,6 +902,21 @@
   EXPECT_FALSE(needs_finite_test);
   ExpectEqual(Value(1), v1);
   ExpectEqual(Value(y_, 1, 0), v2);
+
+  // Loop logic.
+  int64_t tc = 0;
+  EXPECT_TRUE(range_.IsFinite(loop_header_->GetLoopInformation(), &tc));
+  EXPECT_EQ(0, tc);  // unknown
+  HInstruction* offset = nullptr;
+  EXPECT_TRUE(range_.IsUnitStride(phi, &offset));
+  EXPECT_TRUE(offset == nullptr);
+  HInstruction* tce = range_.GenerateTripCount(
+      loop_header_->GetLoopInformation(), graph_, loop_preheader_);
+  ASSERT_TRUE(tce != nullptr);
+  EXPECT_TRUE(tce->IsSelect());  // guarded by taken-test
+  ExpectInt(0, tce->InputAt(0));
+  EXPECT_TRUE(tce->InputAt(1)->IsParameterValue());
+  EXPECT_TRUE(tce->InputAt(2)->IsLessThan());
 }
 
 TEST_F(InductionVarRangeTest, SymbolicTripCountDown) {
@@ -923,32 +960,26 @@
   // Verify lower is 1000-((1000-V)-1).
   ASSERT_TRUE(lower != nullptr);
   ASSERT_TRUE(lower->IsSub());
-  ASSERT_TRUE(lower->InputAt(0)->IsIntConstant());
-  EXPECT_EQ(1000, lower->InputAt(0)->AsIntConstant()->GetValue());
+  ExpectInt(1000, lower->InputAt(0));
   lower = lower->InputAt(1);
   ASSERT_TRUE(lower->IsSub());
-  ASSERT_TRUE(lower->InputAt(1)->IsIntConstant());
-  EXPECT_EQ(1, lower->InputAt(1)->AsIntConstant()->GetValue());
+  ExpectInt(1, lower->InputAt(1));
   lower = lower->InputAt(0);
   ASSERT_TRUE(lower->IsSub());
-  ASSERT_TRUE(lower->InputAt(0)->IsIntConstant());
-  EXPECT_EQ(1000, lower->InputAt(0)->AsIntConstant()->GetValue());
+  ExpectInt(1000, lower->InputAt(0));
   EXPECT_TRUE(lower->InputAt(1)->IsParameterValue());
 
   // Verify upper is 1000-0.
   ASSERT_TRUE(upper != nullptr);
   ASSERT_TRUE(upper->IsSub());
-  ASSERT_TRUE(upper->InputAt(0)->IsIntConstant());
-  EXPECT_EQ(1000, upper->InputAt(0)->AsIntConstant()->GetValue());
-  ASSERT_TRUE(upper->InputAt(1)->IsIntConstant());
-  EXPECT_EQ(0, upper->InputAt(1)->AsIntConstant()->GetValue());
+  ExpectInt(1000, upper->InputAt(0));
+  ExpectInt(0, upper->InputAt(1));
 
   // Verify taken-test is 1000>V.
   HInstruction* taken = range_.GenerateTakenTest(increment_, graph_, loop_preheader_);
   ASSERT_TRUE(taken != nullptr);
   ASSERT_TRUE(taken->IsGreaterThan());
-  ASSERT_TRUE(taken->InputAt(0)->IsIntConstant());
-  EXPECT_EQ(1000, taken->InputAt(0)->AsIntConstant()->GetValue());
+  ExpectInt(1000, taken->InputAt(0));
   EXPECT_TRUE(taken->InputAt(1)->IsParameterValue());
 
   // Replacement.
@@ -957,6 +988,23 @@
   EXPECT_FALSE(needs_finite_test);
   ExpectEqual(Value(y_, 1, 0), v1);
   ExpectEqual(Value(999), v2);
+
+  // Loop logic.
+  int64_t tc = 0;
+  EXPECT_TRUE(range_.IsFinite(loop_header_->GetLoopInformation(), &tc));
+  EXPECT_EQ(0, tc);  // unknown
+  HInstruction* offset = nullptr;
+  EXPECT_FALSE(range_.IsUnitStride(phi, &offset));
+  HInstruction* tce = range_.GenerateTripCount(
+      loop_header_->GetLoopInformation(), graph_, loop_preheader_);
+  ASSERT_TRUE(tce != nullptr);
+  EXPECT_TRUE(tce->IsSelect());  // guarded by taken-test
+  ExpectInt(0, tce->InputAt(0));
+  EXPECT_TRUE(tce->InputAt(1)->IsSub());
+  EXPECT_TRUE(tce->InputAt(2)->IsGreaterThan());
+  tce = tce->InputAt(1);
+  ExpectInt(1000, tce->InputAt(0));
+  EXPECT_TRUE(tce->InputAt(1)->IsParameterValue());
 }
 
 }  // namespace art
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index 8c73f1d..8a813bd 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -249,20 +249,25 @@
   ProfilingInfo* const profiling_info_;
 };
 
-static bool IsMonomorphic(Handle<mirror::ObjectArray<mirror::Class>> classes)
-    REQUIRES_SHARED(Locks::mutator_lock_) {
-  DCHECK_GE(InlineCache::kIndividualCacheSize, 2);
-  return classes->Get(0) != nullptr && classes->Get(1) == nullptr;
-}
-
-static bool IsMegamorphic(Handle<mirror::ObjectArray<mirror::Class>> classes)
-    REQUIRES_SHARED(Locks::mutator_lock_) {
-  for (size_t i = 0; i < InlineCache::kIndividualCacheSize; ++i) {
-    if (classes->Get(i) == nullptr) {
-      return false;
+HInliner::InlineCacheType HInliner::GetInlineCacheType(
+    const Handle<mirror::ObjectArray<mirror::Class>>& classes)
+  REQUIRES_SHARED(Locks::mutator_lock_) {
+  uint8_t number_of_types = 0;
+  for (; number_of_types < InlineCache::kIndividualCacheSize; ++number_of_types) {
+    if (classes->Get(number_of_types) == nullptr) {
+      break;
     }
   }
-  return true;
+
+  if (number_of_types == 0) {
+    return kInlineCacheUninitialized;
+  } else if (number_of_types == 1) {
+    return kInlineCacheMonomorphic;
+  } else if (number_of_types == InlineCache::kIndividualCacheSize) {
+    return kInlineCacheMegamorphic;
+  } else {
+    return kInlineCachePolymorphic;
+  }
 }
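
The classification above replaces the four removed predicates with a single walk over the cache. A standalone sketch of the resulting decision table (`kIndividualCacheSize = 5` is an illustrative assumption; the real constant lives in InlineCache):

```cpp
#include <cstdint>
#include <iostream>

enum InlineCacheType { kUninitialized, kMonomorphic, kPolymorphic, kMegamorphic };
constexpr uint8_t kIndividualCacheSize = 5;  // Assumed size, for illustration only.

InlineCacheType Classify(uint8_t number_of_types) {
  if (number_of_types == 0) return kUninitialized;  // No receiver type seen yet.
  if (number_of_types == 1) return kMonomorphic;    // Exactly one receiver type.
  if (number_of_types == kIndividualCacheSize) return kMegamorphic;  // Cache is full.
  return kPolymorphic;  // 2 .. kIndividualCacheSize-1 receiver types observed.
}

int main() {
  for (uint8_t n = 0; n <= kIndividualCacheSize; ++n) {
    std::cout << int(n) << " -> " << Classify(n) << "\n";
  }
}
```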
 
 static mirror::Class* GetMonomorphicType(Handle<mirror::ObjectArray<mirror::Class>> classes)
@@ -271,18 +276,6 @@
   return classes->Get(0);
 }
 
-static bool IsUninitialized(Handle<mirror::ObjectArray<mirror::Class>> classes)
-    REQUIRES_SHARED(Locks::mutator_lock_) {
-  return classes->Get(0) == nullptr;
-}
-
-static bool IsPolymorphic(Handle<mirror::ObjectArray<mirror::Class>> classes)
-    REQUIRES_SHARED(Locks::mutator_lock_) {
-  DCHECK_GE(InlineCache::kIndividualCacheSize, 3);
-  return classes->Get(1) != nullptr &&
-      classes->Get(InlineCache::kIndividualCacheSize - 1) == nullptr;
-}
-
 ArtMethod* HInliner::TryCHADevirtualization(ArtMethod* resolved_method) {
   if (!resolved_method->HasSingleImplementation()) {
     return nullptr;
@@ -353,67 +346,209 @@
     }
     return result;
   }
-
   DCHECK(!invoke_instruction->IsInvokeStaticOrDirect());
 
-  // Check if we can use an inline cache.
-  ArtMethod* caller = graph_->GetArtMethod();
-  if (Runtime::Current()->UseJitCompilation()) {
-    // Under JIT, we should always know the caller.
-    DCHECK(caller != nullptr);
-    ScopedProfilingInfoInlineUse spiis(caller, soa.Self());
-    ProfilingInfo* profiling_info = spiis.GetProfilingInfo();
-    if (profiling_info != nullptr) {
-      StackHandleScope<1> hs(soa.Self());
-      ClassLinker* class_linker = caller_compilation_unit_.GetClassLinker();
-      Handle<mirror::ObjectArray<mirror::Class>> inline_cache = hs.NewHandle(
-          mirror::ObjectArray<mirror::Class>::Alloc(
-              soa.Self(),
-              class_linker->GetClassRoot(ClassLinker::kClassArrayClass),
-              InlineCache::kIndividualCacheSize));
-      if (inline_cache == nullptr) {
-        // We got an OOME. Just clear the exception, and don't inline.
-        DCHECK(soa.Self()->IsExceptionPending());
-        soa.Self()->ClearException();
-        VLOG(compiler) << "Out of memory in the compiler when trying to inline";
-        return false;
+  // Try using inline caches.
+  return TryInlineFromInlineCache(caller_dex_file, invoke_instruction, resolved_method);
+}
+
+static Handle<mirror::ObjectArray<mirror::Class>> AllocateInlineCacheHolder(
+    const DexCompilationUnit& compilation_unit,
+    StackHandleScope<1>* hs)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  Thread* self = Thread::Current();
+  ClassLinker* class_linker = compilation_unit.GetClassLinker();
+  Handle<mirror::ObjectArray<mirror::Class>> inline_cache = hs->NewHandle(
+      mirror::ObjectArray<mirror::Class>::Alloc(
+          self,
+          class_linker->GetClassRoot(ClassLinker::kClassArrayClass),
+          InlineCache::kIndividualCacheSize));
+  if (inline_cache == nullptr) {
+    // We got an OOME. Just clear the exception, and don't inline.
+    DCHECK(self->IsExceptionPending());
+    self->ClearException();
+    VLOG(compiler) << "Out of memory in the compiler when trying to inline";
+  }
+  return inline_cache;
+}
+
+bool HInliner::TryInlineFromInlineCache(const DexFile& caller_dex_file,
+                                        HInvoke* invoke_instruction,
+                                        ArtMethod* resolved_method)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  StackHandleScope<1> hs(Thread::Current());
+  Handle<mirror::ObjectArray<mirror::Class>> inline_cache;
+  InlineCacheType inline_cache_type = Runtime::Current()->IsAotCompiler()
+      ? GetInlineCacheAOT(caller_dex_file, invoke_instruction, &hs, &inline_cache)
+      : GetInlineCacheJIT(invoke_instruction, &hs, &inline_cache);
+
+  switch (inline_cache_type) {
+    case kInlineCacheNoData:
+      return false;
+
+    case kInlineCacheUninitialized:
+      VLOG(compiler) << "Interface or virtual call to "
+                     << caller_dex_file.PrettyMethod(invoke_instruction->GetDexMethodIndex())
+                     << " is not hit and not inlined";
+      return false;
+
+    case kInlineCacheMonomorphic:
+      MaybeRecordStat(kMonomorphicCall);
+      if (outermost_graph_->IsCompilingOsr()) {
+        // If we are compiling OSR, we pretend this call is polymorphic, as we may come from the
+        // interpreter and it may have seen different receiver types.
+        return TryInlinePolymorphicCall(invoke_instruction, resolved_method, inline_cache);
       } else {
-        Runtime::Current()->GetJit()->GetCodeCache()->CopyInlineCacheInto(
-            *profiling_info->GetInlineCache(invoke_instruction->GetDexPc()),
-            inline_cache);
-        if (IsUninitialized(inline_cache)) {
-          VLOG(compiler) << "Interface or virtual call to "
-                         << caller_dex_file.PrettyMethod(method_index)
-                         << " is not hit and not inlined";
-          return false;
-        } else if (IsMonomorphic(inline_cache)) {
-          MaybeRecordStat(kMonomorphicCall);
-          if (outermost_graph_->IsCompilingOsr()) {
-            // If we are compiling OSR, we pretend this call is polymorphic, as we may come from the
-            // interpreter and it may have seen different receiver types.
-            return TryInlinePolymorphicCall(invoke_instruction, resolved_method, inline_cache);
-          } else {
-            return TryInlineMonomorphicCall(invoke_instruction, resolved_method, inline_cache);
-          }
-        } else if (IsPolymorphic(inline_cache)) {
-          MaybeRecordStat(kPolymorphicCall);
-          return TryInlinePolymorphicCall(invoke_instruction, resolved_method, inline_cache);
-        } else {
-          DCHECK(IsMegamorphic(inline_cache));
-          VLOG(compiler) << "Interface or virtual call to "
-                         << caller_dex_file.PrettyMethod(method_index)
-                         << " is megamorphic and not inlined";
-          MaybeRecordStat(kMegamorphicCall);
-          return false;
-        }
+        return TryInlineMonomorphicCall(invoke_instruction, resolved_method, inline_cache);
       }
+
+    case kInlineCachePolymorphic:
+      MaybeRecordStat(kPolymorphicCall);
+      return TryInlinePolymorphicCall(invoke_instruction, resolved_method, inline_cache);
+
+    case kInlineCacheMegamorphic:
+      VLOG(compiler) << "Interface or virtual call to "
+                     << caller_dex_file.PrettyMethod(invoke_instruction->GetDexMethodIndex())
+                     << " is megamorphic and not inlined";
+      MaybeRecordStat(kMegamorphicCall);
+      return false;
+
+    case kInlineCacheMissingTypes:
+      VLOG(compiler) << "Interface or virtual call to "
+                     << caller_dex_file.PrettyMethod(invoke_instruction->GetDexMethodIndex())
+                     << " is missing types and not inlined";
+      return false;
+  }
+  UNREACHABLE();
+}
+
+HInliner::InlineCacheType HInliner::GetInlineCacheJIT(
+    HInvoke* invoke_instruction,
+    StackHandleScope<1>* hs,
+    /*out*/Handle<mirror::ObjectArray<mirror::Class>>* inline_cache)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  DCHECK(Runtime::Current()->UseJitCompilation());
+
+  ArtMethod* caller = graph_->GetArtMethod();
+  // Under JIT, we should always know the caller.
+  DCHECK(caller != nullptr);
+  ScopedProfilingInfoInlineUse spiis(caller, Thread::Current());
+  ProfilingInfo* profiling_info = spiis.GetProfilingInfo();
+
+  if (profiling_info == nullptr) {
+    return kInlineCacheNoData;
+  }
+
+  *inline_cache = AllocateInlineCacheHolder(caller_compilation_unit_, hs);
+  if (inline_cache->Get() == nullptr) {
+    // We can't extract any data if we failed to allocate.
+    return kInlineCacheNoData;
+  } else {
+    Runtime::Current()->GetJit()->GetCodeCache()->CopyInlineCacheInto(
+        *profiling_info->GetInlineCache(invoke_instruction->GetDexPc()),
+        *inline_cache);
+    return GetInlineCacheType(*inline_cache);
+  }
+}
+
+HInliner::InlineCacheType HInliner::GetInlineCacheAOT(
+    const DexFile& caller_dex_file,
+    HInvoke* invoke_instruction,
+    StackHandleScope<1>* hs,
+    /*out*/Handle<mirror::ObjectArray<mirror::Class>>* inline_cache)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  DCHECK(Runtime::Current()->IsAotCompiler());
+  const ProfileCompilationInfo* pci = compiler_driver_->GetProfileCompilationInfo();
+  if (pci == nullptr) {
+    return kInlineCacheNoData;
+  }
+
+  ProfileCompilationInfo::OfflineProfileMethodInfo offline_profile;
+  bool found = pci->GetMethod(caller_dex_file.GetLocation(),
+                              caller_dex_file.GetLocationChecksum(),
+                              caller_compilation_unit_.GetDexMethodIndex(),
+                              &offline_profile);
+  if (!found) {
+    return kInlineCacheNoData;  // No profile information for this invocation.
+  }
+
+  *inline_cache = AllocateInlineCacheHolder(caller_compilation_unit_, hs);
+  if (inline_cache->Get() == nullptr) {
+    // We can't extract any data if we failed to allocate.
+    return kInlineCacheNoData;
+  } else {
+    return ExtractClassesFromOfflineProfile(invoke_instruction,
+                                            offline_profile,
+                                            *inline_cache);
+  }
+}
+
+HInliner::InlineCacheType HInliner::ExtractClassesFromOfflineProfile(
+    const HInvoke* invoke_instruction,
+    const ProfileCompilationInfo::OfflineProfileMethodInfo& offline_profile,
+    /*out*/Handle<mirror::ObjectArray<mirror::Class>> inline_cache)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  const auto it = offline_profile.inline_caches.find(invoke_instruction->GetDexPc());
+  if (it == offline_profile.inline_caches.end()) {
+    return kInlineCacheUninitialized;
+  }
+
+  const ProfileCompilationInfo::DexPcData& dex_pc_data = it->second;
+
+  if (dex_pc_data.is_missing_types) {
+    return kInlineCacheMissingTypes;
+  }
+  if (dex_pc_data.is_megamorphic) {
+    return kInlineCacheMegamorphic;
+  }
+
+  DCHECK_LE(dex_pc_data.classes.size(), InlineCache::kIndividualCacheSize);
+  Thread* self = Thread::Current();
+  // We need to resolve the class relative to the containing dex file.
+  // So first, build a mapping from the index of the dex file in the profile to
+  // its dex cache. This will avoid repeating the lookup when walking over
+  // the inline cache types.
+  std::vector<ObjPtr<mirror::DexCache>> dex_profile_index_to_dex_cache(
+        offline_profile.dex_references.size());
+  for (size_t i = 0; i < offline_profile.dex_references.size(); i++) {
+    bool found = false;
+    for (const DexFile* dex_file : compiler_driver_->GetDexFilesForOatFile()) {
+      if (offline_profile.dex_references[i].MatchesDex(dex_file)) {
+        dex_profile_index_to_dex_cache[i] =
+            caller_compilation_unit_.GetClassLinker()->FindDexCache(self, *dex_file);
+        found = true;
+      }
+    }
+    if (!found) {
+      VLOG(compiler) << "Could not find profiled dex file: "
+          << offline_profile.dex_references[i].dex_location;
+      return kInlineCacheMissingTypes;
     }
   }
 
-  VLOG(compiler) << "Interface or virtual call to "
-                 << caller_dex_file.PrettyMethod(method_index)
-                 << " could not be statically determined";
-  return false;
+  // Walk over the classes and resolve them. If we cannot find a type we return
+  // kInlineCacheMissingTypes.
+  int ic_index = 0;
+  for (const ProfileCompilationInfo::ClassReference& class_ref : dex_pc_data.classes) {
+    ObjPtr<mirror::DexCache> dex_cache =
+        dex_profile_index_to_dex_cache[class_ref.dex_profile_index];
+    DCHECK(dex_cache != nullptr);
+    ObjPtr<mirror::Class> clazz = ClassLinker::LookupResolvedType(
+          class_ref.type_index,
+          dex_cache,
+          caller_compilation_unit_.GetClassLoader().Get());
+    if (clazz != nullptr) {
+      inline_cache->Set(ic_index++, clazz);
+    } else {
+      VLOG(compiler) << "Could not resolve class from inline cache in AOT mode "
+          << caller_compilation_unit_.GetDexFile()->PrettyMethod(
+              invoke_instruction->GetDexMethodIndex()) << " : "
+          << caller_compilation_unit_
+              .GetDexFile()->StringByTypeIdx(class_ref.type_index);
+      return kInlineCacheMissingTypes;
+    }
+  }
+  return GetInlineCacheType(inline_cache);
 }
 
 HInstanceFieldGet* HInliner::BuildGetReceiverClass(ClassLinker* class_linker,
@@ -549,13 +684,20 @@
                                                                is_referrer,
                                                                invoke_instruction->GetDexPc(),
                                                                /* needs_access_check */ false);
-  HLoadClass::LoadKind kind = HSharpening::SharpenClass(
+  HLoadClass::LoadKind kind = HSharpening::ComputeLoadClassKind(
       load_class, codegen_, compiler_driver_, caller_compilation_unit_);
   DCHECK(kind != HLoadClass::LoadKind::kInvalid)
       << "We should always be able to reference a class for inline caches";
   // Insert before setting the kind, as setting the kind affects the inputs.
   bb_cursor->InsertInstructionAfter(load_class, receiver_class);
   load_class->SetLoadKind(kind);
+  // In AOT mode, we will most likely load the class from BSS, which will involve a call
+  // to the runtime. In this case, the load instruction will need an environment,
+  // so copy it from the invoke instruction.
+  if (load_class->NeedsEnvironment()) {
+    DCHECK(Runtime::Current()->IsAotCompiler());
+    load_class->CopyEnvironmentFrom(invoke_instruction->GetEnvironment());
+  }
 
   HNotEqual* compare = new (graph_->GetArena()) HNotEqual(load_class, receiver_class);
   bb_cursor->InsertInstructionAfter(compare, load_class);
@@ -746,11 +888,10 @@
     ArtMethod* resolved_method,
     Handle<mirror::ObjectArray<mirror::Class>> classes) {
   // This optimization only works under JIT for now.
-  DCHECK(Runtime::Current()->UseJitCompilation());
-  if (graph_->GetInstructionSet() == kMips64) {
-    // TODO: Support HClassTableGet for mips64.
+  if (!Runtime::Current()->UseJitCompilation()) {
     return false;
   }
+
   ClassLinker* class_linker = caller_compilation_unit_.GetClassLinker();
   PointerSize pointer_size = class_linker->GetImagePointerSize();
 
@@ -1064,9 +1205,8 @@
         // TODO: Needs null check.
         return false;
       }
-      Handle<mirror::DexCache> dex_cache(handles_->NewHandle(resolved_method->GetDexCache()));
       HInstruction* obj = GetInvokeInputForArgVRegIndex(invoke_instruction, data.object_arg);
-      HInstanceFieldGet* iget = CreateInstanceFieldGet(dex_cache, data.field_idx, obj);
+      HInstanceFieldGet* iget = CreateInstanceFieldGet(data.field_idx, resolved_method, obj);
       DCHECK_EQ(iget->GetFieldOffset().Uint32Value(), data.field_offset);
       DCHECK_EQ(iget->IsVolatile() ? 1u : 0u, data.is_volatile);
       invoke_instruction->GetBlock()->InsertInstructionBefore(iget, invoke_instruction);
@@ -1079,10 +1219,9 @@
         // TODO: Needs null check.
         return false;
       }
-      Handle<mirror::DexCache> dex_cache(handles_->NewHandle(resolved_method->GetDexCache()));
       HInstruction* obj = GetInvokeInputForArgVRegIndex(invoke_instruction, data.object_arg);
       HInstruction* value = GetInvokeInputForArgVRegIndex(invoke_instruction, data.src_arg);
-      HInstanceFieldSet* iput = CreateInstanceFieldSet(dex_cache, data.field_idx, obj, value);
+      HInstanceFieldSet* iput = CreateInstanceFieldSet(data.field_idx, resolved_method, obj, value);
       DCHECK_EQ(iput->GetFieldOffset().Uint32Value(), data.field_offset);
       DCHECK_EQ(iput->IsVolatile() ? 1u : 0u, data.is_volatile);
       invoke_instruction->GetBlock()->InsertInstructionBefore(iput, invoke_instruction);
@@ -1116,24 +1255,19 @@
                                  [](uint16_t index) { return index != DexFile::kDexNoIndex16; }));
 
       // Create HInstanceFieldSet for each IPUT that stores non-zero data.
-      Handle<mirror::DexCache> dex_cache;
       HInstruction* obj = GetInvokeInputForArgVRegIndex(invoke_instruction, /* this */ 0u);
       bool needs_constructor_barrier = false;
       for (size_t i = 0; i != number_of_iputs; ++i) {
         HInstruction* value = GetInvokeInputForArgVRegIndex(invoke_instruction, iput_args[i]);
         if (!value->IsConstant() || !value->AsConstant()->IsZeroBitPattern()) {
-          if (dex_cache.GetReference() == nullptr) {
-            dex_cache = handles_->NewHandle(resolved_method->GetDexCache());
-          }
           uint16_t field_index = iput_field_indexes[i];
-          HInstanceFieldSet* iput = CreateInstanceFieldSet(dex_cache, field_index, obj, value);
+          bool is_final;
+          HInstanceFieldSet* iput =
+              CreateInstanceFieldSet(field_index, resolved_method, obj, value, &is_final);
           invoke_instruction->GetBlock()->InsertInstructionBefore(iput, invoke_instruction);
 
           // Check whether the field is final. If it is, we need to add a barrier.
-          PointerSize pointer_size = InstructionSetPointerSize(codegen_->GetInstructionSet());
-          ArtField* resolved_field = dex_cache->GetResolvedField(field_index, pointer_size);
-          DCHECK(resolved_field != nullptr);
-          if (resolved_field->IsFinal()) {
+          if (is_final) {
             needs_constructor_barrier = true;
           }
         }
@@ -1152,12 +1286,13 @@
   return true;
 }
 
-HInstanceFieldGet* HInliner::CreateInstanceFieldGet(Handle<mirror::DexCache> dex_cache,
-                                                    uint32_t field_index,
+HInstanceFieldGet* HInliner::CreateInstanceFieldGet(uint32_t field_index,
+                                                    ArtMethod* referrer,
                                                     HInstruction* obj)
     REQUIRES_SHARED(Locks::mutator_lock_) {
-  PointerSize pointer_size = InstructionSetPointerSize(codegen_->GetInstructionSet());
-  ArtField* resolved_field = dex_cache->GetResolvedField(field_index, pointer_size);
+  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+  ArtField* resolved_field =
+      class_linker->LookupResolvedField(field_index, referrer, /* is_static */ false);
   DCHECK(resolved_field != nullptr);
   HInstanceFieldGet* iget = new (graph_->GetArena()) HInstanceFieldGet(
       obj,
@@ -1167,12 +1302,13 @@
       resolved_field->IsVolatile(),
       field_index,
       resolved_field->GetDeclaringClass()->GetDexClassDefIndex(),
-      *dex_cache->GetDexFile(),
+      *referrer->GetDexFile(),
       // Read barrier generates a runtime call in slow path and we need a valid
       // dex pc for the associated stack map. 0 is bogus but valid. Bug: 26854537.
       /* dex_pc */ 0);
   if (iget->GetType() == Primitive::kPrimNot) {
     // Use the same dex_cache that we used for field lookup as the hint_dex_cache.
+    Handle<mirror::DexCache> dex_cache = handles_->NewHandle(referrer->GetDexCache());
     ReferenceTypePropagation rtp(graph_,
                                  outer_compilation_unit_.GetClassLoader(),
                                  dex_cache,
@@ -1183,14 +1319,21 @@
   return iget;
 }
 
-HInstanceFieldSet* HInliner::CreateInstanceFieldSet(Handle<mirror::DexCache> dex_cache,
-                                                    uint32_t field_index,
+HInstanceFieldSet* HInliner::CreateInstanceFieldSet(uint32_t field_index,
+                                                    ArtMethod* referrer,
                                                     HInstruction* obj,
-                                                    HInstruction* value)
+                                                    HInstruction* value,
+                                                    bool* is_final)
     REQUIRES_SHARED(Locks::mutator_lock_) {
-  PointerSize pointer_size = InstructionSetPointerSize(codegen_->GetInstructionSet());
-  ArtField* resolved_field = dex_cache->GetResolvedField(field_index, pointer_size);
+  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+  ArtField* resolved_field =
+      class_linker->LookupResolvedField(field_index, referrer, /* is_static */ false);
   DCHECK(resolved_field != nullptr);
+  if (is_final != nullptr) {
+    // This information is needed only for constructors.
+    DCHECK(referrer->IsConstructor());
+    *is_final = resolved_field->IsFinal();
+  }
   HInstanceFieldSet* iput = new (graph_->GetArena()) HInstanceFieldSet(
       obj,
       value,
@@ -1200,7 +1343,7 @@
       resolved_field->IsVolatile(),
       field_index,
       resolved_field->GetDeclaringClass()->GetDexClassDefIndex(),
-      *dex_cache->GetDexFile(),
+      *referrer->GetDexFile(),
       // Read barrier generates a runtime call in slow path and we need a valid
       // dex pc for the associated stack map. 0 is bogus but valid. Bug: 26854537.
       /* dex_pc */ 0);
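Both helpers now resolve the field through ClassLinker::LookupResolvedField(field_index, referrer, is_static) instead of going through a Handle<mirror::DexCache>, so no handle needs to be created on the common path and the dex file comes from referrer->GetDexFile(). The call shape, as used in both places above (a lookup of an already-resolved field, presumably without triggering new resolution):

// ArtField* resolved_field = Runtime::Current()->GetClassLinker()
//     ->LookupResolvedField(field_index, referrer, /* is_static */ false);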
@@ -1272,12 +1415,19 @@
       caller_instruction_counter);
   callee_graph->SetArtMethod(resolved_method);
 
-  // When they are needed, allocate `inline_stats` on the heap instead
+  // When needed, allocate `inline_stats_` on the arena instead
   // of on the stack, as Clang might produce a stack frame too large
   // for this function, one that would exceed the limit set by the
   // `-Wframe-larger-than` option.
-  std::unique_ptr<OptimizingCompilerStats> inline_stats =
-      (stats_ == nullptr) ? nullptr : MakeUnique<OptimizingCompilerStats>();
+  if (stats_ != nullptr) {
+    // Reuse one object for all inline attempts from this caller to keep Arena memory usage low.
+    if (inline_stats_ == nullptr) {
+      void* storage = graph_->GetArena()->Alloc<OptimizingCompilerStats>(kArenaAllocMisc);
+      inline_stats_ = new (storage) OptimizingCompilerStats;
+    } else {
+      inline_stats_->Reset();
+    }
+  }
   HGraphBuilder builder(callee_graph,
                         &dex_compilation_unit,
                         &outer_compilation_unit_,
@@ -1285,7 +1435,7 @@
                         *code_item,
                         compiler_driver_,
                         codegen_,
-                        inline_stats.get(),
+                        inline_stats_,
                         resolved_method->GetQuickenedInfo(class_linker->GetImagePointerSize()),
                         dex_cache,
                         handles_);
@@ -1373,6 +1523,13 @@
                        << " could not be inlined because one branch always throws and"
                        << " caller does not have an exit block";
         return false;
+      } else if (graph_->HasIrreducibleLoops()) {
+        // TODO(ngeoffray): Support re-computing loop information for graphs with
+        // irreducible loops?
+        VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index)
+                       << " could not be inlined because one branch always throws and"
+                       << " caller has irreducible loops";
+        return false;
       }
     } else {
       has_one_return = true;
@@ -1468,6 +1625,11 @@
   DCHECK_EQ(callee_instruction_counter, callee_graph->GetCurrentInstructionId())
       << "No instructions can be added to the inner graph during inlining into the outer graph";
 
+  if (stats_ != nullptr) {
+    DCHECK(inline_stats_ != nullptr);
+    inline_stats_->AddTo(stats_);
+  }
+
   return true;
 }
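The inline_stats_ handling above combines two idioms: placement new into arena-owned storage (arena memory is reclaimed wholesale, never object by object), and merging per-callee counters back into the caller's stats only on success. A stand-alone sketch of the allocation side, with a malloc-based stand-in for the arena (not ART's ArenaAllocator API):

#include <cstdlib>
#include <new>

struct Stats {
  int simplifications = 0;
  void Reset() { simplifications = 0; }
  void AddTo(Stats* other) const { other->simplifications += simplifications; }
};

// Stand-in arena: hands out raw storage that is never freed individually.
void* ArenaAlloc(std::size_t bytes) { return std::malloc(bytes); }

Stats* GetOrResetStats(Stats*& cached) {
  if (cached == nullptr) {
    void* storage = ArenaAlloc(sizeof(Stats));
    cached = new (storage) Stats();  // placement new: construct in raw storage
  } else {
    cached->Reset();                 // reuse one object across inline attempts
  }
  return cached;
}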
 
@@ -1476,11 +1638,11 @@
                                   const DexCompilationUnit& dex_compilation_unit) {
   // Note: if the outermost_graph_ is being compiled OSR, we should not run any
   // optimization that could lead to a HDeoptimize. The following optimizations do not.
-  HDeadCodeElimination dce(callee_graph, stats_, "dead_code_elimination$inliner");
+  HDeadCodeElimination dce(callee_graph, inline_stats_, "dead_code_elimination$inliner");
   HConstantFolding fold(callee_graph, "constant_folding$inliner");
   HSharpening sharpening(callee_graph, codegen_, dex_compilation_unit, compiler_driver_, handles_);
-  InstructionSimplifier simplify(callee_graph, stats_);
-  IntrinsicsRecognizer intrinsics(callee_graph, stats_);
+  InstructionSimplifier simplify(callee_graph, codegen_, inline_stats_);
+  IntrinsicsRecognizer intrinsics(callee_graph, inline_stats_);
 
   HOptimization* optimizations[] = {
     &intrinsics,
@@ -1504,7 +1666,7 @@
                      dex_compilation_unit,
                      compiler_driver_,
                      handles_,
-                     stats_,
+                     inline_stats_,
                      total_number_of_dex_registers_ + code_item->registers_size_,
                      depth_ + 1);
     inliner.Run();
diff --git a/compiler/optimizing/inliner.h b/compiler/optimizing/inliner.h
index 11aacab..a032042 100644
--- a/compiler/optimizing/inliner.h
+++ b/compiler/optimizing/inliner.h
@@ -20,6 +20,7 @@
 #include "dex_file_types.h"
 #include "invoke_type.h"
 #include "optimization.h"
+#include "jit/profile_compilation_info.h"
 
 namespace art {
 
@@ -51,13 +52,23 @@
         total_number_of_dex_registers_(total_number_of_dex_registers),
         depth_(depth),
         number_of_inlined_instructions_(0),
-        handles_(handles) {}
+        handles_(handles),
+        inline_stats_(nullptr) {}
 
   void Run() OVERRIDE;
 
   static constexpr const char* kInlinerPassName = "inliner";
 
  private:
+  enum InlineCacheType {
+    kInlineCacheNoData = 0,
+    kInlineCacheUninitialized = 1,
+    kInlineCacheMonomorphic = 2,
+    kInlineCachePolymorphic = 3,
+    kInlineCacheMegamorphic = 4,
+    kInlineCacheMissingTypes = 5
+  };
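A sketch of how TryInlineFromInlineCache, declared below, plausibly dispatches on these values (assumed control flow and helper names; its body is outside this hunk):

// switch (inline_cache_type) {
//   case kInlineCacheNoData:
//   case kInlineCacheUninitialized:
//   case kInlineCacheMegamorphic:
//   case kInlineCacheMissingTypes: return false;  // nothing useful to inline on
//   case kInlineCacheMonomorphic:  return TryInlineMonomorphicCall(...);  // assumed name
//   case kInlineCachePolymorphic:  return TryInlinePolymorphicCall(...);  // assumed name
// }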
+
   bool TryInline(HInvoke* invoke_instruction);
 
   // Try to inline `resolved_method` in place of `invoke_instruction`. `do_rtp` is whether
@@ -96,14 +107,54 @@
     REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Create a new HInstanceFieldGet.
-  HInstanceFieldGet* CreateInstanceFieldGet(Handle<mirror::DexCache> dex_cache,
-                                            uint32_t field_index,
+  HInstanceFieldGet* CreateInstanceFieldGet(uint32_t field_index,
+                                            ArtMethod* referrer,
                                             HInstruction* obj);
   // Create a new HInstanceFieldSet.
-  HInstanceFieldSet* CreateInstanceFieldSet(Handle<mirror::DexCache> dex_cache,
-                                            uint32_t field_index,
+  HInstanceFieldSet* CreateInstanceFieldSet(uint32_t field_index,
+                                            ArtMethod* referrer,
                                             HInstruction* obj,
-                                            HInstruction* value);
+                                            HInstruction* value,
+                                            bool* is_final = nullptr);
+
+  // Try inlining the invoke instruction using inline caches.
+  bool TryInlineFromInlineCache(
+      const DexFile& caller_dex_file,
+      HInvoke* invoke_instruction,
+      ArtMethod* resolved_method)
+    REQUIRES_SHARED(Locks::mutator_lock_);
+
+  // Try getting the inline cache from the JIT code cache.
+  // Returns the inline cache type; kInlineCacheNoData means the inline cache
+  // could not be allocated or the invoke info was not found in the profiling info.
+  InlineCacheType GetInlineCacheJIT(
+      HInvoke* invoke_instruction,
+      StackHandleScope<1>* hs,
+      /*out*/Handle<mirror::ObjectArray<mirror::Class>>* inline_cache)
+    REQUIRES_SHARED(Locks::mutator_lock_);
+
+  // Try getting the inline cache from the AOT offline profile.
+  // Returns the inline cache type; kInlineCacheNoData means the inline cache
+  // could not be allocated or the invoke info was not found in the profile.
+  InlineCacheType GetInlineCacheAOT(const DexFile& caller_dex_file,
+      HInvoke* invoke_instruction,
+      StackHandleScope<1>* hs,
+      /*out*/Handle<mirror::ObjectArray<mirror::Class>>* inline_cache)
+    REQUIRES_SHARED(Locks::mutator_lock_);
+
+  // Extract the mirror classes from the offline profile and add them to the `inline_cache`.
+  // Note that even if we have profile data for the invoke, the inline_cache might contain
+  // only null entries if the types cannot be resolved.
+  InlineCacheType ExtractClassesFromOfflineProfile(
+      const HInvoke* invoke_instruction,
+      const ProfileCompilationInfo::OfflineProfileMethodInfo& offline_profile,
+      /*out*/Handle<mirror::ObjectArray<mirror::Class>> inline_cache)
+    REQUIRES_SHARED(Locks::mutator_lock_);
+
+  // Compute the inline cache type.
+  InlineCacheType GetInlineCacheType(
+      const Handle<mirror::ObjectArray<mirror::Class>>& classes)
+    REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Try to inline the target of a monomorphic call. If successful, the code
   // in the graph will look like:
@@ -218,6 +269,10 @@
   size_t number_of_inlined_instructions_;
   VariableSizedHandleScope* const handles_;
 
+  // Used to record stats about optimizations on the inlined graph.
+  // If the inlining is successful, these stats are merged to the caller graph's stats.
+  OptimizingCompilerStats* inline_stats_;
+
   DISALLOW_COPY_AND_ASSIGN(HInliner);
 };
 
diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc
index c60f6e5..88f67fa 100644
--- a/compiler/optimizing/instruction_builder.cc
+++ b/compiler/optimizing/instruction_builder.cc
@@ -37,37 +37,45 @@
   return block_builder_->GetBlockAt(dex_pc);
 }
 
-ArenaVector<HInstruction*>* HInstructionBuilder::GetLocalsFor(HBasicBlock* block) {
+inline ArenaVector<HInstruction*>* HInstructionBuilder::GetLocalsFor(HBasicBlock* block) {
   ArenaVector<HInstruction*>* locals = &locals_for_[block->GetBlockId()];
   const size_t vregs = graph_->GetNumberOfVRegs();
-  if (locals->size() != vregs) {
-    locals->resize(vregs, nullptr);
+  if (locals->size() == vregs) {
+    return locals;
+  }
+  return GetLocalsForWithAllocation(block, locals, vregs);
+}
 
-    if (block->IsCatchBlock()) {
-      // We record incoming inputs of catch phis at throwing instructions and
-      // must therefore eagerly create the phis. Phis for undefined vregs will
-      // be deleted when the first throwing instruction with the vreg undefined
-      // is encountered. Unused phis will be removed by dead phi analysis.
-      for (size_t i = 0; i < vregs; ++i) {
-        // No point in creating the catch phi if it is already undefined at
-        // the first throwing instruction.
-        HInstruction* current_local_value = (*current_locals_)[i];
-        if (current_local_value != nullptr) {
-          HPhi* phi = new (arena_) HPhi(
-              arena_,
-              i,
-              0,
-              current_local_value->GetType());
-          block->AddPhi(phi);
-          (*locals)[i] = phi;
-        }
+ArenaVector<HInstruction*>* HInstructionBuilder::GetLocalsForWithAllocation(
+    HBasicBlock* block,
+    ArenaVector<HInstruction*>* locals,
+    const size_t vregs) {
+  DCHECK_NE(locals->size(), vregs);
+  locals->resize(vregs, nullptr);
+  if (block->IsCatchBlock()) {
+    // We record incoming inputs of catch phis at throwing instructions and
+    // must therefore eagerly create the phis. Phis for undefined vregs will
+    // be deleted when the first throwing instruction with the vreg undefined
+    // is encountered. Unused phis will be removed by dead phi analysis.
+    for (size_t i = 0; i < vregs; ++i) {
+      // No point in creating the catch phi if it is already undefined at
+      // the first throwing instruction.
+      HInstruction* current_local_value = (*current_locals_)[i];
+      if (current_local_value != nullptr) {
+        HPhi* phi = new (arena_) HPhi(
+            arena_,
+            i,
+            0,
+            current_local_value->GetType());
+        block->AddPhi(phi);
+        (*locals)[i] = phi;
       }
     }
   }
   return locals;
 }
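The split above is the classic inline-fast-path / out-of-line-slow-path idiom: GetLocalsFor() is now just a size check that callers can inline, while the rare resize-and-create-phis work stays behind a call. A generic stand-alone sketch of the shape (illustrative names, not ART code):

#include <cstddef>
#include <vector>

std::vector<int>* GetSlotsWithAllocation(std::vector<int>* slots, std::size_t n);

// Hot path: small enough for the compiler to inline at every call site.
inline std::vector<int>* GetSlots(std::vector<int>* slots, std::size_t n) {
  if (slots->size() == n) {
    return slots;  // common case: already sized, no out-of-line call taken
  }
  return GetSlotsWithAllocation(slots, n);
}

// Cold path: kept out of line so it does not bloat the inlined fast path.
std::vector<int>* GetSlotsWithAllocation(std::vector<int>* slots, std::size_t n) {
  slots->resize(n, 0);
  return slots;
}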
 
-HInstruction* HInstructionBuilder::ValueOfLocalAt(HBasicBlock* block, size_t local) {
+inline HInstruction* HInstructionBuilder::ValueOfLocalAt(HBasicBlock* block, size_t local) {
   ArenaVector<HInstruction*>* locals = GetLocalsFor(block);
   return (*locals)[local];
 }
@@ -1676,10 +1684,10 @@
       dex_pc,
       needs_access_check);
 
-  HLoadClass::LoadKind load_kind = HSharpening::SharpenClass(load_class,
-                                                             code_generator_,
-                                                             compiler_driver_,
-                                                             *dex_compilation_unit_);
+  HLoadClass::LoadKind load_kind = HSharpening::ComputeLoadClassKind(load_class,
+                                                                     code_generator_,
+                                                                     compiler_driver_,
+                                                                     *dex_compilation_unit_);
 
   if (load_kind == HLoadClass::LoadKind::kInvalid) {
     // We actually cannot reference this class, we're forced to bail.
diff --git a/compiler/optimizing/instruction_builder.h b/compiler/optimizing/instruction_builder.h
index e735a0c..7fdc188 100644
--- a/compiler/optimizing/instruction_builder.h
+++ b/compiler/optimizing/instruction_builder.h
@@ -93,6 +93,10 @@
   HBasicBlock* FindBlockStartingAt(uint32_t dex_pc) const;
 
   ArenaVector<HInstruction*>* GetLocalsFor(HBasicBlock* block);
+  // Out-of-line slow path of GetLocalsFor(), kept separate so that the fast
+  // path of GetLocalsFor() stays small enough to be inlined by callers.
+  ArenaVector<HInstruction*>* GetLocalsForWithAllocation(
+      HBasicBlock* block, ArenaVector<HInstruction*>* locals, const size_t vregs);
   HInstruction* ValueOfLocalAt(HBasicBlock* block, size_t local);
   HInstruction* LoadLocal(uint32_t register_index, Primitive::Type type) const;
   HInstruction* LoadNullCheckedLocal(uint32_t register_index, uint32_t dex_pc);
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index 35f59cb..17421fc 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -19,14 +19,18 @@
 #include "escape.h"
 #include "intrinsics.h"
 #include "mirror/class-inl.h"
+#include "sharpening.h"
 #include "scoped_thread_state_change-inl.h"
 
 namespace art {
 
 class InstructionSimplifierVisitor : public HGraphDelegateVisitor {
  public:
-  InstructionSimplifierVisitor(HGraph* graph, OptimizingCompilerStats* stats)
+  InstructionSimplifierVisitor(HGraph* graph,
+                               CodeGenerator* codegen,
+                               OptimizingCompilerStats* stats)
       : HGraphDelegateVisitor(graph),
+        codegen_(codegen),
         stats_(stats) {}
 
   void Run();
@@ -112,6 +116,7 @@
   void SimplifyAllocationIntrinsic(HInvoke* invoke);
   void SimplifyMemBarrier(HInvoke* invoke, MemBarrierKind barrier_kind);
 
+  CodeGenerator* codegen_;
   OptimizingCompilerStats* stats_;
   bool simplification_occurred_ = false;
   int simplifications_at_current_position_ = 0;
@@ -123,7 +128,7 @@
 };
 
 void InstructionSimplifier::Run() {
-  InstructionSimplifierVisitor visitor(graph_, stats_);
+  InstructionSimplifierVisitor visitor(graph_, codegen_, stats_);
   visitor.Run();
 }
 
@@ -1805,6 +1810,8 @@
 
   {
     ScopedObjectAccess soa(Thread::Current());
+    Primitive::Type source_component_type = Primitive::kPrimVoid;
+    Primitive::Type destination_component_type = Primitive::kPrimVoid;
     ReferenceTypeInfo destination_rti = destination->GetReferenceTypeInfo();
     if (destination_rti.IsValid()) {
       if (destination_rti.IsObjectArray()) {
@@ -1814,6 +1821,8 @@
         optimizations.SetDestinationIsTypedObjectArray();
       }
       if (destination_rti.IsPrimitiveArrayClass()) {
+        destination_component_type =
+            destination_rti.GetTypeHandle()->GetComponentType()->GetPrimitiveType();
         optimizations.SetDestinationIsPrimitiveArray();
       } else if (destination_rti.IsNonPrimitiveArrayClass()) {
         optimizations.SetDestinationIsNonPrimitiveArray();
@@ -1826,10 +1835,55 @@
       }
       if (source_rti.IsPrimitiveArrayClass()) {
         optimizations.SetSourceIsPrimitiveArray();
+        source_component_type = source_rti.GetTypeHandle()->GetComponentType()->GetPrimitiveType();
       } else if (source_rti.IsNonPrimitiveArrayClass()) {
         optimizations.SetSourceIsNonPrimitiveArray();
       }
     }
+    // For primitive arrays, use their optimized ArtMethod implementations.
+    if ((source_component_type != Primitive::kPrimVoid) &&
+        (source_component_type == destination_component_type)) {
+      ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+      PointerSize image_size = class_linker->GetImagePointerSize();
+      HInvokeStaticOrDirect* invoke = instruction->AsInvokeStaticOrDirect();
+      mirror::Class* system = invoke->GetResolvedMethod()->GetDeclaringClass();
+      ArtMethod* method = nullptr;
+      switch (source_component_type) {
+        case Primitive::kPrimBoolean:
+          method = system->FindDeclaredDirectMethod("arraycopy", "([ZI[ZII)V", image_size);
+          break;
+        case Primitive::kPrimByte:
+          method = system->FindDeclaredDirectMethod("arraycopy", "([BI[BII)V", image_size);
+          break;
+        case Primitive::kPrimChar:
+          method = system->FindDeclaredDirectMethod("arraycopy", "([CI[CII)V", image_size);
+          break;
+        case Primitive::kPrimShort:
+          method = system->FindDeclaredDirectMethod("arraycopy", "([SI[SII)V", image_size);
+          break;
+        case Primitive::kPrimInt:
+          method = system->FindDeclaredDirectMethod("arraycopy", "([II[III)V", image_size);
+          break;
+        case Primitive::kPrimFloat:
+          method = system->FindDeclaredDirectMethod("arraycopy", "([FI[FII)V", image_size);
+          break;
+        case Primitive::kPrimLong:
+          method = system->FindDeclaredDirectMethod("arraycopy", "([JI[JII)V", image_size);
+          break;
+        case Primitive::kPrimDouble:
+          method = system->FindDeclaredDirectMethod("arraycopy", "([DI[DII)V", image_size);
+          break;
+        default:
+          LOG(FATAL) << "Unreachable";
+      }
+      DCHECK(method != nullptr);
+      invoke->SetResolvedMethod(method);
+      // Sharpen the new invoke. Note that we do not update the dex method index of
+      // the invoke, as we would need to look it up in the current dex file, and it
+      // is unlikely that it exists. The most common situation for such typed
+      // arraycopy methods is a direct pointer to the boot image.
+      HSharpening::SharpenInvokeStaticOrDirect(invoke, codegen_);
+    }
   }
 }
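Each descriptor string in the switch above follows one mechanical pattern: for element shorty X, the signature is ([XI[XII)V, i.e. arraycopy(X[] src, int srcPos, X[] dst, int dstPos, int length). A hypothetical helper (not in ART) showing the construction:

#include <string>

// Builds "([II[III)V" from 'I', "([JI[JII)V" from 'J', and so on.
std::string MakeArraycopySignature(char shorty) {
  std::string array = std::string("[") + shorty;  // e.g. "[I"
  return "(" + array + "I" + array + "II)V";      // (src, srcPos, dst, dstPos, len)
}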
 
diff --git a/compiler/optimizing/instruction_simplifier.h b/compiler/optimizing/instruction_simplifier.h
index 7fe1067..f7329a4 100644
--- a/compiler/optimizing/instruction_simplifier.h
+++ b/compiler/optimizing/instruction_simplifier.h
@@ -23,6 +23,8 @@
 
 namespace art {
 
+class CodeGenerator;
+
 /**
  * Implements optimizations specific to each instruction.
  *
@@ -36,15 +38,19 @@
 class InstructionSimplifier : public HOptimization {
  public:
   explicit InstructionSimplifier(HGraph* graph,
+                                 CodeGenerator* codegen,
                                  OptimizingCompilerStats* stats = nullptr,
                                  const char* name = kInstructionSimplifierPassName)
-      : HOptimization(graph, name, stats) {}
+      : HOptimization(graph, name, stats),
+        codegen_(codegen) {}
 
   static constexpr const char* kInstructionSimplifierPassName = "instruction_simplifier";
 
   void Run() OVERRIDE;
 
  private:
+  CodeGenerator* codegen_;
+
   DISALLOW_COPY_AND_ASSIGN(InstructionSimplifier);
 };
 
diff --git a/compiler/optimizing/instruction_simplifier_arm.cc b/compiler/optimizing/instruction_simplifier_arm.cc
index 56e4c7a..5f5e29b 100644
--- a/compiler/optimizing/instruction_simplifier_arm.cc
+++ b/compiler/optimizing/instruction_simplifier_arm.cc
@@ -15,23 +15,124 @@
  */
 
 #include "code_generator.h"
+#include "common_arm.h"
 #include "instruction_simplifier_arm.h"
 #include "instruction_simplifier_shared.h"
 #include "mirror/array-inl.h"
+#include "nodes.h"
 
 namespace art {
+
+using helpers::CanFitInShifterOperand;
+using helpers::HasShifterOperand;
+
 namespace arm {
 
-void InstructionSimplifierArmVisitor::VisitMul(HMul* instruction) {
-  if (TryCombineMultiplyAccumulate(instruction, kArm)) {
+using helpers::ShifterOperandSupportsExtension;
+
+bool InstructionSimplifierArmVisitor::TryMergeIntoShifterOperand(HInstruction* use,
+                                                                 HInstruction* bitfield_op,
+                                                                 bool do_merge) {
+  DCHECK(HasShifterOperand(use, kArm));
+  DCHECK(use->IsBinaryOperation());
+  DCHECK(CanFitInShifterOperand(bitfield_op));
+  DCHECK(!bitfield_op->HasEnvironmentUses());
+
+  Primitive::Type type = use->GetType();
+  if (type != Primitive::kPrimInt && type != Primitive::kPrimLong) {
+    return false;
+  }
+
+  HInstruction* left = use->InputAt(0);
+  HInstruction* right = use->InputAt(1);
+  DCHECK(left == bitfield_op || right == bitfield_op);
+
+  if (left == right) {
+    // TODO: Handle special transformations in this situation?
+    // For example should we transform `(x << 1) + (x << 1)` into `(x << 2)`?
+    // Or should this be part of a separate transformation logic?
+    return false;
+  }
+
+  bool is_commutative = use->AsBinaryOperation()->IsCommutative();
+  HInstruction* other_input;
+  if (bitfield_op == right) {
+    other_input = left;
+  } else {
+    if (is_commutative) {
+      other_input = right;
+    } else {
+      return false;
+    }
+  }
+
+  HDataProcWithShifterOp::OpKind op_kind;
+  int shift_amount = 0;
+
+  HDataProcWithShifterOp::GetOpInfoFromInstruction(bitfield_op, &op_kind, &shift_amount);
+  shift_amount &= use->GetType() == Primitive::kPrimInt
+      ? kMaxIntShiftDistance
+      : kMaxLongShiftDistance;
+
+  if (HDataProcWithShifterOp::IsExtensionOp(op_kind)) {
+    if (!ShifterOperandSupportsExtension(use)) {
+      return false;
+    }
+  // Shift by 1 is a special case that results in the same number and type of instructions
+  // as this simplification, but potentially shorter code.
+  } else if (type == Primitive::kPrimLong && shift_amount == 1) {
+    return false;
+  }
+
+  if (do_merge) {
+    HDataProcWithShifterOp* alu_with_op =
+        new (GetGraph()->GetArena()) HDataProcWithShifterOp(use,
+                                                            other_input,
+                                                            bitfield_op->InputAt(0),
+                                                            op_kind,
+                                                            shift_amount,
+                                                            use->GetDexPc());
+    use->GetBlock()->ReplaceAndRemoveInstructionWith(use, alu_with_op);
+    if (bitfield_op->GetUses().empty()) {
+      bitfield_op->GetBlock()->RemoveInstruction(bitfield_op);
+    }
     RecordSimplification();
   }
+
+  return true;
 }
 
-void InstructionSimplifierArmVisitor::VisitOr(HOr* instruction) {
-  if (TryMergeNegatedInput(instruction)) {
-    RecordSimplification();
+// Merge a bitfield move instruction into its uses if it can be merged into all of them.
+bool InstructionSimplifierArmVisitor::TryMergeIntoUsersShifterOperand(HInstruction* bitfield_op) {
+  DCHECK(CanFitInShifterOperand(bitfield_op));
+
+  if (bitfield_op->HasEnvironmentUses()) {
+    return false;
   }
+
+  const HUseList<HInstruction*>& uses = bitfield_op->GetUses();
+
+  // Check whether we can merge the instruction into all its users' shifter operands.
+  for (const HUseListNode<HInstruction*>& use : uses) {
+    HInstruction* user = use.GetUser();
+    if (!HasShifterOperand(user, kArm)) {
+      return false;
+    }
+    if (!CanMergeIntoShifterOperand(user, bitfield_op)) {
+      return false;
+    }
+  }
+
+  // Merge the instruction into its uses.
+  for (auto it = uses.begin(), end = uses.end(); it != end; /* ++it below */) {
+    HInstruction* user = it->GetUser();
+    // Increment `it` now because `*it` will disappear thanks to MergeIntoShifterOperand().
+    ++it;
+    bool merged = MergeIntoShifterOperand(user, bitfield_op);
+    DCHECK(merged);
+  }
+
+  return true;
 }
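At the source level, the merge performed above folds a shift (or an integral type conversion) into the flexible second operand of an ALU instruction. An illustrative C++ function showing the kind of pattern that benefits (assumed example, not from the ART tests):

#include <cstdint>

// Once the HShl for `y << 3` is merged into the HAdd as a
// HDataProcWithShifterOp, the body below can be emitted as a single
// `add r0, r1, r2, lsl #3` instead of a shift followed by an add.
int32_t AddShifted(int32_t x, int32_t y) {
  return x + (y << 3);
}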
 
 void InstructionSimplifierArmVisitor::VisitAnd(HAnd* instruction) {
@@ -89,5 +190,49 @@
   }
 }
 
+void InstructionSimplifierArmVisitor::VisitMul(HMul* instruction) {
+  if (TryCombineMultiplyAccumulate(instruction, kArm)) {
+    RecordSimplification();
+  }
+}
+
+void InstructionSimplifierArmVisitor::VisitOr(HOr* instruction) {
+  if (TryMergeNegatedInput(instruction)) {
+    RecordSimplification();
+  }
+}
+
+void InstructionSimplifierArmVisitor::VisitShl(HShl* instruction) {
+  if (instruction->InputAt(1)->IsConstant()) {
+    TryMergeIntoUsersShifterOperand(instruction);
+  }
+}
+
+void InstructionSimplifierArmVisitor::VisitShr(HShr* instruction) {
+  if (instruction->InputAt(1)->IsConstant()) {
+    TryMergeIntoUsersShifterOperand(instruction);
+  }
+}
+
+void InstructionSimplifierArmVisitor::VisitTypeConversion(HTypeConversion* instruction) {
+  Primitive::Type result_type = instruction->GetResultType();
+  Primitive::Type input_type = instruction->GetInputType();
+
+  if (input_type == result_type) {
+    // We let the arch-independent code handle this.
+    return;
+  }
+
+  if (Primitive::IsIntegralType(result_type) && Primitive::IsIntegralType(input_type)) {
+    TryMergeIntoUsersShifterOperand(instruction);
+  }
+}
+
+void InstructionSimplifierArmVisitor::VisitUShr(HUShr* instruction) {
+  if (instruction->InputAt(1)->IsConstant()) {
+    TryMergeIntoUsersShifterOperand(instruction);
+  }
+}
+
 }  // namespace arm
 }  // namespace art
diff --git a/compiler/optimizing/instruction_simplifier_arm.h b/compiler/optimizing/instruction_simplifier_arm.h
index 9b54511..e2ed257 100644
--- a/compiler/optimizing/instruction_simplifier_arm.h
+++ b/compiler/optimizing/instruction_simplifier_arm.h
@@ -35,11 +35,41 @@
     }
   }
 
-  void VisitMul(HMul* instruction) OVERRIDE;
-  void VisitOr(HOr* instruction) OVERRIDE;
+  bool TryMergeIntoUsersShifterOperand(HInstruction* instruction);
+  bool TryMergeIntoShifterOperand(HInstruction* use, HInstruction* bitfield_op, bool do_merge);
+  bool CanMergeIntoShifterOperand(HInstruction* use, HInstruction* bitfield_op) {
+    return TryMergeIntoShifterOperand(use, bitfield_op, /* do_merge */ false);
+  }
+  bool MergeIntoShifterOperand(HInstruction* use, HInstruction* bitfield_op) {
+    DCHECK(CanMergeIntoShifterOperand(use, bitfield_op));
+    return TryMergeIntoShifterOperand(use, bitfield_op, /* do_merge */ true);
+  }
+
+  /**
+   * This simplifier uses a special-purpose BB visitor.
+   * (1) No need to visit Phi nodes.
+   * (2) Since statements can be removed in a "forward" fashion,
+   *     the visitor should test if each statement is still there.
+   */
+  void VisitBasicBlock(HBasicBlock* block) OVERRIDE {
+    // TODO: fragile iteration, provide more robust iterators?
+    for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
+      HInstruction* instruction = it.Current();
+      if (instruction->IsInBlock()) {
+        instruction->Accept(this);
+      }
+    }
+  }
+
   void VisitAnd(HAnd* instruction) OVERRIDE;
   void VisitArrayGet(HArrayGet* instruction) OVERRIDE;
   void VisitArraySet(HArraySet* instruction) OVERRIDE;
+  void VisitMul(HMul* instruction) OVERRIDE;
+  void VisitOr(HOr* instruction) OVERRIDE;
+  void VisitShl(HShl* instruction) OVERRIDE;
+  void VisitShr(HShr* instruction) OVERRIDE;
+  void VisitTypeConversion(HTypeConversion* instruction) OVERRIDE;
+  void VisitUShr(HUShr* instruction) OVERRIDE;
 
   OptimizingCompilerStats* stats_;
 };
diff --git a/compiler/optimizing/instruction_simplifier_arm64.cc b/compiler/optimizing/instruction_simplifier_arm64.cc
index 6d107d5..73b7b2b 100644
--- a/compiler/optimizing/instruction_simplifier_arm64.cc
+++ b/compiler/optimizing/instruction_simplifier_arm64.cc
@@ -22,16 +22,18 @@
 #include "mirror/string.h"
 
 namespace art {
-namespace arm64 {
 
 using helpers::CanFitInShifterOperand;
 using helpers::HasShifterOperand;
+
+namespace arm64 {
+
 using helpers::ShifterOperandSupportsExtension;
 
 bool InstructionSimplifierArm64Visitor::TryMergeIntoShifterOperand(HInstruction* use,
                                                                    HInstruction* bitfield_op,
                                                                    bool do_merge) {
-  DCHECK(HasShifterOperand(use));
+  DCHECK(HasShifterOperand(use, kArm64));
   DCHECK(use->IsBinaryOperation() || use->IsNeg());
   DCHECK(CanFitInShifterOperand(bitfield_op));
   DCHECK(!bitfield_op->HasEnvironmentUses());
@@ -72,23 +74,22 @@
     }
   }
 
-  HArm64DataProcWithShifterOp::OpKind op_kind;
+  HDataProcWithShifterOp::OpKind op_kind;
   int shift_amount = 0;
-  HArm64DataProcWithShifterOp::GetOpInfoFromInstruction(bitfield_op, &op_kind, &shift_amount);
+  HDataProcWithShifterOp::GetOpInfoFromInstruction(bitfield_op, &op_kind, &shift_amount);
 
-  if (HArm64DataProcWithShifterOp::IsExtensionOp(op_kind) &&
-      !ShifterOperandSupportsExtension(use)) {
+  if (HDataProcWithShifterOp::IsExtensionOp(op_kind) && !ShifterOperandSupportsExtension(use)) {
     return false;
   }
 
   if (do_merge) {
-    HArm64DataProcWithShifterOp* alu_with_op =
-        new (GetGraph()->GetArena()) HArm64DataProcWithShifterOp(use,
-                                                                 other_input,
-                                                                 bitfield_op->InputAt(0),
-                                                                 op_kind,
-                                                                 shift_amount,
-                                                                 use->GetDexPc());
+    HDataProcWithShifterOp* alu_with_op =
+        new (GetGraph()->GetArena()) HDataProcWithShifterOp(use,
+                                                            other_input,
+                                                            bitfield_op->InputAt(0),
+                                                            op_kind,
+                                                            shift_amount,
+                                                            use->GetDexPc());
     use->GetBlock()->ReplaceAndRemoveInstructionWith(use, alu_with_op);
     if (bitfield_op->GetUses().empty()) {
       bitfield_op->GetBlock()->RemoveInstruction(bitfield_op);
@@ -112,7 +113,7 @@
   // Check whether we can merge the instruction in all its users' shifter operand.
   for (const HUseListNode<HInstruction*>& use : uses) {
     HInstruction* user = use.GetUser();
-    if (!HasShifterOperand(user)) {
+    if (!HasShifterOperand(user, kArm64)) {
       return false;
     }
     if (!CanMergeIntoShifterOperand(user, bitfield_op)) {
diff --git a/compiler/optimizing/instruction_simplifier_arm64.h b/compiler/optimizing/instruction_simplifier_arm64.h
index d4cb1f1..65654f5 100644
--- a/compiler/optimizing/instruction_simplifier_arm64.h
+++ b/compiler/optimizing/instruction_simplifier_arm64.h
@@ -40,11 +40,11 @@
                                   HInstruction* bitfield_op,
                                   bool do_merge);
   bool CanMergeIntoShifterOperand(HInstruction* use, HInstruction* bitfield_op) {
-    return TryMergeIntoShifterOperand(use, bitfield_op, false);
+    return TryMergeIntoShifterOperand(use, bitfield_op, /* do_merge */ false);
   }
   bool MergeIntoShifterOperand(HInstruction* use, HInstruction* bitfield_op) {
     DCHECK(CanMergeIntoShifterOperand(use, bitfield_op));
-    return TryMergeIntoShifterOperand(use, bitfield_op, true);
+    return TryMergeIntoShifterOperand(use, bitfield_op, /* do_merge */ true);
   }
 
   /**
diff --git a/compiler/optimizing/instruction_simplifier_shared.h b/compiler/optimizing/instruction_simplifier_shared.h
index 56804f5..83e3ffc 100644
--- a/compiler/optimizing/instruction_simplifier_shared.h
+++ b/compiler/optimizing/instruction_simplifier_shared.h
@@ -21,6 +21,33 @@
 
 namespace art {
 
+namespace helpers {
+
+inline bool CanFitInShifterOperand(HInstruction* instruction) {
+  if (instruction->IsTypeConversion()) {
+    HTypeConversion* conversion = instruction->AsTypeConversion();
+    Primitive::Type result_type = conversion->GetResultType();
+    Primitive::Type input_type = conversion->GetInputType();
+    // We don't expect to see the same type as input and result.
+    return Primitive::IsIntegralType(result_type) && Primitive::IsIntegralType(input_type) &&
+        (result_type != input_type);
+  } else {
+    return (instruction->IsShl() && instruction->AsShl()->InputAt(1)->IsIntConstant()) ||
+        (instruction->IsShr() && instruction->AsShr()->InputAt(1)->IsIntConstant()) ||
+        (instruction->IsUShr() && instruction->AsUShr()->InputAt(1)->IsIntConstant());
+  }
+}
+
+inline bool HasShifterOperand(HInstruction* instr, InstructionSet isa) {
+  // On ARM64 `neg` instructions are an alias of `sub` using the zero register
+  // as the first register input.
+  bool res = instr->IsAdd() || instr->IsAnd() || (isa == kArm64 && instr->IsNeg()) ||
+      instr->IsOr() || instr->IsSub() || instr->IsXor();
+  return res;
+}
+
+}  // namespace helpers
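The isa parameter exists because the set of instructions with a flexible shifter operand differs between the targets: HNeg qualifies only on ARM64, per the comment about the zero register. A toy model of the predicate, with made-up enums standing in for the HInstruction kinds:

#include <cassert>

enum class Isa { kArm, kArm64 };
enum class Kind { kAdd, kAnd, kNeg, kOr, kSub, kXor, kMul };

bool HasShifterOperandModel(Kind kind, Isa isa) {
  return kind == Kind::kAdd || kind == Kind::kAnd ||
         (isa == Isa::kArm64 && kind == Kind::kNeg) ||
         kind == Kind::kOr || kind == Kind::kSub || kind == Kind::kXor;
}

int main() {
  assert(HasShifterOperandModel(Kind::kNeg, Isa::kArm64));
  assert(!HasShifterOperandModel(Kind::kNeg, Isa::kArm));
  assert(!HasShifterOperandModel(Kind::kMul, Isa::kArm64));  // mul has no shifter operand
  return 0;
}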
+
 bool TryCombineMultiplyAccumulate(HMul* mul, InstructionSet isa);
 // For bitwise operations (And/Or/Xor) with a negated input, try to use
 // a negated bitwise instruction.
diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc
index 17d683f..8df80ad 100644
--- a/compiler/optimizing/intrinsics.cc
+++ b/compiler/optimizing/intrinsics.cc
@@ -19,6 +19,7 @@
 #include "art_method.h"
 #include "class_linker.h"
 #include "driver/compiler_driver.h"
+#include "driver/compiler_options.h"
 #include "invoke_type.h"
 #include "mirror/dex_cache-inl.h"
 #include "nodes.h"
@@ -178,4 +179,112 @@
   return os;
 }
 
+void IntrinsicVisitor::ComputeIntegerValueOfLocations(HInvoke* invoke,
+                                                      CodeGenerator* codegen,
+                                                      Location return_location,
+                                                      Location first_argument_location) {
+  if (Runtime::Current()->IsAotCompiler()) {
+    if (codegen->GetCompilerOptions().IsBootImage() ||
+        codegen->GetCompilerOptions().GetCompilePic()) {
+      // TODO(ngeoffray): Support boot image compilation.
+      return;
+    }
+  }
+
+  IntegerValueOfInfo info = ComputeIntegerValueOfInfo();
+
+  // Most common case is that we have found all we needed (classes are initialized
+  // and in the boot image). Bail if not.
+  if (info.integer_cache == nullptr ||
+      info.integer == nullptr ||
+      info.cache == nullptr ||
+      info.value_offset == 0 ||
+      // low and high cannot be 0, per the spec.
+      info.low == 0 ||
+      info.high == 0) {
+    LOG(INFO) << "Integer.valueOf will not be optimized";
+    return;
+  }
+
+  // The intrinsic will call the runtime if it needs to allocate a j.l.Integer.
+  LocationSummary* locations = new (invoke->GetBlock()->GetGraph()->GetArena()) LocationSummary(
+      invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
+  if (!invoke->InputAt(0)->IsConstant()) {
+    locations->SetInAt(0, Location::RequiresRegister());
+  }
+  locations->AddTemp(first_argument_location);
+  locations->SetOut(return_location);
+}
+
+IntrinsicVisitor::IntegerValueOfInfo IntrinsicVisitor::ComputeIntegerValueOfInfo() {
+  // Note that we could cache all of the data looked up here, but there's no good
+  // location for it. We don't want to add it to WellKnownClasses, to avoid creating global
+  // JNI values. Adding it as state to the compiler singleton seems like the wrong
+  // separation of concerns.
+  // The need for this data should be pretty rare though.
+
+  // The most common case is that the classes are in the boot image and initialized,
+  // which is easy to generate code for. We bail if not.
+  Thread* self = Thread::Current();
+  ScopedObjectAccess soa(self);
+  Runtime* runtime = Runtime::Current();
+  ClassLinker* class_linker = runtime->GetClassLinker();
+  gc::Heap* heap = runtime->GetHeap();
+  IntegerValueOfInfo info;
+  info.integer_cache = class_linker->FindSystemClass(self, "Ljava/lang/Integer$IntegerCache;");
+  if (info.integer_cache == nullptr) {
+    self->ClearException();
+    return info;
+  }
+  if (!heap->ObjectIsInBootImageSpace(info.integer_cache) || !info.integer_cache->IsInitialized()) {
+    // Optimization only works if the class is initialized and in the boot image.
+    return info;
+  }
+  info.integer = class_linker->FindSystemClass(self, "Ljava/lang/Integer;");
+  if (info.integer == nullptr) {
+    self->ClearException();
+    return info;
+  }
+  if (!heap->ObjectIsInBootImageSpace(info.integer) || !info.integer->IsInitialized()) {
+    // Optimization only works if the class is initialized and in the boot image.
+    return info;
+  }
+
+  ArtField* field = info.integer_cache->FindDeclaredStaticField("cache", "[Ljava/lang/Integer;");
+  if (field == nullptr) {
+    return info;
+  }
+  info.cache = static_cast<mirror::ObjectArray<mirror::Object>*>(
+      field->GetObject(info.integer_cache).Ptr());
+  if (info.cache == nullptr) {
+    return info;
+  }
+
+  if (!heap->ObjectIsInBootImageSpace(info.cache)) {
+    // Optimization only works if the object is in the boot image.
+    return info;
+  }
+
+  field = info.integer->FindDeclaredInstanceField("value", "I");
+  if (field == nullptr) {
+    return info;
+  }
+  info.value_offset = field->GetOffset().Int32Value();
+
+  field = info.integer_cache->FindDeclaredStaticField("low", "I");
+  if (field == nullptr) {
+    return info;
+  }
+  info.low = field->GetInt(info.integer_cache);
+
+  field = info.integer_cache->FindDeclaredStaticField("high", "I");
+  if (field == nullptr) {
+    return info;
+  }
+  info.high = field->GetInt(info.integer_cache);
+
+  DCHECK_EQ(info.cache->GetLength(), info.high - info.low + 1);
+  return info;
+}
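For reference, the Java-side contract these lookups mirror: java.lang.Integer$IntegerCache pre-boxes the values low..high, where low is -128 and high is at least 127 per the language spec, which is why a zero low or high is treated as lookup failure above. A minimal model of valueOf, with a hypothetical BoxedInt standing in for the mirror-level j.l.Integer:

#include <cstdint>

struct BoxedInt { int32_t value; };

// cache[i] holds the shared box for (low + i); out-of-range values allocate.
BoxedInt* ValueOfModel(int32_t v, BoxedInt** cache, int32_t low, int32_t high) {
  if (v >= low && v <= high) {
    return cache[v - low];  // shared, identity-comparable boxed instance
  }
  return new BoxedInt{v};   // fresh allocation, like the runtime call path
}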
+
 }  // namespace art
diff --git a/compiler/optimizing/intrinsics.h b/compiler/optimizing/intrinsics.h
index 6425e13..9da5a7f 100644
--- a/compiler/optimizing/intrinsics.h
+++ b/compiler/optimizing/intrinsics.h
@@ -113,6 +113,39 @@
     codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
   }
 
+  static void ComputeIntegerValueOfLocations(HInvoke* invoke,
+                                             CodeGenerator* codegen,
+                                             Location return_location,
+                                             Location first_argument_location);
+
+  // Temporary data structure for holding data useful to the Integer.valueOf
+  // intrinsic. We only use it if the mirror::Class* objects are in the boot image,
+  // so it is fine to keep raw mirror::Class pointers in this structure.
+  struct IntegerValueOfInfo {
+    IntegerValueOfInfo()
+        : integer_cache(nullptr),
+          integer(nullptr),
+          cache(nullptr),
+          low(0),
+          high(0),
+          value_offset(0) {}
+
+    // The java.lang.Integer$IntegerCache class.
+    mirror::Class* integer_cache;
+    // The java.lang.Integer class.
+    mirror::Class* integer;
+    // Value of java.lang.Integer$IntegerCache#cache.
+    mirror::ObjectArray<mirror::Object>* cache;
+    // Value of java.lang.Integer$IntegerCache#low.
+    int32_t low;
+    // Value of java.lang.Integer$IntegerCache#high.
+    int32_t high;
+    // The offset of java.lang.Integer.value.
+    int32_t value_offset;
+  };
+
+  static IntegerValueOfInfo ComputeIntegerValueOfInfo();
+
  protected:
   IntrinsicVisitor() {}
 
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index c262cf9..28095c4 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -129,6 +129,7 @@
 
 IntrinsicLocationsBuilderARM::IntrinsicLocationsBuilderARM(CodeGeneratorARM* codegen)
     : arena_(codegen->GetGraph()->GetArena()),
+      codegen_(codegen),
       assembler_(codegen->GetAssembler()),
       features_(codegen->GetInstructionSetFeatures()) {}
 
@@ -1946,6 +1947,8 @@
   }
 
   if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+    // TODO: Also convert this intrinsic to the IsGcMarking strategy?
+
     // The base destination address is computed later, as `temp2` is
     // used for intermediate computations.
 
@@ -2644,6 +2647,75 @@
   __ Bind(slow_path->GetExitLabel());
 }
 
+void IntrinsicLocationsBuilderARM::VisitIntegerValueOf(HInvoke* invoke) {
+  InvokeRuntimeCallingConvention calling_convention;
+  IntrinsicVisitor::ComputeIntegerValueOfLocations(
+      invoke,
+      codegen_,
+      Location::RegisterLocation(R0),
+      Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+}
+
+void IntrinsicCodeGeneratorARM::VisitIntegerValueOf(HInvoke* invoke) {
+  IntrinsicVisitor::IntegerValueOfInfo info = IntrinsicVisitor::ComputeIntegerValueOfInfo();
+  LocationSummary* locations = invoke->GetLocations();
+  ArmAssembler* const assembler = GetAssembler();
+
+  Register out = locations->Out().AsRegister<Register>();
+  InvokeRuntimeCallingConvention calling_convention;
+  Register argument = calling_convention.GetRegisterAt(0);
+  if (invoke->InputAt(0)->IsConstant()) {
+    int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
+    if (value >= info.low && value <= info.high) {
+      // Just embed the j.l.Integer in the code.
+      ScopedObjectAccess soa(Thread::Current());
+      mirror::Object* boxed = info.cache->Get(value + (-info.low));
+      DCHECK(boxed != nullptr && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boxed));
+      uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(boxed));
+      __ LoadLiteral(out, codegen_->DeduplicateBootImageAddressLiteral(address));
+    } else {
+      // Allocate and initialize a new j.l.Integer.
+      // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the
+      // JIT object table.
+      uint32_t address =
+          dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
+      __ LoadLiteral(argument, codegen_->DeduplicateBootImageAddressLiteral(address));
+      codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
+      CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
+      __ LoadImmediate(IP, value);
+      __ StoreToOffset(kStoreWord, IP, out, info.value_offset);
+      // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation
+      // one.
+      codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
+    }
+  } else {
+    Register in = locations->InAt(0).AsRegister<Register>();
+    // Check bounds of our cache.
+    __ AddConstant(out, in, -info.low);
+    __ CmpConstant(out, info.high - info.low + 1);
+    Label allocate, done;
+    __ b(&allocate, HS);
+    // If the value is within the bounds, load the j.l.Integer directly from the array.
+    uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
+    uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache));
+    __ LoadLiteral(IP, codegen_->DeduplicateBootImageAddressLiteral(data_offset + address));
+    codegen_->LoadFromShiftedRegOffset(Primitive::kPrimNot, locations->Out(), IP, out);
+    __ MaybeUnpoisonHeapReference(out);
+    __ b(&done);
+    __ Bind(&allocate);
+    // Otherwise allocate and initialize a new j.l.Integer.
+    address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
+    __ LoadLiteral(argument, codegen_->DeduplicateBootImageAddressLiteral(address));
+    codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
+    CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
+    __ StoreToOffset(kStoreWord, in, out, info.value_offset);
+    // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation
+    // one.
+    codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
+    __ Bind(&done);
+  }
+}
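The AddConstant/CmpConstant/b(HS) sequence in the non-constant path above is the standard single-branch range check: bias by -low, then one unsigned comparison covers both bounds, with HS (unsigned >=) taking the out-of-range, allocate path. Equivalent C++ (a sketch of the check, not ART code):

#include <cstdint>

// True iff low <= v <= high, using a single unsigned comparison.
bool InCacheRange(int32_t v, int32_t low, int32_t high) {
  uint32_t biased = static_cast<uint32_t>(v) - static_cast<uint32_t>(low);
  return biased < static_cast<uint32_t>(high - low + 1);
}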
+
 UNIMPLEMENTED_INTRINSIC(ARM, MathMinDoubleDouble)
 UNIMPLEMENTED_INTRINSIC(ARM, MathMinFloatFloat)
 UNIMPLEMENTED_INTRINSIC(ARM, MathMaxDoubleDouble)
diff --git a/compiler/optimizing/intrinsics_arm.h b/compiler/optimizing/intrinsics_arm.h
index 7f20ea4..2840863 100644
--- a/compiler/optimizing/intrinsics_arm.h
+++ b/compiler/optimizing/intrinsics_arm.h
@@ -51,6 +51,7 @@
 
  private:
   ArenaAllocator* arena_;
+  CodeGenerator* codegen_;
   ArmAssembler* assembler_;
 
   const ArmInstructionSetFeatures& features_;
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 86e5429..807d6cf 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -853,7 +853,6 @@
   DCHECK((type == Primitive::kPrimInt) ||
          (type == Primitive::kPrimLong) ||
          (type == Primitive::kPrimNot));
-  MacroAssembler* masm = codegen->GetVIXLAssembler();
   Location base_loc = locations->InAt(1);
   Register base = WRegisterFrom(base_loc);      // Object pointer.
   Location offset_loc = locations->InAt(2);
@@ -863,8 +862,7 @@
 
   if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
     // UnsafeGetObject/UnsafeGetObjectVolatile with Baker's read barrier case.
-    UseScratchRegisterScope temps(masm);
-    Register temp = temps.AcquireW();
+    Register temp = WRegisterFrom(locations->GetTemp(0));
     codegen->GenerateReferenceLoadWithBakerReadBarrier(invoke,
                                                        trg_loc,
                                                        base,
@@ -901,6 +899,9 @@
                                                            kIntrinsified);
   if (can_call && kUseBakerReadBarrier) {
     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
+    // We need a temporary register for the read barrier marking slow
+    // path in CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier.
+    locations->AddTemp(Location::RequiresRegister());
   }
   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
   locations->SetInAt(1, Location::RequiresRegister());
@@ -1559,7 +1560,10 @@
     // Load `count` field of the argument string and check if it matches the const string.
     // Also compares the compression style, if differs return false.
     __ Ldr(temp, MemOperand(arg.X(), count_offset));
+    // Temporarily release temp1 as we may not be able to embed the flagged count in the CMP immediate.
+    scratch_scope.Release(temp1);
     __ Cmp(temp, Operand(mirror::String::GetFlaggedCount(const_string_length, is_compressed)));
+    temp1 = scratch_scope.AcquireW();
     __ B(&return_false, ne);
   } else {
     // Load `count` fields of this and argument strings.
@@ -2381,9 +2385,14 @@
     // Temporary register IP0, obtained from the VIXL scratch register
     // pool, cannot be used in ReadBarrierSystemArrayCopySlowPathARM64
     // (because that register is clobbered by ReadBarrierMarkRegX
-    // entry points). Get an extra temporary register from the
-    // register allocator.
+    // entry points). It cannot be used in calls to
+    // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier
+    // either. For these reasons, get a third extra temporary register
+    // from the register allocator.
     locations->AddTemp(Location::RequiresRegister());
+  } else {
+    // Cases other than Baker read barriers: the third temporary will
+    // be acquired from the VIXL scratch register pool.
   }
 }
 
@@ -2494,11 +2503,12 @@
     // We use a block to end the scratch scope before the write barrier, thus
     // freeing the temporary registers so they can be used in `MarkGCCard`.
     UseScratchRegisterScope temps(masm);
-    // Note: Because it is acquired from VIXL's scratch register pool,
-    // `temp3` might be IP0, and thus cannot be used as `ref` argument
-    // of CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier
-    // calls below (see ReadBarrierMarkSlowPathARM64 for more details).
-    Register temp3 = temps.AcquireW();
+    Register temp3;
+    if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+      temp3 = WRegisterFrom(locations->GetTemp(2));
+    } else {
+      temp3 = temps.AcquireW();
+    }
 
     if (!optimizations.GetDoesNotNeedTypeCheck()) {
       // Check whether all elements of the source array are assignable to the component
@@ -2704,19 +2714,7 @@
 
     Register src_curr_addr = temp1.X();
     Register dst_curr_addr = temp2.X();
-    Register src_stop_addr;
-    if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
-      // Temporary register IP0, obtained from the VIXL scratch
-      // register pool as `temp3`, cannot be used in
-      // ReadBarrierSystemArrayCopySlowPathARM64 (because that
-      // register is clobbered by ReadBarrierMarkRegX entry points).
-      // So another temporary register allocated by the register
-      // allocator instead.
-      DCHECK_EQ(LocationFrom(temp3).reg(), IP0);
-      src_stop_addr = XRegisterFrom(locations->GetTemp(2));
-    } else {
-      src_stop_addr = temp3.X();
-    }
+    Register src_stop_addr = temp3.X();
 
     GenSystemArrayCopyAddresses(masm,
                                 Primitive::kPrimNot,
@@ -2732,6 +2730,8 @@
     const int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
 
     if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+      // TODO: Also convert this intrinsic to the IsGcMarking strategy?
+
       // SystemArrayCopy implementation for Baker read barriers (see
       // also CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier):
       //
@@ -2758,10 +2758,11 @@
       __ Cmp(src_curr_addr, src_stop_addr);
       __ B(&done, eq);
 
-      Register tmp = temps.AcquireW();
       // Make sure `tmp` is not IP0, as it is clobbered by
       // ReadBarrierMarkRegX entry points in
       // ReadBarrierSystemArrayCopySlowPathARM64.
+      temps.Exclude(ip0);
+      Register tmp = temps.AcquireW();
       DCHECK_NE(LocationFrom(tmp).reg(), IP0);
 
       // /* int32_t */ monitor = src->monitor_
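The `Exclude(ip0)` / `AcquireW()` sequence above is the general VIXL idiom for
obtaining a scratch register that is guaranteed to avoid a particular
clobbered register. A minimal sketch of the pattern, assuming the VIXL
aarch64 API (`UseScratchRegisterScope`, `Exclude`, `AcquireW` and `Release`
are real VIXL calls; `EmitHelper` is illustrative):

    #include "aarch64/macro-assembler-aarch64.h"  // VIXL

    using namespace vixl::aarch64;

    void EmitHelper(MacroAssembler* masm) {
      UseScratchRegisterScope temps(masm);
      // Keep ip0 out of the pool: the read barrier mark entry points
      // clobber it, as noted in the hunks above.
      temps.Exclude(ip0);
      Register tmp = temps.AcquireW();  // Now guaranteed not to be ip0.
      masm->Mov(tmp, 0);
      // Acquired registers are returned when `temps` dies; Release() hands
      // one back early, e.g. so a pseudo-instruction below can use the pool.
      temps.Release(tmp);
    }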
@@ -2924,6 +2925,79 @@
   __ Bind(slow_path->GetExitLabel());
 }
 
+void IntrinsicLocationsBuilderARM64::VisitIntegerValueOf(HInvoke* invoke) {
+  InvokeRuntimeCallingConvention calling_convention;
+  IntrinsicVisitor::ComputeIntegerValueOfLocations(
+      invoke,
+      codegen_,
+      calling_convention.GetReturnLocation(Primitive::kPrimNot),
+      Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode()));
+}
+
+void IntrinsicCodeGeneratorARM64::VisitIntegerValueOf(HInvoke* invoke) {
+  IntrinsicVisitor::IntegerValueOfInfo info = IntrinsicVisitor::ComputeIntegerValueOfInfo();
+  LocationSummary* locations = invoke->GetLocations();
+  MacroAssembler* masm = GetVIXLAssembler();
+
+  Register out = RegisterFrom(locations->Out(), Primitive::kPrimNot);
+  UseScratchRegisterScope temps(masm);
+  Register temp = temps.AcquireW();
+  InvokeRuntimeCallingConvention calling_convention;
+  Register argument = calling_convention.GetRegisterAt(0);
+  if (invoke->InputAt(0)->IsConstant()) {
+    int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
+    if (value >= info.low && value <= info.high) {
+      // Just embed the j.l.Integer in the code.
+      ScopedObjectAccess soa(Thread::Current());
+      mirror::Object* boxed = info.cache->Get(value + (-info.low));
+      DCHECK(boxed != nullptr && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boxed));
+      uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(boxed));
+      __ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(address));
+    } else {
+      // Allocate and initialize a new j.l.Integer.
+      // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the
+      // JIT object table.
+      uint32_t address =
+          dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
+      __ Ldr(argument.W(), codegen_->DeduplicateBootImageAddressLiteral(address));
+      codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
+      CheckEntrypointTypes<kQuickAllocObjectInitialized, void*, mirror::Class*>();
+      __ Mov(temp.W(), value);
+      __ Str(temp.W(), HeapOperand(out.W(), info.value_offset));
+      // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation
+      // one.
+      codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
+    }
+  } else {
+    Register in = RegisterFrom(locations->InAt(0), Primitive::kPrimInt);
+    // Check bounds of our cache.
+    __ Add(out.W(), in.W(), -info.low);
+    __ Cmp(out.W(), info.high - info.low + 1);
+    vixl::aarch64::Label allocate, done;
+    __ B(&allocate, hs);
+    // If the value is within the bounds, load the j.l.Integer directly from the array.
+    uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
+    uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache));
+    __ Ldr(temp.W(), codegen_->DeduplicateBootImageAddressLiteral(data_offset + address));
+    MemOperand source = HeapOperand(
+        temp, out.X(), LSL, Primitive::ComponentSizeShift(Primitive::kPrimNot));
+    codegen_->Load(Primitive::kPrimNot, out, source);
+    codegen_->GetAssembler()->MaybeUnpoisonHeapReference(out);
+    __ B(&done);
+    __ Bind(&allocate);
+    // Otherwise allocate and initialize a new j.l.Integer.
+    address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
+    __ Ldr(argument.W(), codegen_->DeduplicateBootImageAddressLiteral(address));
+    codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
+    CheckEntrypointTypes<kQuickAllocObjectInitialized, void*, mirror::Class*>();
+    __ Str(in.W(), HeapOperand(out.W(), info.value_offset));
+    // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation
+    // one.
+    codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
+    __ Bind(&done);
+  }
+}
+
 UNIMPLEMENTED_INTRINSIC(ARM64, IntegerHighestOneBit)
 UNIMPLEMENTED_INTRINSIC(ARM64, LongHighestOneBit)
 UNIMPLEMENTED_INTRINSIC(ARM64, IntegerLowestOneBit)
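All of the new VisitIntegerValueOf code generators in this change implement
the same shape: hit the boot-image Integer cache when the value is in range,
otherwise call the allocation entrypoint and initialize the `value` field. A
behavioral model of what the emitted code computes (the types and fields here
are illustrative stand-ins, not ART classes):

    #include <cstdint>
    #include <vector>

    struct BoxedInt { int32_t value; };

    struct IntegerCacheModel {
      int32_t low = -128;            // IntegerCache.low
      int32_t high = 127;            // IntegerCache.high
      std::vector<BoxedInt*> cache;  // boot-image objects, assumed populated

      BoxedInt* ValueOf(int32_t v) {
        if (v >= low && v <= high) {
          // Inline fast path: index the boot-image cache array.
          return cache[v - low];
        }
        // Slow path: allocation entrypoint, then the field store, followed
        // in the generated code by a StoreStore barrier (`value` is final).
        BoxedInt* boxed = new BoxedInt();
        boxed->value = v;
        return boxed;
      }
    };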
diff --git a/compiler/optimizing/intrinsics_arm64.h b/compiler/optimizing/intrinsics_arm64.h
index 28e41cb..3c53517 100644
--- a/compiler/optimizing/intrinsics_arm64.h
+++ b/compiler/optimizing/intrinsics_arm64.h
@@ -38,7 +38,8 @@
 
 class IntrinsicLocationsBuilderARM64 FINAL : public IntrinsicVisitor {
  public:
-  explicit IntrinsicLocationsBuilderARM64(ArenaAllocator* arena) : arena_(arena) {}
+  explicit IntrinsicLocationsBuilderARM64(ArenaAllocator* arena, CodeGeneratorARM64* codegen)
+      : arena_(arena), codegen_(codegen) {}
 
   // Define visitor methods.
 
@@ -56,6 +57,7 @@
 
  private:
   ArenaAllocator* arena_;
+  CodeGeneratorARM64* codegen_;
 
   DISALLOW_COPY_AND_ASSIGN(IntrinsicLocationsBuilderARM64);
 };
diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc
index 70a3d38..60bcf2c 100644
--- a/compiler/optimizing/intrinsics_arm_vixl.cc
+++ b/compiler/optimizing/intrinsics_arm_vixl.cc
@@ -203,6 +203,7 @@
 
 IntrinsicLocationsBuilderARMVIXL::IntrinsicLocationsBuilderARMVIXL(CodeGeneratorARMVIXL* codegen)
     : arena_(codegen->GetGraph()->GetArena()),
+      codegen_(codegen),
       assembler_(codegen->GetAssembler()),
       features_(codegen->GetInstructionSetFeatures()) {}
 
@@ -2264,6 +2265,8 @@
   }
 
   if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+    // TODO: Also convert this intrinsic to the IsGcMarking strategy?
+
     // The base destination address is computed later, as `temp2` is
     // used for intermediate computations.
 
@@ -2988,6 +2991,77 @@
   __ Vrintm(F64, F64, OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
 }
 
+void IntrinsicLocationsBuilderARMVIXL::VisitIntegerValueOf(HInvoke* invoke) {
+  InvokeRuntimeCallingConventionARMVIXL calling_convention;
+  IntrinsicVisitor::ComputeIntegerValueOfLocations(
+      invoke,
+      codegen_,
+      LocationFrom(r0),
+      LocationFrom(calling_convention.GetRegisterAt(0)));
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitIntegerValueOf(HInvoke* invoke) {
+  IntrinsicVisitor::IntegerValueOfInfo info = IntrinsicVisitor::ComputeIntegerValueOfInfo();
+  LocationSummary* locations = invoke->GetLocations();
+  ArmVIXLAssembler* const assembler = GetAssembler();
+
+  vixl32::Register out = RegisterFrom(locations->Out());
+  UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
+  vixl32::Register temp = temps.Acquire();
+  InvokeRuntimeCallingConventionARMVIXL calling_convention;
+  vixl32::Register argument = calling_convention.GetRegisterAt(0);
+  if (invoke->InputAt(0)->IsConstant()) {
+    int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
+    if (value >= info.low && value <= info.high) {
+      // Just embed the j.l.Integer in the code.
+      ScopedObjectAccess soa(Thread::Current());
+      mirror::Object* boxed = info.cache->Get(value + (-info.low));
+      DCHECK(boxed != nullptr && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boxed));
+      uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(boxed));
+      __ Ldr(out, codegen_->DeduplicateBootImageAddressLiteral(address));
+    } else {
+      // Allocate and initialize a new j.l.Integer.
+      // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the
+      // JIT object table.
+      uint32_t address =
+          dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
+      __ Ldr(argument, codegen_->DeduplicateBootImageAddressLiteral(address));
+      codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
+      CheckEntrypointTypes<kQuickAllocObjectInitialized, void*, mirror::Class*>();
+      __ Mov(temp, value);
+      assembler->StoreToOffset(kStoreWord, temp, out, info.value_offset);
+      // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation
+      // one.
+      codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
+    }
+  } else {
+    vixl32::Register in = RegisterFrom(locations->InAt(0));
+    // Check bounds of our cache.
+    __ Add(out, in, -info.low);
+    __ Cmp(out, info.high - info.low + 1);
+    vixl32::Label allocate, done;
+    __ B(hs, &allocate);
+    // If the value is within the bounds, load the j.l.Integer directly from the array.
+    uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
+    uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache));
+    __ Ldr(temp, codegen_->DeduplicateBootImageAddressLiteral(data_offset + address));
+    codegen_->LoadFromShiftedRegOffset(Primitive::kPrimNot, locations->Out(), temp, out);
+    assembler->MaybeUnpoisonHeapReference(out);
+    __ B(&done);
+    __ Bind(&allocate);
+    // Otherwise allocate and initialize a new j.l.Integer.
+    address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
+    __ Ldr(argument, codegen_->DeduplicateBootImageAddressLiteral(address));
+    codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
+    CheckEntrypointTypes<kQuickAllocObjectInitialized, void*, mirror::Class*>();
+    assembler->StoreToOffset(kStoreWord, in, out, info.value_offset);
+    // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation
+    // one.
+    codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
+    __ Bind(&done);
+  }
+}
+
 UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRoundDouble)   // Could be done by changing rounding mode, maybe?
 UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRoundFloat)    // Could be done by changing rounding mode, maybe?
 UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeCASLong)     // High register pressure.
diff --git a/compiler/optimizing/intrinsics_arm_vixl.h b/compiler/optimizing/intrinsics_arm_vixl.h
index 6e79cb7..023cba1 100644
--- a/compiler/optimizing/intrinsics_arm_vixl.h
+++ b/compiler/optimizing/intrinsics_arm_vixl.h
@@ -47,6 +47,7 @@
 
  private:
   ArenaAllocator* arena_;
+  CodeGenerator* codegen_;
   ArmVIXLAssembler* assembler_;
   const ArmInstructionSetFeatures& features_;
 
diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc
index 64a6840..ba006ed 100644
--- a/compiler/optimizing/intrinsics_mips.cc
+++ b/compiler/optimizing/intrinsics_mips.cc
@@ -1572,6 +1572,10 @@
       __ Lwr(trg, TMP, 0);
       __ Lwl(trg, TMP, 3);
     }
+
+    if (type == Primitive::kPrimNot) {
+      __ MaybeUnpoisonHeapReference(trg);
+    }
   }
 }
 
@@ -1663,6 +1667,11 @@
   if ((type == Primitive::kPrimInt) || (type == Primitive::kPrimNot)) {
     Register value = locations->InAt(3).AsRegister<Register>();
 
+    if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
+      __ PoisonHeapReference(AT, value);
+      value = AT;
+    }
+
     if (is_R6) {
       __ Sw(value, TMP, 0);
     } else {
@@ -1852,13 +1861,23 @@
     codegen->MarkGCCard(base, value, value_can_be_null);
   }
 
+  MipsLabel loop_head, exit_loop;
+  __ Addu(TMP, base, offset_lo);
+
+  if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
+    __ PoisonHeapReference(expected);
+    // Do not poison `value` if it is the same register as
+    // `expected`, which has just been poisoned.
+    if (value != expected) {
+      __ PoisonHeapReference(value);
+    }
+  }
+
   // do {
   //   tmp_value = [tmp_ptr] - expected;
   // } while (tmp_value == 0 && failure([tmp_ptr] <- r_new_value));
   // result = tmp_value != 0;
 
-  MipsLabel loop_head, exit_loop;
-  __ Addu(TMP, base, offset_lo);
   __ Sync(0);
   __ Bind(&loop_head);
   if ((type == Primitive::kPrimInt) || (type == Primitive::kPrimNot)) {
@@ -1868,8 +1887,8 @@
       __ LlR2(out, TMP);
     }
   } else {
-      LOG(FATAL) << "Unsupported op size " << type;
-      UNREACHABLE();
+    LOG(FATAL) << "Unsupported op size " << type;
+    UNREACHABLE();
   }
   __ Subu(out, out, expected);          // If we didn't get the 'expected'
   __ Sltiu(out, out, 1);                // value, set 'out' to false, and
@@ -1894,6 +1913,15 @@
                                 // cycle atomically then retry.
   __ Bind(&exit_loop);
   __ Sync(0);
+
+  if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
+    __ UnpoisonHeapReference(expected);
+    // Do not unpoison `value` if it is the same register as
+    // `expected`, which has just been unpoisoned.
+    if (value != expected) {
+      __ UnpoisonHeapReference(value);
+    }
+  }
 }
 
 // boolean sun.misc.Unsafe.compareAndSwapInt(Object o, long offset, int expected, int x)
@@ -1989,20 +2017,24 @@
     __ LoadConst32(out, 1);
     return;
   }
-
-  // Check if input is null, return false if it is.
-  __ Beqz(arg, &return_false);
+  StringEqualsOptimizations optimizations(invoke);
+  if (!optimizations.GetArgumentNotNull()) {
+    // Check if input is null, return false if it is.
+    __ Beqz(arg, &return_false);
+  }
 
   // Reference equality check, return true if same reference.
   __ Beq(str, arg, &return_true);
 
-  // Instanceof check for the argument by comparing class fields.
-  // All string objects must have the same type since String cannot be subclassed.
-  // Receiver must be a string object, so its class field is equal to all strings' class fields.
-  // If the argument is a string object, its class field must be equal to receiver's class field.
-  __ Lw(temp1, str, class_offset);
-  __ Lw(temp2, arg, class_offset);
-  __ Bne(temp1, temp2, &return_false);
+  if (!optimizations.GetArgumentIsString()) {
+    // Instanceof check for the argument by comparing class fields.
+    // All string objects must have the same type since String cannot be subclassed.
+    // Receiver must be a string object, so its class field is equal to all strings' class fields.
+    // If the argument is a string object, its class field must be equal to receiver's class field.
+    __ Lw(temp1, str, class_offset);
+    __ Lw(temp2, arg, class_offset);
+    __ Bne(temp1, temp2, &return_false);
+  }
 
   // Load `count` fields of this and argument strings.
   __ Lw(temp1, str, count_offset);
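The `StringEqualsOptimizations` flags allow the null check and the class
check to be skipped when earlier analysis already proves the argument
non-null or an exact String. A sketch of the resulting fast-path structure
(the types and helpers are hypothetical stand-ins for the emitted loads):

    struct Obj { const void* klass; };

    // Stub standing in for the count/compression/character comparison.
    static bool CompareCountsAndChars(const Obj*, const Obj*) { return true; }

    bool StringEqualsSketch(const Obj* str, const Obj* arg,
                            bool arg_known_non_null, bool arg_known_string) {
      if (!arg_known_non_null && arg == nullptr) return false;  // elidable
      if (str == arg) return true;  // reference equality
      // String is final, so one class comparison is an exact type check.
      if (!arg_known_string && str->klass != arg->klass) return false;  // elidable
      return CompareCountsAndChars(str, arg);
    }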
@@ -2682,6 +2714,8 @@
 UNIMPLEMENTED_INTRINSIC(MIPS, UnsafeGetAndSetLong)
 UNIMPLEMENTED_INTRINSIC(MIPS, UnsafeGetAndSetObject)
 
+UNIMPLEMENTED_INTRINSIC(MIPS, IntegerValueOf)
+
 UNREACHABLE_INTRINSICS(MIPS)
 
 #undef __
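When `kPoisonHeapReferences` is enabled, references live in memory in a
poisoned form, so the CAS loop above must poison `expected` and `value`
before comparing raw words and unpoison them afterwards, because those
registers remain live in the caller. A sketch with `std::atomic` standing in
for the emitted LL/SC retry loop; modeling poisoning as two's-complement
negation is my reading of ART's scheme (null stays null under negation):

    #include <atomic>
    #include <cstdint>

    inline uint32_t Poison(uint32_t ref)   { return 0u - ref; }  // assumed
    inline uint32_t Unpoison(uint32_t ref) { return 0u - ref; }  // assumed

    bool CasObjectSketch(std::atomic<uint32_t>* slot,
                         uint32_t expected, uint32_t value) {
      uint32_t poisoned_expected = Poison(expected);
      uint32_t poisoned_value = Poison(value);
      // compare_exchange stands in for: do { ll; } while (sc fails);
      bool ok = slot->compare_exchange_strong(poisoned_expected, poisoned_value);
      // The generated code poisons the registers in place, so it must
      // unpoison them afterwards; this sketch worked on copies instead.
      return ok;
    }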
diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc
index 3888828..21c5074 100644
--- a/compiler/optimizing/intrinsics_mips64.cc
+++ b/compiler/optimizing/intrinsics_mips64.cc
@@ -1187,6 +1187,7 @@
 
     case Primitive::kPrimNot:
       __ Lwu(trg, TMP, 0);
+      __ MaybeUnpoisonHeapReference(trg);
       break;
 
     case Primitive::kPrimLong:
@@ -1285,7 +1286,12 @@
   switch (type) {
     case Primitive::kPrimInt:
     case Primitive::kPrimNot:
-      __ Sw(value, TMP, 0);
+      if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
+        __ PoisonHeapReference(AT, value);
+        __ Sw(AT, TMP, 0);
+      } else {
+        __ Sw(value, TMP, 0);
+      }
       break;
 
     case Primitive::kPrimLong:
@@ -1454,13 +1460,23 @@
     codegen->MarkGCCard(base, value, value_can_be_null);
   }
 
+  Mips64Label loop_head, exit_loop;
+  __ Daddu(TMP, base, offset);
+
+  if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
+    __ PoisonHeapReference(expected);
+    // Do not poison `value` if it is the same register as
+    // `expected`, which has just been poisoned.
+    if (value != expected) {
+      __ PoisonHeapReference(value);
+    }
+  }
+
   // do {
   //   tmp_value = [tmp_ptr] - expected;
   // } while (tmp_value == 0 && failure([tmp_ptr] <- r_new_value));
   // result = tmp_value != 0;
 
-  Mips64Label loop_head, exit_loop;
-  __ Daddu(TMP, base, offset);
   __ Sync(0);
   __ Bind(&loop_head);
   if (type == Primitive::kPrimLong) {
@@ -1469,6 +1485,11 @@
     // Note: We will need a read barrier here, when read barrier
     // support is added to the MIPS64 back end.
     __ Ll(out, TMP);
+    if (type == Primitive::kPrimNot) {
+      // The LL instruction sign-extends the 32-bit value, but
+      // 32-bit references must be zero-extended. Zero-extend `out`.
+      __ Dext(out, out, 0, 32);
+    }
   }
   __ Dsubu(out, out, expected);         // If we didn't get the 'expected'
   __ Sltiu(out, out, 1);                // value, set 'out' to false, and
@@ -1487,6 +1508,15 @@
                                 // cycle atomically then retry.
   __ Bind(&exit_loop);
   __ Sync(0);
+
+  if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
+    __ UnpoisonHeapReference(expected);
+    // Do not unpoison `value` if it is the same register as
+    // `expected`, which has just been unpoisoned.
+    if (value != expected) {
+      __ UnpoisonHeapReference(value);
+    }
+  }
 }
 
 // boolean sun.misc.Unsafe.compareAndSwapInt(Object o, long offset, int expected, int x)
@@ -1593,19 +1623,24 @@
     return;
   }
 
-  // Check if input is null, return false if it is.
-  __ Beqzc(arg, &return_false);
+  StringEqualsOptimizations optimizations(invoke);
+  if (!optimizations.GetArgumentNotNull()) {
+    // Check if input is null, return false if it is.
+    __ Beqzc(arg, &return_false);
+  }
 
   // Reference equality check, return true if same reference.
   __ Beqc(str, arg, &return_true);
 
-  // Instanceof check for the argument by comparing class fields.
-  // All string objects must have the same type since String cannot be subclassed.
-  // Receiver must be a string object, so its class field is equal to all strings' class fields.
-  // If the argument is a string object, its class field must be equal to receiver's class field.
-  __ Lw(temp1, str, class_offset);
-  __ Lw(temp2, arg, class_offset);
-  __ Bnec(temp1, temp2, &return_false);
+  if (!optimizations.GetArgumentIsString()) {
+    // Instanceof check for the argument by comparing class fields.
+    // All string objects must have the same type since String cannot be subclassed.
+    // Receiver must be a string object, so its class field is equal to all strings' class fields.
+    // If the argument is a string object, its class field must be equal to receiver's class field.
+    __ Lw(temp1, str, class_offset);
+    __ Lw(temp2, arg, class_offset);
+    __ Bnec(temp1, temp2, &return_false);
+  }
 
   // Load `count` fields of this and argument strings.
   __ Lw(temp1, str, count_offset);
@@ -2075,6 +2110,8 @@
 UNIMPLEMENTED_INTRINSIC(MIPS64, UnsafeGetAndSetLong)
 UNIMPLEMENTED_INTRINSIC(MIPS64, UnsafeGetAndSetObject)
 
+UNIMPLEMENTED_INTRINSIC(MIPS64, IntegerValueOf)
+
 UNREACHABLE_INTRINSICS(MIPS64)
 
 #undef __
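The `Dext` above is needed because MIPS64's LL instruction sign-extends its
32-bit result, while heap references are unsigned 32-bit values. A small
worked example of the difference:

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint32_t ref = 0x80001000u;  // a reference with bit 31 set
      // What LL leaves in the 64-bit register (sign-extended):
      int64_t after_ll = static_cast<int32_t>(ref);      // 0xffffffff80001000
      // What Dext produces (zero-extended), which the comparison needs:
      uint64_t after_dext = static_cast<uint64_t>(ref);  // 0x0000000080001000
      std::printf("%016llx vs %016llx\n",
                  static_cast<unsigned long long>(after_ll),
                  static_cast<unsigned long long>(after_dext));
      // Without Dext, `out - expected` below would be nonzero even for
      // matching references, making the CAS fail spuriously.
      return 0;
    }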
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index e1b7ea5..a671788 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -3335,6 +3335,65 @@
   __ Bind(intrinsic_slow_path->GetExitLabel());
 }
 
+void IntrinsicLocationsBuilderX86::VisitIntegerValueOf(HInvoke* invoke) {
+  InvokeRuntimeCallingConvention calling_convention;
+  IntrinsicVisitor::ComputeIntegerValueOfLocations(
+      invoke,
+      codegen_,
+      Location::RegisterLocation(EAX),
+      Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+}
+
+void IntrinsicCodeGeneratorX86::VisitIntegerValueOf(HInvoke* invoke) {
+  IntrinsicVisitor::IntegerValueOfInfo info = IntrinsicVisitor::ComputeIntegerValueOfInfo();
+  LocationSummary* locations = invoke->GetLocations();
+  X86Assembler* assembler = GetAssembler();
+
+  Register out = locations->Out().AsRegister<Register>();
+  InvokeRuntimeCallingConvention calling_convention;
+  if (invoke->InputAt(0)->IsConstant()) {
+    int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
+    if (value >= info.low && value <= info.high) {
+      // Just embed the j.l.Integer in the code.
+      ScopedObjectAccess soa(Thread::Current());
+      mirror::Object* boxed = info.cache->Get(value + (-info.low));
+      DCHECK(boxed != nullptr && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boxed));
+      uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(boxed));
+      __ movl(out, Immediate(address));
+    } else {
+      // Allocate and initialize a new j.l.Integer.
+      // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the
+      // JIT object table.
+      uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
+      __ movl(calling_convention.GetRegisterAt(0), Immediate(address));
+      codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
+      CheckEntrypointTypes<kQuickAllocObjectInitialized, void*, mirror::Class*>();
+      __ movl(Address(out, info.value_offset), Immediate(value));
+    }
+  } else {
+    Register in = locations->InAt(0).AsRegister<Register>();
+    // Check bounds of our cache.
+    __ leal(out, Address(in, -info.low));
+    __ cmpl(out, Immediate(info.high - info.low + 1));
+    NearLabel allocate, done;
+    __ j(kAboveEqual, &allocate);
+    // If the value is within the bounds, load the j.l.Integer directly from the array.
+    uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
+    uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache));
+    __ movl(out, Address(out, TIMES_4, data_offset + address));
+    __ MaybeUnpoisonHeapReference(out);
+    __ jmp(&done);
+    __ Bind(&allocate);
+    // Otherwise allocate and initialize a new j.l.Integer.
+    address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
+    __ movl(calling_convention.GetRegisterAt(0), Immediate(address));
+    codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
+    CheckEntrypointTypes<kQuickAllocObjectInitialized, void*, mirror::Class*>();
+    __ movl(Address(out, info.value_offset), in);
+    __ Bind(&done);
+  }
+}
+
 UNIMPLEMENTED_INTRINSIC(X86, MathRoundDouble)
 UNIMPLEMENTED_INTRINSIC(X86, FloatIsInfinite)
 UNIMPLEMENTED_INTRINSIC(X86, DoubleIsInfinite)
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 05d270a..9a6dd98 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -39,7 +39,6 @@
   : arena_(codegen->GetGraph()->GetArena()), codegen_(codegen) {
 }
 
-
 X86_64Assembler* IntrinsicCodeGeneratorX86_64::GetAssembler() {
   return down_cast<X86_64Assembler*>(codegen_->GetAssembler());
 }
@@ -2995,6 +2994,65 @@
   __ Bind(slow_path->GetExitLabel());
 }
 
+void IntrinsicLocationsBuilderX86_64::VisitIntegerValueOf(HInvoke* invoke) {
+  InvokeRuntimeCallingConvention calling_convention;
+  IntrinsicVisitor::ComputeIntegerValueOfLocations(
+      invoke,
+      codegen_,
+      Location::RegisterLocation(RAX),
+      Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitIntegerValueOf(HInvoke* invoke) {
+  IntrinsicVisitor::IntegerValueOfInfo info = IntrinsicVisitor::ComputeIntegerValueOfInfo();
+  LocationSummary* locations = invoke->GetLocations();
+  X86_64Assembler* assembler = GetAssembler();
+
+  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+  InvokeRuntimeCallingConvention calling_convention;
+  if (invoke->InputAt(0)->IsConstant()) {
+    int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
+    if (value >= info.low && value <= info.high) {
+      // Just embed the j.l.Integer in the code.
+      ScopedObjectAccess soa(Thread::Current());
+      mirror::Object* boxed = info.cache->Get(value + (-info.low));
+      DCHECK(boxed != nullptr && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boxed));
+      uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(boxed));
+      __ movl(out, Immediate(address));
+    } else {
+      // Allocate and initialize a new j.l.Integer.
+      // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the
+      // JIT object table.
+      uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
+      __ movl(CpuRegister(calling_convention.GetRegisterAt(0)), Immediate(address));
+      codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
+      CheckEntrypointTypes<kQuickAllocObjectInitialized, void*, mirror::Class*>();
+      __ movl(Address(out, info.value_offset), Immediate(value));
+    }
+  } else {
+    CpuRegister in = locations->InAt(0).AsRegister<CpuRegister>();
+    // Check bounds of our cache.
+    __ leal(out, Address(in, -info.low));
+    __ cmpl(out, Immediate(info.high - info.low + 1));
+    NearLabel allocate, done;
+    __ j(kAboveEqual, &allocate);
+    // If the value is within the bounds, load the j.l.Integer directly from the array.
+    uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
+    uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache));
+    __ movl(out, Address(out, TIMES_4, data_offset + address));
+    __ MaybeUnpoisonHeapReference(out);
+    __ jmp(&done);
+    __ Bind(&allocate);
+    // Otherwise allocate and initialize a new j.l.Integer.
+    address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
+    __ movl(CpuRegister(calling_convention.GetRegisterAt(0)), Immediate(address));
+    codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
+    CheckEntrypointTypes<kQuickAllocObjectInitialized, void*, mirror::Class*>();
+    __ movl(Address(out, info.value_offset), in);
+    __ Bind(&done);
+  }
+}
+
 UNIMPLEMENTED_INTRINSIC(X86_64, FloatIsInfinite)
 UNIMPLEMENTED_INTRINSIC(X86_64, DoubleIsInfinite)
 
diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc
index 2d3c00f..48699b3 100644
--- a/compiler/optimizing/load_store_elimination.cc
+++ b/compiler/optimizing/load_store_elimination.cc
@@ -38,7 +38,8 @@
         position_(pos),
         is_singleton_(true),
         is_singleton_and_not_returned_(true),
-        is_singleton_and_not_deopt_visible_(true) {
+        is_singleton_and_not_deopt_visible_(true),
+        has_index_aliasing_(false) {
     CalculateEscape(reference_,
                     nullptr,
                     &is_singleton_,
@@ -68,13 +69,36 @@
     return is_singleton_and_not_returned_ && is_singleton_and_not_deopt_visible_;
   }
 
+  // Returns true if reference_ is a singleton and is either returned to the
+  // caller or used as an environment local of an HDeoptimize instruction.
+  bool IsSingletonAndNonRemovable() const {
+    return is_singleton_ &&
+           (!is_singleton_and_not_returned_ || !is_singleton_and_not_deopt_visible_);
+  }
+
+  bool HasIndexAliasing() {
+    return has_index_aliasing_;
+  }
+
+  void SetHasIndexAliasing(bool has_index_aliasing) {
+    // Only allow setting to true.
+    DCHECK(has_index_aliasing);
+    has_index_aliasing_ = has_index_aliasing;
+  }
+
  private:
   HInstruction* const reference_;
   const size_t position_;  // position in HeapLocationCollector's ref_info_array_.
 
-  bool is_singleton_;                        // can only be referred to by a single name in the method,
-  bool is_singleton_and_not_returned_;       // and not returned to caller,
-  bool is_singleton_and_not_deopt_visible_;  // and not used as an environment local of HDeoptimize.
+  // Can only be referred to by a single name in the method.
+  bool is_singleton_;
+  // Is singleton and not returned to caller.
+  bool is_singleton_and_not_returned_;
+  // Is singleton and not used as an environment local of HDeoptimize.
+  bool is_singleton_and_not_deopt_visible_;
+  // Some heap locations with reference_ have array index aliasing,
+  // e.g. arr[i] and arr[j] may be the same location.
+  bool has_index_aliasing_;
 
   DISALLOW_COPY_AND_ASSIGN(ReferenceInfo);
 };
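A two-line example of why `has_index_aliasing_` must block store elimination
for array locations (illustration only, not ART code):

    int IndexAliasingSketch(int* a, int i, int j) {
      a[i] = 1;      // cannot be removed: the next store may overwrite it
      a[j] = 2;      // aliases a[i] exactly when i == j
      return a[i];   // observes 1 or 2 depending on whether i == j
    }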
@@ -321,6 +345,8 @@
         // Different constant indices do not alias.
         return false;
       }
+      ReferenceInfo* ref_info = loc1->GetReferenceInfo();
+      ref_info->SetHasIndexAliasing(true);
     }
     return true;
   }
@@ -497,7 +523,8 @@
         removed_loads_(graph->GetArena()->Adapter(kArenaAllocLSE)),
         substitute_instructions_for_loads_(graph->GetArena()->Adapter(kArenaAllocLSE)),
         possibly_removed_stores_(graph->GetArena()->Adapter(kArenaAllocLSE)),
-        singleton_new_instances_(graph->GetArena()->Adapter(kArenaAllocLSE)) {
+        singleton_new_instances_(graph->GetArena()->Adapter(kArenaAllocLSE)),
+        singleton_new_arrays_(graph->GetArena()->Adapter(kArenaAllocLSE)) {
   }
 
   void VisitBasicBlock(HBasicBlock* block) OVERRIDE {
@@ -534,20 +561,24 @@
     }
 
     // At this point, stores in possibly_removed_stores_ can be safely removed.
-    for (size_t i = 0, e = possibly_removed_stores_.size(); i < e; i++) {
-      HInstruction* store = possibly_removed_stores_[i];
+    for (HInstruction* store : possibly_removed_stores_) {
       DCHECK(store->IsInstanceFieldSet() || store->IsStaticFieldSet() || store->IsArraySet());
       store->GetBlock()->RemoveInstruction(store);
     }
 
     // Eliminate allocations that are not used.
-    for (size_t i = 0, e = singleton_new_instances_.size(); i < e; i++) {
-      HInstruction* new_instance = singleton_new_instances_[i];
+    for (HInstruction* new_instance : singleton_new_instances_) {
       if (!new_instance->HasNonEnvironmentUses()) {
         new_instance->RemoveEnvironmentUsers();
         new_instance->GetBlock()->RemoveInstruction(new_instance);
       }
     }
+    for (HInstruction* new_array : singleton_new_arrays_) {
+      if (!new_array->HasNonEnvironmentUses()) {
+        new_array->RemoveEnvironmentUsers();
+        new_array->GetBlock()->RemoveInstruction(new_array);
+      }
+    }
   }
 
  private:
@@ -558,7 +589,7 @@
   void KeepIfIsStore(HInstruction* heap_value) {
     if (heap_value == kDefaultHeapValue ||
         heap_value == kUnknownHeapValue ||
-        !heap_value->IsInstanceFieldSet()) {
+        !(heap_value->IsInstanceFieldSet() || heap_value->IsArraySet())) {
       return;
     }
     auto idx = std::find(possibly_removed_stores_.begin(),
@@ -600,14 +631,17 @@
       for (size_t i = 0; i < heap_values.size(); i++) {
         HeapLocation* location = heap_location_collector_.GetHeapLocation(i);
         ReferenceInfo* ref_info = location->GetReferenceInfo();
-        if (!ref_info->IsSingleton() || location->IsValueKilledByLoopSideEffects()) {
-          // heap value is killed by loop side effects (stored into directly, or due to
-          // aliasing).
+        if (ref_info->IsSingletonAndRemovable() &&
+            !location->IsValueKilledByLoopSideEffects()) {
+          // A removable singleton's field that's not stored into inside a loop is
+          // invariant throughout the loop. Nothing to do.
+          DCHECK(ref_info->IsSingletonAndRemovable());
+        } else {
+          // The heap value is killed by loop side effects (stored into
+          // directly, or due to aliasing), or it may be needed after method
+          // return or deoptimization.
           KeepIfIsStore(pre_header_heap_values[i]);
           heap_values[i] = kUnknownHeapValue;
-        } else {
-          // A singleton's field that's not stored into inside a loop is invariant throughout
-          // the loop.
         }
       }
     }
@@ -626,7 +660,7 @@
       bool from_all_predecessors = true;
       ReferenceInfo* ref_info = heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo();
       HInstruction* singleton_ref = nullptr;
-      if (ref_info->IsSingletonAndRemovable()) {
+      if (ref_info->IsSingleton()) {
         // We do more analysis of liveness when merging heap values for such
         // cases since stores into such references may potentially be eliminated.
         singleton_ref = ref_info->GetReference();
@@ -652,8 +686,9 @@
         }
       }
 
-      if (merged_value == kUnknownHeapValue) {
-        // There are conflicting heap values from different predecessors.
+      if (merged_value == kUnknownHeapValue || ref_info->IsSingletonAndNonRemovable()) {
+        // There are conflicting heap values from different predecessors,
+        // or the heap value may be needed after method return or deoptimization.
         // Keep the last store in each predecessor since future loads cannot be eliminated.
         for (HBasicBlock* predecessor : predecessors) {
           ArenaVector<HInstruction*>& pred_values = heap_values_for_[predecessor->GetBlockId()];
@@ -734,13 +769,16 @@
       heap_values[idx] = constant;
       return;
     }
-    if (heap_value != kUnknownHeapValue && heap_value->IsInstanceFieldSet()) {
-      HInstruction* store = heap_value;
-      // This load must be from a singleton since it's from the same field
-      // that a "removed" store puts the value. That store must be to a singleton's field.
-      DCHECK(ref_info->IsSingleton());
-      // Get the real heap value of the store.
-      heap_value = store->InputAt(1);
+    if (heap_value != kUnknownHeapValue) {
+      if (heap_value->IsInstanceFieldSet() || heap_value->IsArraySet()) {
+        HInstruction* store = heap_value;
+        // This load must be from a singleton since it's from the same
+        // field/element that a "removed" store puts the value. That store
+        // must be to a singleton's field/element.
+        DCHECK(ref_info->IsSingleton());
+        // Get the real heap value of the store.
+        heap_value = store->IsInstanceFieldSet() ? store->InputAt(1) : store->InputAt(2);
+      }
     }
     if (heap_value == kUnknownHeapValue) {
       // Load isn't eliminated. Put the load as the value into the HeapLocation.
@@ -796,19 +834,19 @@
     if (Equal(heap_value, value)) {
       // Store into the heap location with the same value.
       same_value = true;
-    } else if (index != nullptr) {
-      // For array element, don't eliminate stores since it can be easily aliased
-      // with non-constant index.
-    } else if (ref_info->IsSingletonAndRemovable()) {
-      // Store into a field of a singleton that's not returned. The value cannot be
-      // killed due to aliasing/invocation. It can be redundant since future loads can
+    } else if (index != nullptr && ref_info->HasIndexAliasing()) {
+      // For array elements, don't eliminate the store if the index can be aliased.
+    } else if (ref_info->IsSingleton()) {
+      // Store into a field of a singleton. The value cannot be killed due to
+      // aliasing/invocation. It can be redundant since future loads can
       // directly get the value set by this instruction. The value can still be killed due to
       // merging or loop side effects. Stores whose values are killed due to merging/loop side
       // effects later will be removed from possibly_removed_stores_ when that is detected.
+      // Stores whose values may be needed after method return or deoptimization
+      // are also removed from possibly_removed_stores_ when that is detected.
       possibly_redundant = true;
       HNewInstance* new_instance = ref_info->GetReference()->AsNewInstance();
-      DCHECK(new_instance != nullptr);
-      if (new_instance->IsFinalizable()) {
+      if (new_instance != nullptr && new_instance->IsFinalizable()) {
         // Finalizable objects escape globally. Need to keep the store.
         possibly_redundant = false;
       } else {
@@ -834,7 +872,7 @@
 
     if (!same_value) {
       if (possibly_redundant) {
-        DCHECK(instruction->IsInstanceFieldSet());
+        DCHECK(instruction->IsInstanceFieldSet() || instruction->IsArraySet());
         // Put the store as the heap value. If the value is loaded from heap
         // by a load later, this store isn't really redundant.
         heap_values[idx] = instruction;
@@ -914,6 +952,33 @@
                      value);
   }
 
+  void VisitDeoptimize(HDeoptimize* instruction) {
+    const ArenaVector<HInstruction*>& heap_values =
+        heap_values_for_[instruction->GetBlock()->GetBlockId()];
+    for (HInstruction* heap_value : heap_values) {
+      // Filter out fake instructions before checking instruction kind below.
+      if (heap_value == kUnknownHeapValue || heap_value == kDefaultHeapValue) {
+        continue;
+      }
+      // A store is kept as the heap value for possibly removed stores.
+      if (heap_value->IsInstanceFieldSet() || heap_value->IsArraySet()) {
+        // Check whether the reference for a store is used by an environment local of
+        // HDeoptimize.
+        HInstruction* reference = heap_value->InputAt(0);
+        DCHECK(heap_location_collector_.FindReferenceInfoOf(reference)->IsSingleton());
+        for (const HUseListNode<HEnvironment*>& use : reference->GetEnvUses()) {
+          HEnvironment* user = use.GetUser();
+          if (user->GetHolder() == instruction) {
+            // The singleton for the store is visible at this deoptimization
+            // point. Need to keep the store so that the heap value is
+            // seen by the interpreter.
+            KeepIfIsStore(heap_value);
+          }
+        }
+      }
+    }
+  }
+
   void HandleInvoke(HInstruction* invoke) {
     ArenaVector<HInstruction*>& heap_values =
         heap_values_for_[invoke->GetBlock()->GetBlockId()];
@@ -995,6 +1060,27 @@
     }
   }
 
+  void VisitNewArray(HNewArray* new_array) OVERRIDE {
+    ReferenceInfo* ref_info = heap_location_collector_.FindReferenceInfoOf(new_array);
+    if (ref_info == nullptr) {
+      // new_array isn't used for array accesses. No need to process it.
+      return;
+    }
+    if (ref_info->IsSingletonAndRemovable()) {
+      singleton_new_arrays_.push_back(new_array);
+    }
+    ArenaVector<HInstruction*>& heap_values =
+        heap_values_for_[new_array->GetBlock()->GetBlockId()];
+    for (size_t i = 0; i < heap_values.size(); i++) {
+      HeapLocation* location = heap_location_collector_.GetHeapLocation(i);
+      HInstruction* ref = location->GetReferenceInfo()->GetReference();
+      if (ref == new_array && location->GetIndex() != nullptr) {
+        // Array elements are set to default heap values.
+        heap_values[i] = kDefaultHeapValue;
+      }
+    }
+  }
+
   // Find an instruction's substitute if it should be removed.
   // Return the same instruction if it should not be removed.
   HInstruction* FindSubstitute(HInstruction* instruction) {
@@ -1023,6 +1109,7 @@
   ArenaVector<HInstruction*> possibly_removed_stores_;
 
   ArenaVector<HInstruction*> singleton_new_instances_;
+  ArenaVector<HInstruction*> singleton_new_arrays_;
 
   DISALLOW_COPY_AND_ASSIGN(LSEVisitor);
 };
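The new VisitDeoptimize handler exists because a store into a non-escaping
singleton is only removable if no deoptimization point can observe the
object: after deopting, the interpreter reads the field from memory. An
illustrative shape of the hazard (`Deoptimize` is a hypothetical stand-in for
the transfer to the interpreter):

    struct Point { int x; };
    inline void Deoptimize() {}  // hypothetical: control moves to the interpreter

    int CompiledSketch(bool speculation_holds) {
      Point* p = new Point();  // singleton: never escapes this function
      p->x = 42;               // candidate for store elimination...
      if (!speculation_holds) {
        // ...but `p` is in this deopt point's environment and the
        // interpreter will read p->x from memory, so the store must stay.
        Deoptimize();
      }
      return p->x;  // compiled code can forward 42 into this load
    }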
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index 26c9ab8..8df513f 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -16,6 +16,7 @@
 
 #include "loop_optimization.h"
 
+#include "driver/compiler_driver.h"
 #include "linear_order.h"
 
 namespace art {
@@ -57,8 +58,10 @@
 //
 
 HLoopOptimization::HLoopOptimization(HGraph* graph,
+                                     CompilerDriver* compiler_driver,
                                      HInductionVarAnalysis* induction_analysis)
     : HOptimization(graph, kLoopOptimizationPassName),
+      compiler_driver_(compiler_driver),
       induction_range_(induction_analysis),
       loop_allocator_(nullptr),
       top_loop_(nullptr),
@@ -69,7 +72,7 @@
 }
 
 void HLoopOptimization::Run() {
-  // Well-behaved loops only.
+  // Skip if there is no loop or the graph has try-catch/irreducible loops.
   // TODO: make this less of a sledgehammer.
   if (!graph_->HasLoops() || graph_->HasTryCatch() || graph_->HasIrreducibleLoops()) {
     return;
@@ -85,6 +88,7 @@
   LocalRun();
 
   if (top_loop_ == nullptr) {
+    // All loops have been eliminated.
     graph_->SetHasLoops(false);
   }
 
diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h
index 9ddab41..0b798fc 100644
--- a/compiler/optimizing/loop_optimization.h
+++ b/compiler/optimizing/loop_optimization.h
@@ -23,13 +23,17 @@
 
 namespace art {
 
+class CompilerDriver;
+
 /**
  * Loop optimizations. Builds a loop hierarchy and applies optimizations to
  * the detected nested loops, such as removal of dead induction and empty loops.
  */
 class HLoopOptimization : public HOptimization {
  public:
-  HLoopOptimization(HGraph* graph, HInductionVarAnalysis* induction_analysis);
+  HLoopOptimization(HGraph* graph,
+                    CompilerDriver* compiler_driver,
+                    HInductionVarAnalysis* induction_analysis);
 
   void Run() OVERRIDE;
 
@@ -76,6 +80,9 @@
   bool TryReplaceWithLastValue(HInstruction* instruction, HBasicBlock* block);
   void RemoveDeadInstructions(const HInstructionList& list);
 
+  // Compiler driver (to query ISA features).
+  const CompilerDriver* compiler_driver_;
+
   // Range information based on prior induction variable analysis.
   InductionVarRange induction_range_;
 
diff --git a/compiler/optimizing/loop_optimization_test.cc b/compiler/optimizing/loop_optimization_test.cc
index 9a6b493..5b93506 100644
--- a/compiler/optimizing/loop_optimization_test.cc
+++ b/compiler/optimizing/loop_optimization_test.cc
@@ -31,7 +31,7 @@
         allocator_(&pool_),
         graph_(CreateGraph(&allocator_)),
         iva_(new (&allocator_) HInductionVarAnalysis(graph_)),
-        loop_opt_(new (&allocator_) HLoopOptimization(graph_, iva_)) {
+        loop_opt_(new (&allocator_) HLoopOptimization(graph_, nullptr, iva_)) {
     BuildGraph();
   }
 
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 62c8910..020e446 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -2179,6 +2179,9 @@
       }
     }
     if (rerun_loop_analysis) {
+      DCHECK(!outer_graph->HasIrreducibleLoops())
+          << "Recomputing loop information in graphs with irreducible loops "
+          << "is unsupported, as it could lead to loop header changes";
       outer_graph->ClearLoopInformation();
       outer_graph->ClearDominanceInformation();
       outer_graph->BuildDominatorTree();
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 2f258db..542b218 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -1362,6 +1362,7 @@
 #else
 #define FOR_EACH_CONCRETE_INSTRUCTION_SHARED(M)                         \
   M(BitwiseNegatedRight, Instruction)                                   \
+  M(DataProcWithShifterOp, Instruction)                                 \
   M(MultiplyAccumulate, Instruction)                                    \
   M(IntermediateAddress, Instruction)
 #endif
@@ -1373,12 +1374,7 @@
   M(ArmDexCacheArraysBase, Instruction)
 #endif
 
-#ifndef ART_ENABLE_CODEGEN_arm64
 #define FOR_EACH_CONCRETE_INSTRUCTION_ARM64(M)
-#else
-#define FOR_EACH_CONCRETE_INSTRUCTION_ARM64(M)                          \
-  M(Arm64DataProcWithShifterOp, Instruction)
-#endif
 
 #ifndef ART_ENABLE_CODEGEN_mips
 #define FOR_EACH_CONCRETE_INSTRUCTION_MIPS(M)
@@ -1738,11 +1734,11 @@
 // A HEnvironment object contains the values of virtual registers at a given location.
 class HEnvironment : public ArenaObject<kArenaAllocEnvironment> {
  public:
-  HEnvironment(ArenaAllocator* arena,
-               size_t number_of_vregs,
-               ArtMethod* method,
-               uint32_t dex_pc,
-               HInstruction* holder)
+  ALWAYS_INLINE HEnvironment(ArenaAllocator* arena,
+                             size_t number_of_vregs,
+                             ArtMethod* method,
+                             uint32_t dex_pc,
+                             HInstruction* holder)
      : vregs_(number_of_vregs, arena->Adapter(kArenaAllocEnvironmentVRegs)),
        locations_(number_of_vregs, arena->Adapter(kArenaAllocEnvironmentLocations)),
        parent_(nullptr),
@@ -1751,7 +1747,7 @@
        holder_(holder) {
   }
 
-  HEnvironment(ArenaAllocator* arena, const HEnvironment& to_copy, HInstruction* holder)
+  ALWAYS_INLINE HEnvironment(ArenaAllocator* arena, const HEnvironment& to_copy, HInstruction* holder)
       : HEnvironment(arena,
                      to_copy.Size(),
                      to_copy.GetMethod(),
@@ -1918,6 +1914,9 @@
 
   virtual bool IsControlFlow() const { return false; }
 
+  // Can the instruction throw?
+  // TODO: We should rename to CanVisiblyThrow, as some instructions (like HNewInstance)
+  // could throw OOME, but it is still OK to remove them if they are unused.
   virtual bool CanThrow() const { return false; }
   bool CanThrowIntoCatchBlock() const { return CanThrow() && block_->IsTryBlock(); }
 
@@ -3916,6 +3915,7 @@
   bool IsIntrinsic() const { return intrinsic_ != Intrinsics::kNone; }
 
   ArtMethod* GetResolvedMethod() const { return resolved_method_; }
+  void SetResolvedMethod(ArtMethod* method) { resolved_method_ = method; }
 
   DECLARE_ABSTRACT_INSTRUCTION(Invoke);
 
@@ -3958,7 +3958,7 @@
   }
 
   uint32_t number_of_arguments_;
-  ArtMethod* const resolved_method_;
+  ArtMethod* resolved_method_;
   const uint32_t dex_method_index_;
   Intrinsics intrinsic_;
 
@@ -5545,8 +5545,6 @@
 
     // Use a known boot image Class* address, embedded in the code by the codegen.
     // Used for boot image classes referenced by apps in AOT- and JIT-compiled code.
-    // Note: codegen needs to emit a linker patch if indicated by compiler options'
-    // GetIncludePatchInformation().
     kBootImageAddress,
 
     // Load from an entry in the .bss section using a PC-relative load.
@@ -5750,8 +5748,6 @@
 
     // Use a known boot image String* address, embedded in the code by the codegen.
     // Used for boot image strings referenced by apps in AOT- and JIT-compiled code.
-    // Note: codegen needs to emit a linker patch if indicated by compiler options'
-    // GetIncludePatchInformation().
     kBootImageAddress,
 
     // Load from an entry in the .bss section using a PC-relative load.
@@ -6619,9 +6615,6 @@
 #ifdef ART_ENABLE_CODEGEN_arm
 #include "nodes_arm.h"
 #endif
-#ifdef ART_ENABLE_CODEGEN_arm64
-#include "nodes_arm64.h"
-#endif
 #ifdef ART_ENABLE_CODEGEN_mips
 #include "nodes_mips.h"
 #endif
diff --git a/compiler/optimizing/nodes_arm64.h b/compiler/optimizing/nodes_arm64.h
deleted file mode 100644
index 3f88717..0000000
--- a/compiler/optimizing/nodes_arm64.h
+++ /dev/null
@@ -1,99 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_OPTIMIZING_NODES_ARM64_H_
-#define ART_COMPILER_OPTIMIZING_NODES_ARM64_H_
-
-#include "nodes.h"
-
-namespace art {
-
-class HArm64DataProcWithShifterOp FINAL : public HExpression<2> {
- public:
-  enum OpKind {
-    kLSL,   // Logical shift left.
-    kLSR,   // Logical shift right.
-    kASR,   // Arithmetic shift right.
-    kUXTB,  // Unsigned extend byte.
-    kUXTH,  // Unsigned extend half-word.
-    kUXTW,  // Unsigned extend word.
-    kSXTB,  // Signed extend byte.
-    kSXTH,  // Signed extend half-word.
-    kSXTW,  // Signed extend word.
-
-    // Aliases.
-    kFirstShiftOp = kLSL,
-    kLastShiftOp = kASR,
-    kFirstExtensionOp = kUXTB,
-    kLastExtensionOp = kSXTW
-  };
-  HArm64DataProcWithShifterOp(HInstruction* instr,
-                              HInstruction* left,
-                              HInstruction* right,
-                              OpKind op,
-                              // The shift argument is unused if the operation
-                              // is an extension.
-                              int shift = 0,
-                              uint32_t dex_pc = kNoDexPc)
-      : HExpression(instr->GetType(), SideEffects::None(), dex_pc),
-        instr_kind_(instr->GetKind()), op_kind_(op), shift_amount_(shift) {
-    DCHECK(!instr->HasSideEffects());
-    SetRawInputAt(0, left);
-    SetRawInputAt(1, right);
-  }
-
-  bool CanBeMoved() const OVERRIDE { return true; }
-  bool InstructionDataEquals(const HInstruction* other_instr) const OVERRIDE {
-    const HArm64DataProcWithShifterOp* other = other_instr->AsArm64DataProcWithShifterOp();
-    return instr_kind_ == other->instr_kind_ &&
-        op_kind_ == other->op_kind_ &&
-        shift_amount_ == other->shift_amount_;
-  }
-
-  static bool IsShiftOp(OpKind op_kind) {
-    return kFirstShiftOp <= op_kind && op_kind <= kLastShiftOp;
-  }
-
-  static bool IsExtensionOp(OpKind op_kind) {
-    return kFirstExtensionOp <= op_kind && op_kind <= kLastExtensionOp;
-  }
-
-  // Find the operation kind and shift amount from a bitfield move instruction.
-  static void GetOpInfoFromInstruction(HInstruction* bitfield_op,
-                                       /*out*/OpKind* op_kind,
-                                       /*out*/int* shift_amount);
-
-  InstructionKind GetInstrKind() const { return instr_kind_; }
-  OpKind GetOpKind() const { return op_kind_; }
-  int GetShiftAmount() const { return shift_amount_; }
-
-  DECLARE_INSTRUCTION(Arm64DataProcWithShifterOp);
-
- private:
-  InstructionKind instr_kind_;
-  OpKind op_kind_;
-  int shift_amount_;
-
-  friend std::ostream& operator<<(std::ostream& os, OpKind op);
-
-  DISALLOW_COPY_AND_ASSIGN(HArm64DataProcWithShifterOp);
-};
-
-std::ostream& operator<<(std::ostream& os, const HArm64DataProcWithShifterOp::OpKind op);
-
-}  // namespace art
-
-#endif  // ART_COMPILER_OPTIMIZING_NODES_ARM64_H_
diff --git a/compiler/optimizing/nodes_arm64.cc b/compiler/optimizing/nodes_shared.cc
similarity index 63%
rename from compiler/optimizing/nodes_arm64.cc
rename to compiler/optimizing/nodes_shared.cc
index ac2f093..f145bf9 100644
--- a/compiler/optimizing/nodes_arm64.cc
+++ b/compiler/optimizing/nodes_shared.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2015 The Android Open Source Project
+ * Copyright (C) 2017 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -15,15 +15,15 @@
  */
 
 #include "common_arm64.h"
-#include "nodes.h"
+#include "nodes_shared.h"
 
 namespace art {
 
-using arm64::helpers::CanFitInShifterOperand;
+using helpers::CanFitInShifterOperand;
 
-void HArm64DataProcWithShifterOp::GetOpInfoFromInstruction(HInstruction* instruction,
-                                                           /*out*/OpKind* op_kind,
-                                                           /*out*/int* shift_amount) {
+void HDataProcWithShifterOp::GetOpInfoFromInstruction(HInstruction* instruction,
+                                                      /*out*/OpKind* op_kind,
+                                                      /*out*/int* shift_amount) {
   DCHECK(CanFitInShifterOperand(instruction));
   if (instruction->IsShl()) {
     *op_kind = kLSL;
@@ -41,12 +41,11 @@
     int result_size = Primitive::ComponentSize(result_type);
     int input_size = Primitive::ComponentSize(input_type);
     int min_size = std::min(result_size, input_size);
-    // This follows the logic in
-    // `InstructionCodeGeneratorARM64::VisitTypeConversion()`.
     if (result_type == Primitive::kPrimInt && input_type == Primitive::kPrimLong) {
-      // There is actually nothing to do. The register will be used as a W
-      // register, discarding the top bits. This is represented by the default
-      // encoding 'LSL 0'.
+      // There is actually nothing to do. On ARM the high register from the
+      // pair will be ignored. On ARM64 the register will be used as a W
+      // register, discarding the top bits. This is represented by the
+      // default encoding 'LSL 0'.
       *op_kind = kLSL;
       *shift_amount = 0;
     } else if (result_type == Primitive::kPrimChar ||
@@ -64,17 +63,17 @@
   }
 }
 
-std::ostream& operator<<(std::ostream& os, const HArm64DataProcWithShifterOp::OpKind op) {
+std::ostream& operator<<(std::ostream& os, const HDataProcWithShifterOp::OpKind op) {
   switch (op) {
-    case HArm64DataProcWithShifterOp::kLSL:  return os << "LSL";
-    case HArm64DataProcWithShifterOp::kLSR:  return os << "LSR";
-    case HArm64DataProcWithShifterOp::kASR:  return os << "ASR";
-    case HArm64DataProcWithShifterOp::kUXTB: return os << "UXTB";
-    case HArm64DataProcWithShifterOp::kUXTH: return os << "UXTH";
-    case HArm64DataProcWithShifterOp::kUXTW: return os << "UXTW";
-    case HArm64DataProcWithShifterOp::kSXTB: return os << "SXTB";
-    case HArm64DataProcWithShifterOp::kSXTH: return os << "SXTH";
-    case HArm64DataProcWithShifterOp::kSXTW: return os << "SXTW";
+    case HDataProcWithShifterOp::kLSL:  return os << "LSL";
+    case HDataProcWithShifterOp::kLSR:  return os << "LSR";
+    case HDataProcWithShifterOp::kASR:  return os << "ASR";
+    case HDataProcWithShifterOp::kUXTB: return os << "UXTB";
+    case HDataProcWithShifterOp::kUXTH: return os << "UXTH";
+    case HDataProcWithShifterOp::kUXTW: return os << "UXTW";
+    case HDataProcWithShifterOp::kSXTB: return os << "SXTB";
+    case HDataProcWithShifterOp::kSXTH: return os << "SXTH";
+    case HDataProcWithShifterOp::kSXTW: return os << "SXTW";
     default:
       LOG(FATAL) << "Invalid OpKind " << static_cast<int>(op);
       UNREACHABLE();
diff --git a/compiler/optimizing/nodes_shared.h b/compiler/optimizing/nodes_shared.h
index 814202e..c6bfbcc 100644
--- a/compiler/optimizing/nodes_shared.h
+++ b/compiler/optimizing/nodes_shared.h
@@ -150,6 +150,81 @@
   DISALLOW_COPY_AND_ASSIGN(HIntermediateAddress);
 };
 
+class HDataProcWithShifterOp FINAL : public HExpression<2> {
+ public:
+  enum OpKind {
+    kLSL,   // Logical shift left.
+    kLSR,   // Logical shift right.
+    kASR,   // Arithmetic shift right.
+    kUXTB,  // Unsigned extend byte.
+    kUXTH,  // Unsigned extend half-word.
+    kUXTW,  // Unsigned extend word.
+    kSXTB,  // Signed extend byte.
+    kSXTH,  // Signed extend half-word.
+    kSXTW,  // Signed extend word.
+
+    // Aliases.
+    kFirstShiftOp = kLSL,
+    kLastShiftOp = kASR,
+    kFirstExtensionOp = kUXTB,
+    kLastExtensionOp = kSXTW
+  };
+  HDataProcWithShifterOp(HInstruction* instr,
+                         HInstruction* left,
+                         HInstruction* right,
+                         OpKind op,
+                         // The shift argument is unused if the operation
+                         // is an extension.
+                         int shift = 0,
+                         uint32_t dex_pc = kNoDexPc)
+      : HExpression(instr->GetType(), SideEffects::None(), dex_pc),
+        instr_kind_(instr->GetKind()), op_kind_(op),
+        shift_amount_(shift & (instr->GetType() == Primitive::kPrimInt
+            ? kMaxIntShiftDistance
+            : kMaxLongShiftDistance)) {
+    DCHECK(!instr->HasSideEffects());
+    SetRawInputAt(0, left);
+    SetRawInputAt(1, right);
+  }
+
+  bool CanBeMoved() const OVERRIDE { return true; }
+  bool InstructionDataEquals(const HInstruction* other_instr) const OVERRIDE {
+    const HDataProcWithShifterOp* other = other_instr->AsDataProcWithShifterOp();
+    return instr_kind_ == other->instr_kind_ &&
+        op_kind_ == other->op_kind_ &&
+        shift_amount_ == other->shift_amount_;
+  }
+
+  static bool IsShiftOp(OpKind op_kind) {
+    return kFirstShiftOp <= op_kind && op_kind <= kLastShiftOp;
+  }
+
+  static bool IsExtensionOp(OpKind op_kind) {
+    return kFirstExtensionOp <= op_kind && op_kind <= kLastExtensionOp;
+  }
+
+  // Find the operation kind and shift amount from a bitfield move instruction.
+  static void GetOpInfoFromInstruction(HInstruction* bitfield_op,
+                                       /*out*/OpKind* op_kind,
+                                       /*out*/int* shift_amount);
+
+  InstructionKind GetInstrKind() const { return instr_kind_; }
+  OpKind GetOpKind() const { return op_kind_; }
+  int GetShiftAmount() const { return shift_amount_; }
+
+  DECLARE_INSTRUCTION(DataProcWithShifterOp);
+
+ private:
+  InstructionKind instr_kind_;
+  OpKind op_kind_;
+  int shift_amount_;
+
+  friend std::ostream& operator<<(std::ostream& os, OpKind op);
+
+  DISALLOW_COPY_AND_ASSIGN(HDataProcWithShifterOp);
+};
+
+std::ostream& operator<<(std::ostream& os, const HDataProcWithShifterOp::OpKind op);
 
 }  // namespace art
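
One detail of the constructor above worth spelling out: the shift amount is masked with the maximum shift distance for the operand width, so oversized shifts wrap the way the hardware's shift fields do. A self-contained sketch, assuming ART's kMaxIntShiftDistance and kMaxLongShiftDistance are 31 and 63 (the 5- and 6-bit shift fields):

    constexpr int kMaxIntShiftDistance = 31;   // 5-bit shift field (assumed value).
    constexpr int kMaxLongShiftDistance = 63;  // 6-bit shift field (assumed value).

    constexpr int ClampShift(int shift, bool is_long) {
      return shift & (is_long ? kMaxLongShiftDistance : kMaxIntShiftDistance);
    }

    static_assert(ClampShift(33, /* is_long */ false) == 1, "32-bit shifts wrap mod 32");
    static_assert(ClampShift(33, /* is_long */ true) == 33, "64-bit shifts keep 6 bits");
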
 
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index f72bd6a..d6153b0 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -56,6 +56,7 @@
 #include "builder.h"
 #include "cha_guard_optimization.h"
 #include "code_generator.h"
+#include "code_sinking.h"
 #include "compiled_method.h"
 #include "compiler.h"
 #include "constant_folding.h"
@@ -506,7 +507,7 @@
   } else if (opt_name == HInductionVarAnalysis::kInductionPassName) {
     return new (arena) HInductionVarAnalysis(graph);
   } else if (opt_name == InstructionSimplifier::kInstructionSimplifierPassName) {
-    return new (arena) InstructionSimplifier(graph, stats, pass_name.c_str());
+    return new (arena) InstructionSimplifier(graph, codegen, stats, pass_name.c_str());
   } else if (opt_name == IntrinsicsRecognizer::kIntrinsicsRecognizerPassName) {
     return new (arena) IntrinsicsRecognizer(graph, stats);
   } else if (opt_name == LICM::kLoopInvariantCodeMotionPassName) {
@@ -518,9 +519,11 @@
   } else if (opt_name == SideEffectsAnalysis::kSideEffectsAnalysisPassName) {
     return new (arena) SideEffectsAnalysis(graph);
   } else if (opt_name == HLoopOptimization::kLoopOptimizationPassName) {
-    return new (arena) HLoopOptimization(graph, most_recent_induction);
+    return new (arena) HLoopOptimization(graph, driver, most_recent_induction);
   } else if (opt_name == CHAGuardOptimization::kCHAGuardOptimizationPassName) {
     return new (arena) CHAGuardOptimization(graph);
+  } else if (opt_name == CodeSinking::kCodeSinkingPassName) {
+    return new (arena) CodeSinking(graph, stats);
 #ifdef ART_ENABLE_CODEGEN_arm
   } else if (opt_name == arm::DexCacheArrayFixups::kDexCacheArrayFixupsArmPassName) {
     return new (arena) arm::DexCacheArrayFixups(graph, codegen, stats);
@@ -765,28 +768,32 @@
   HDeadCodeElimination* dce3 = new (arena) HDeadCodeElimination(
       graph, stats, "dead_code_elimination$final");
   HConstantFolding* fold1 = new (arena) HConstantFolding(graph, "constant_folding");
-  InstructionSimplifier* simplify1 = new (arena) InstructionSimplifier(graph, stats);
+  InstructionSimplifier* simplify1 = new (arena) InstructionSimplifier(graph, codegen, stats);
   HSelectGenerator* select_generator = new (arena) HSelectGenerator(graph, stats);
   HConstantFolding* fold2 = new (arena) HConstantFolding(
       graph, "constant_folding$after_inlining");
   HConstantFolding* fold3 = new (arena) HConstantFolding(graph, "constant_folding$after_bce");
-  SideEffectsAnalysis* side_effects = new (arena) SideEffectsAnalysis(graph);
-  GVNOptimization* gvn = new (arena) GVNOptimization(graph, *side_effects);
-  LICM* licm = new (arena) LICM(graph, *side_effects, stats);
-  LoadStoreElimination* lse = new (arena) LoadStoreElimination(graph, *side_effects);
+  SideEffectsAnalysis* side_effects1 = new (arena) SideEffectsAnalysis(
+      graph, "side_effects$before_gvn");
+  SideEffectsAnalysis* side_effects2 = new (arena) SideEffectsAnalysis(
+      graph, "side_effects$before_lse");
+  GVNOptimization* gvn = new (arena) GVNOptimization(graph, *side_effects1);
+  LICM* licm = new (arena) LICM(graph, *side_effects1, stats);
   HInductionVarAnalysis* induction = new (arena) HInductionVarAnalysis(graph);
-  BoundsCheckElimination* bce = new (arena) BoundsCheckElimination(graph, *side_effects, induction);
-  HLoopOptimization* loop = new (arena) HLoopOptimization(graph, induction);
+  BoundsCheckElimination* bce = new (arena) BoundsCheckElimination(graph, *side_effects1, induction);
+  HLoopOptimization* loop = new (arena) HLoopOptimization(graph, driver, induction);
+  LoadStoreElimination* lse = new (arena) LoadStoreElimination(graph, *side_effects2);
   HSharpening* sharpening = new (arena) HSharpening(
       graph, codegen, dex_compilation_unit, driver, handles);
   InstructionSimplifier* simplify2 = new (arena) InstructionSimplifier(
-      graph, stats, "instruction_simplifier$after_inlining");
+      graph, codegen, stats, "instruction_simplifier$after_inlining");
   InstructionSimplifier* simplify3 = new (arena) InstructionSimplifier(
-      graph, stats, "instruction_simplifier$after_bce");
+      graph, codegen, stats, "instruction_simplifier$after_bce");
   InstructionSimplifier* simplify4 = new (arena) InstructionSimplifier(
-      graph, stats, "instruction_simplifier$before_codegen");
+      graph, codegen, stats, "instruction_simplifier$before_codegen");
   IntrinsicsRecognizer* intrinsics = new (arena) IntrinsicsRecognizer(graph, stats);
   CHAGuardOptimization* cha_guard = new (arena) CHAGuardOptimization(graph);
+  CodeSinking* code_sinking = new (arena) CodeSinking(graph, stats);
 
   HOptimization* optimizations1[] = {
     intrinsics,
@@ -806,7 +813,7 @@
     fold2,  // TODO: if we don't inline we can also skip fold2.
     simplify2,
     dce2,
-    side_effects,
+    side_effects1,
     gvn,
     licm,
     induction,
@@ -814,9 +821,11 @@
     loop,
     fold3,  // evaluates code generated by dynamic bce
     simplify3,
+    side_effects2,
     lse,
     cha_guard,
     dce3,
+    code_sinking,
     // The codegen has a few assumptions that only the instruction simplifier
     // can satisfy. For example, the code generator does not expect to see a
     // HTypeConversion from a type to the same type.
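
The side_effects1/side_effects2 split reflects that an analysis result is only valid for the graph it was computed on: the loop optimizations running between GVN and LSE may mutate the graph, so LSE gets a freshly computed analysis. A schematic sketch of the invalidation argument (an assumed shape, not ART code):

    #include <cassert>

    struct Graph { int version = 0; };

    struct SideEffectsSnapshot {
      int valid_for = -1;
      void Run(const Graph& g) { valid_for = g.version; }
      bool IsFresh(const Graph& g) const { return valid_for == g.version; }
    };

    int main() {
      Graph graph;
      SideEffectsSnapshot before_gvn;   // side_effects$before_gvn
      before_gvn.Run(graph);            // consumed by gvn, licm, bce
      graph.version++;                  // loop optimization rewrites the graph
      assert(!before_gvn.IsFresh(graph));
      SideEffectsSnapshot before_lse;   // side_effects$before_lse
      before_lse.Run(graph);            // consumed by lse
      assert(before_lse.IsFresh(graph));
    }
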
diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h
index 203b1ec..ae9a811 100644
--- a/compiler/optimizing/optimizing_compiler_stats.h
+++ b/compiler/optimizing/optimizing_compiler_stats.h
@@ -17,6 +17,7 @@
 #ifndef ART_COMPILER_OPTIMIZING_OPTIMIZING_COMPILER_STATS_H_
 #define ART_COMPILER_OPTIMIZING_OPTIMIZING_COMPILER_STATS_H_
 
+#include <atomic>
 #include <iomanip>
 #include <string>
 #include <type_traits>
@@ -67,14 +68,18 @@
   kImplicitNullCheckGenerated,
   kExplicitNullCheckGenerated,
   kSimplifyIf,
+  kInstructionSunk,
   kLastStat
 };
 
 class OptimizingCompilerStats {
  public:
-  OptimizingCompilerStats() {}
+  OptimizingCompilerStats() {
+    // The std::atomic<> default constructor leaves values uninitialized, so initialize them now.
+    Reset();
+  }
 
-  void RecordStat(MethodCompilationStat stat, size_t count = 1) {
+  void RecordStat(MethodCompilationStat stat, uint32_t count = 1) {
     compile_stats_[stat] += count;
   }
 
@@ -93,7 +98,7 @@
           << " methods: " << std::fixed << std::setprecision(2)
           << compiled_percent << "% (" << compile_stats_[kCompiled] << ") compiled.";
 
-      for (int i = 0; i < kLastStat; i++) {
+      for (size_t i = 0; i < kLastStat; i++) {
         if (compile_stats_[i] != 0) {
           LOG(INFO) << PrintMethodCompilationStat(static_cast<MethodCompilationStat>(i)) << ": "
               << compile_stats_[i];
@@ -102,6 +107,21 @@
     }
   }
 
+  void AddTo(OptimizingCompilerStats* other_stats) {
+    for (size_t i = 0; i != kLastStat; ++i) {
+      uint32_t count = compile_stats_[i];
+      if (count != 0) {
+        other_stats->RecordStat(static_cast<MethodCompilationStat>(i), count);
+      }
+    }
+  }
+
+  void Reset() {
+    for (size_t i = 0; i != kLastStat; ++i) {
+      compile_stats_[i] = 0u;
+    }
+  }
+
  private:
   std::string PrintMethodCompilationStat(MethodCompilationStat stat) const {
     std::string name;
@@ -147,6 +167,7 @@
       case kImplicitNullCheckGenerated: name = "ImplicitNullCheckGenerated"; break;
       case kExplicitNullCheckGenerated: name = "ExplicitNullCheckGenerated"; break;
       case kSimplifyIf: name = "SimplifyIf"; break;
+      case kInstructionSunk: name = "InstructionSunk"; break;
 
       case kLastStat:
         LOG(FATAL) << "invalid stat "
@@ -156,7 +177,7 @@
     return "OptStat#" + name;
   }
 
-  AtomicInteger compile_stats_[kLastStat];
+  std::atomic<uint32_t> compile_stats_[kLastStat];
 
   DISALLOW_COPY_AND_ASSIGN(OptimizingCompilerStats);
 };
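
The explicit Reset() in the constructor above is load-bearing: std::atomic<T>'s default constructor leaves the value uninitialized (value-initialization was only mandated in C++20), so reading a never-written counter would be undefined behavior. A standalone illustration of the same pattern:

    #include <atomic>
    #include <cstddef>
    #include <cstdint>

    class Counters {
     public:
      Counters() { Reset(); }  // Without this, Get() on a fresh object is UB.
      void Record(size_t i, uint32_t count = 1) { counts_[i] += count; }
      uint32_t Get(size_t i) const { return counts_[i].load(); }
      void Reset() {
        for (std::atomic<uint32_t>& c : counts_) {
          c = 0u;
        }
      }
     private:
      std::atomic<uint32_t> counts_[8];
    };
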
diff --git a/compiler/optimizing/register_allocation_resolver.cc b/compiler/optimizing/register_allocation_resolver.cc
index 59523a9..8a9c1cc 100644
--- a/compiler/optimizing/register_allocation_resolver.cc
+++ b/compiler/optimizing/register_allocation_resolver.cc
@@ -306,7 +306,7 @@
                         : Location::StackSlot(interval->GetParent()->GetSpillSlot()));
   }
   UsePosition* use = current->GetFirstUse();
-  UsePosition* env_use = current->GetFirstEnvironmentUse();
+  EnvUsePosition* env_use = current->GetFirstEnvironmentUse();
 
   // Walk over all siblings, updating locations of use positions, and
   // connecting them when they are adjacent.
@@ -323,7 +323,6 @@
         use = use->GetNext();
       }
       while (use != nullptr && use->GetPosition() <= range->GetEnd()) {
-        DCHECK(!use->GetIsEnvironment());
         DCHECK(current->CoversSlow(use->GetPosition()) || (use->GetPosition() == range->GetEnd()));
         if (!use->IsSynthesized()) {
           LocationSummary* locations = use->GetUser()->GetLocations();
diff --git a/compiler/optimizing/register_allocator_linear_scan.cc b/compiler/optimizing/register_allocator_linear_scan.cc
index 1a391ce..6354e76 100644
--- a/compiler/optimizing/register_allocator_linear_scan.cc
+++ b/compiler/optimizing/register_allocator_linear_scan.cc
@@ -629,21 +629,21 @@
     if (!locations->OutputCanOverlapWithInputs() && locations->Out().IsUnallocated()) {
       HInputsRef inputs = defined_by->GetInputs();
       for (size_t i = 0; i < inputs.size(); ++i) {
-        // Take the last interval of the input. It is the location of that interval
-        // that will be used at `defined_by`.
-        LiveInterval* interval = inputs[i]->GetLiveInterval()->GetLastSibling();
-        // Note that interval may have not been processed yet.
-        // TODO: Handle non-split intervals last in the work list.
-        if (locations->InAt(i).IsValid()
-            && interval->HasRegister()
-            && interval->SameRegisterKind(*current)) {
-          // The input must be live until the end of `defined_by`, to comply to
-          // the linear scan algorithm. So we use `defined_by`'s end lifetime
-          // position to check whether the input is dead or is inactive after
-          // `defined_by`.
-          DCHECK(interval->CoversSlow(defined_by->GetLifetimePosition()));
-          size_t position = defined_by->GetLifetimePosition() + 1;
-          FreeIfNotCoverAt(interval, position, free_until);
+        if (locations->InAt(i).IsValid()) {
+          // Take the last interval of the input. It is the location of that interval
+          // that will be used at `defined_by`.
+          LiveInterval* interval = inputs[i]->GetLiveInterval()->GetLastSibling();
+          // Note that the interval may not have been processed yet.
+          // TODO: Handle non-split intervals last in the work list.
+          if (interval->HasRegister() && interval->SameRegisterKind(*current)) {
+          // The input must be live until the end of `defined_by`, to comply with
+            // the linear scan algorithm. So we use `defined_by`'s end lifetime
+            // position to check whether the input is dead or is inactive after
+            // `defined_by`.
+            DCHECK(interval->CoversSlow(defined_by->GetLifetimePosition()));
+            size_t position = defined_by->GetLifetimePosition() + 1;
+            FreeIfNotCoverAt(interval, position, free_until);
+          }
         }
       }
     }
diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc
index 2227872..667afb1 100644
--- a/compiler/optimizing/register_allocator_test.cc
+++ b/compiler/optimizing/register_allocator_test.cc
@@ -912,9 +912,9 @@
   // Create an interval with lifetime holes.
   static constexpr size_t ranges1[][2] = {{0, 2}, {4, 6}, {8, 10}};
   LiveInterval* first = BuildInterval(ranges1, arraysize(ranges1), &allocator, -1, one);
-  first->first_use_ = new(&allocator) UsePosition(user, 0, false, 8, first->first_use_);
-  first->first_use_ = new(&allocator) UsePosition(user, 0, false, 7, first->first_use_);
-  first->first_use_ = new(&allocator) UsePosition(user, 0, false, 6, first->first_use_);
+  first->first_use_ = new(&allocator) UsePosition(user, false, 8, first->first_use_);
+  first->first_use_ = new(&allocator) UsePosition(user, false, 7, first->first_use_);
+  first->first_use_ = new(&allocator) UsePosition(user, false, 6, first->first_use_);
 
   locations = new (&allocator) LocationSummary(first->GetDefinedBy(), LocationSummary::kNoCall);
   locations->SetOut(Location::RequiresRegister());
@@ -934,9 +934,9 @@
   // before lifetime position 6 yet.
   static constexpr size_t ranges3[][2] = {{2, 4}, {8, 10}};
   LiveInterval* third = BuildInterval(ranges3, arraysize(ranges3), &allocator, -1, three);
-  third->first_use_ = new(&allocator) UsePosition(user, 0, false, 8, third->first_use_);
-  third->first_use_ = new(&allocator) UsePosition(user, 0, false, 4, third->first_use_);
-  third->first_use_ = new(&allocator) UsePosition(user, 0, false, 3, third->first_use_);
+  third->first_use_ = new(&allocator) UsePosition(user, false, 8, third->first_use_);
+  third->first_use_ = new(&allocator) UsePosition(user, false, 4, third->first_use_);
+  third->first_use_ = new(&allocator) UsePosition(user, false, 3, third->first_use_);
   locations = new (&allocator) LocationSummary(third->GetDefinedBy(), LocationSummary::kNoCall);
   locations->SetOut(Location::RequiresRegister());
   third = third->SplitAt(3);
diff --git a/compiler/optimizing/scheduler_arm64.cc b/compiler/optimizing/scheduler_arm64.cc
index e3701fb..558dcc4 100644
--- a/compiler/optimizing/scheduler_arm64.cc
+++ b/compiler/optimizing/scheduler_arm64.cc
@@ -31,8 +31,8 @@
   last_visited_latency_ = kArm64IntegerOpLatency;
 }
 
-void SchedulingLatencyVisitorARM64::VisitArm64DataProcWithShifterOp(
-    HArm64DataProcWithShifterOp* ATTRIBUTE_UNUSED) {
+void SchedulingLatencyVisitorARM64::VisitDataProcWithShifterOp(
+    HDataProcWithShifterOp* ATTRIBUTE_UNUSED) {
   last_visited_latency_ = kArm64DataProcWithShifterOpLatency;
 }
 
diff --git a/compiler/optimizing/scheduler_arm64.h b/compiler/optimizing/scheduler_arm64.h
index 702027c..7a33720 100644
--- a/compiler/optimizing/scheduler_arm64.h
+++ b/compiler/optimizing/scheduler_arm64.h
@@ -74,7 +74,8 @@
 #define FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(M) \
   M(BitwiseNegatedRight, unused)                 \
   M(MultiplyAccumulate, unused)                  \
-  M(IntermediateAddress, unused)
+  M(IntermediateAddress, unused)                 \
+  M(DataProcWithShifterOp, unused)
 
 #define DECLARE_VISIT_INSTRUCTION(type, unused)  \
   void Visit##type(H##type* instruction) OVERRIDE;
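
The FOR_EACH_SCHEDULED_SHARED_INSTRUCTION / DECLARE_VISIT_INSTRUCTION pair is the classic X-macro pattern: one macro holds the list, and each expansion site supplies a per-item macro. A toy, self-contained version of the same technique (ART's list macro threads an extra unused argument, dropped here):

    #include <iostream>

    #define FOR_EACH_SHAPE(M) \
      M(Circle)               \
      M(Square)

    #define DECLARE_VISIT(type) void Visit##type() { std::cout << #type << '\n'; }

    struct ShapeVisitor {
      FOR_EACH_SHAPE(DECLARE_VISIT)  // Declares VisitCircle() and VisitSquare().
    };

    #undef DECLARE_VISIT

    int main() {
      ShapeVisitor visitor;
      visitor.VisitCircle();
      visitor.VisitSquare();
    }
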
diff --git a/compiler/optimizing/sharpening.cc b/compiler/optimizing/sharpening.cc
index be40092..7bd38c7 100644
--- a/compiler/optimizing/sharpening.cc
+++ b/compiler/optimizing/sharpening.cc
@@ -41,7 +41,7 @@
     for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
       HInstruction* instruction = it.Current();
       if (instruction->IsInvokeStaticOrDirect()) {
-        ProcessInvokeStaticOrDirect(instruction->AsInvokeStaticOrDirect());
+        SharpenInvokeStaticOrDirect(instruction->AsInvokeStaticOrDirect(), codegen_);
       } else if (instruction->IsLoadString()) {
         ProcessLoadString(instruction->AsLoadString());
       }
@@ -65,12 +65,12 @@
 }
 
 static bool AOTCanEmbedMethod(ArtMethod* method, const CompilerOptions& options) {
-  // Including patch information means the AOT code will be patched, which we don't
-  // support in the compiler, and is anyways moving away b/33192586.
-  return IsInBootImage(method) && !options.GetCompilePic() && !options.GetIncludePatchInformation();
+  return IsInBootImage(method) && !options.GetCompilePic();
 }
 
-void HSharpening::ProcessInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
+
+void HSharpening::SharpenInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke,
+                                              CodeGenerator* codegen) {
   if (invoke->IsStringInit()) {
     // Not using the dex cache arrays. But we could still try to use a better dispatch...
     // TODO: Use direct_method and direct_code for the appropriate StringFactory method.
@@ -97,12 +97,12 @@
 
   // We don't optimize for debuggable as it would prevent us from obsoleting the method in some
   // situations.
-  if (callee == codegen_->GetGraph()->GetArtMethod() && !codegen_->GetGraph()->IsDebuggable()) {
+  if (callee == codegen->GetGraph()->GetArtMethod() && !codegen->GetGraph()->IsDebuggable()) {
     // Recursive call.
     method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kRecursive;
     code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallSelf;
   } else if (Runtime::Current()->UseJitCompilation() ||
-      AOTCanEmbedMethod(callee, codegen_->GetCompilerOptions())) {
+      AOTCanEmbedMethod(callee, codegen->GetCompilerOptions())) {
     // JIT or on-device AOT compilation referencing a boot image method.
     // Use the method address directly.
     method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress;
@@ -111,13 +111,17 @@
   } else {
     // Use PC-relative access to the dex cache arrays.
     method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative;
-    DexCacheArraysLayout layout(GetInstructionSetPointerSize(codegen_->GetInstructionSet()),
-                                &graph_->GetDexFile());
+    // Note: we use the invoke's graph instead of the codegen graph, as the two
+    // differ when inlining (the codegen graph is the outermost graph). The
+    // invoke's dex method index is relative to the dex file from which the
+    // invoke's graph was built.
+    DexCacheArraysLayout layout(GetInstructionSetPointerSize(codegen->GetInstructionSet()),
+                                &invoke->GetBlock()->GetGraph()->GetDexFile());
     method_load_data = layout.MethodOffset(invoke->GetDexMethodIndex());
     code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod;
   }
 
-  if (graph_->IsDebuggable()) {
+  if (codegen->GetGraph()->IsDebuggable()) {
     // For debuggable apps always use the code pointer from ArtMethod
     // so that we don't circumvent instrumentation stubs if installed.
     code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod;
@@ -127,14 +131,14 @@
       method_load_kind, code_ptr_location, method_load_data
   };
   HInvokeStaticOrDirect::DispatchInfo dispatch_info =
-      codegen_->GetSupportedInvokeStaticOrDirectDispatch(desired_dispatch_info, invoke);
+      codegen->GetSupportedInvokeStaticOrDirectDispatch(desired_dispatch_info, invoke);
   invoke->SetDispatchInfo(dispatch_info);
 }
 
-HLoadClass::LoadKind HSharpening::SharpenClass(HLoadClass* load_class,
-                                               CodeGenerator* codegen,
-                                               CompilerDriver* compiler_driver,
-                                               const DexCompilationUnit& dex_compilation_unit) {
+HLoadClass::LoadKind HSharpening::ComputeLoadClassKind(HLoadClass* load_class,
+                                                       CodeGenerator* codegen,
+                                                       CompilerDriver* compiler_driver,
+                                                       const DexCompilationUnit& dex_compilation_unit) {
   Handle<mirror::Class> klass = load_class->GetClass();
   DCHECK(load_class->GetLoadKind() == HLoadClass::LoadKind::kDexCacheViaMethod ||
          load_class->GetLoadKind() == HLoadClass::LoadKind::kReferrersClass)
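
Condensed, the method-load decision in SharpenInvokeStaticOrDirect above is a three-way branch. The sketch below is an assumed simplification (it omits the string-init early-out and the debuggable override of the code-pointer location):

    enum class MethodLoadKind { kRecursive, kDirectAddress, kDexCachePcRelative };

    struct CallSite {
      bool is_recursive;       // Callee is the method being compiled.
      bool jit_or_boot_image;  // JIT compilation, or AOT callee in the boot image.
    };

    MethodLoadKind ChooseLoadKind(const CallSite& site) {
      if (site.is_recursive) return MethodLoadKind::kRecursive;  // kCallSelf.
      if (site.jit_or_boot_image) return MethodLoadKind::kDirectAddress;
      return MethodLoadKind::kDexCachePcRelative;  // PC-relative dex cache access.
    }
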
diff --git a/compiler/optimizing/sharpening.h b/compiler/optimizing/sharpening.h
index 4240b2f..10707c7 100644
--- a/compiler/optimizing/sharpening.h
+++ b/compiler/optimizing/sharpening.h
@@ -48,14 +48,16 @@
   static constexpr const char* kSharpeningPassName = "sharpening";
 
   // Used by the builder and the inliner.
-  static HLoadClass::LoadKind SharpenClass(HLoadClass* load_class,
-                                           CodeGenerator* codegen,
-                                           CompilerDriver* compiler_driver,
-                                           const DexCompilationUnit& dex_compilation_unit)
+  static HLoadClass::LoadKind ComputeLoadClassKind(HLoadClass* load_class,
+                                                   CodeGenerator* codegen,
+                                                   CompilerDriver* compiler_driver,
+                                                   const DexCompilationUnit& dex_compilation_unit)
     REQUIRES_SHARED(Locks::mutator_lock_);
 
+  // Used by Sharpening and InstructionSimplifier.
+  static void SharpenInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke, CodeGenerator* codegen);
+
  private:
-  void ProcessInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke);
   void ProcessLoadString(HLoadString* load_string);
 
   CodeGenerator* codegen_;
diff --git a/compiler/optimizing/side_effects_analysis.h b/compiler/optimizing/side_effects_analysis.h
index bac6088..fea47e6 100644
--- a/compiler/optimizing/side_effects_analysis.h
+++ b/compiler/optimizing/side_effects_analysis.h
@@ -25,8 +25,8 @@
 
 class SideEffectsAnalysis : public HOptimization {
  public:
-  explicit SideEffectsAnalysis(HGraph* graph)
-      : HOptimization(graph, kSideEffectsAnalysisPassName),
+  explicit SideEffectsAnalysis(HGraph* graph, const char* pass_name = kSideEffectsAnalysisPassName)
+      : HOptimization(graph, pass_name),
         graph_(graph),
         block_effects_(graph->GetBlocks().size(),
                        graph->GetArena()->Adapter(kArenaAllocSideEffectsAnalysis)),
@@ -41,7 +41,7 @@
 
   bool HasRun() const { return has_run_; }
 
-  static constexpr const char* kSideEffectsAnalysisPassName = "SideEffects";
+  static constexpr const char* kSideEffectsAnalysisPassName = "side_effects";
 
  private:
   void UpdateLoopEffects(HLoopInformation* info, SideEffects effects);
diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h
index b62bf4e..340d0cc 100644
--- a/compiler/optimizing/ssa_liveness_analysis.h
+++ b/compiler/optimizing/ssa_liveness_analysis.h
@@ -17,9 +17,10 @@
 #ifndef ART_COMPILER_OPTIMIZING_SSA_LIVENESS_ANALYSIS_H_
 #define ART_COMPILER_OPTIMIZING_SSA_LIVENESS_ANALYSIS_H_
 
-#include "nodes.h"
 #include <iostream>
 
+#include "nodes.h"
+
 namespace art {
 
 class CodeGenerator;
@@ -103,21 +104,20 @@
  */
 class UsePosition : public ArenaObject<kArenaAllocSsaLiveness> {
  public:
-  UsePosition(HInstruction* user,
-              HEnvironment* environment,
-              size_t input_index,
-              size_t position,
-              UsePosition* next)
+  UsePosition(HInstruction* user, size_t input_index, size_t position, UsePosition* next)
       : user_(user),
-        environment_(environment),
         input_index_(input_index),
         position_(position),
         next_(next) {
-    DCHECK(environment == nullptr || user == nullptr);
     DCHECK(next_ == nullptr || next->GetPosition() >= GetPosition());
   }
 
-  static constexpr size_t kNoInput = -1;
+  explicit UsePosition(size_t position)
+      : user_(nullptr),
+        input_index_(kNoInput),
+        position_(dchecked_integral_cast<uint32_t>(position)),
+        next_(nullptr) {
+  }
 
   size_t GetPosition() const { return position_; }
 
@@ -125,9 +125,7 @@
   void SetNext(UsePosition* next) { next_ = next; }
 
   HInstruction* GetUser() const { return user_; }
-  HEnvironment* GetEnvironment() const { return environment_; }
 
-  bool GetIsEnvironment() const { return environment_ != nullptr; }
   bool IsSynthesized() const { return user_ == nullptr; }
 
   size_t GetInputIndex() const { return input_index_; }
@@ -142,20 +140,20 @@
 
   UsePosition* Dup(ArenaAllocator* allocator) const {
     return new (allocator) UsePosition(
-        user_, environment_, input_index_, position_,
+        user_, input_index_, position_,
         next_ == nullptr ? nullptr : next_->Dup(allocator));
   }
 
   bool RequiresRegister() const {
-    if (GetIsEnvironment()) return false;
     if (IsSynthesized()) return false;
     Location location = GetUser()->GetLocations()->InAt(GetInputIndex());
     return location.IsUnallocated() && location.RequiresRegisterKind();
   }
 
  private:
+  static constexpr uint32_t kNoInput = static_cast<uint32_t>(-1);
+
   HInstruction* const user_;
-  HEnvironment* const environment_;
   const size_t input_index_;
   const size_t position_;
   UsePosition* next_;
@@ -163,6 +161,50 @@
   DISALLOW_COPY_AND_ASSIGN(UsePosition);
 };
 
+/**
+ * An environment use position represents a use of an instruction's value by an HEnvironment.
+ */
+class EnvUsePosition : public ArenaObject<kArenaAllocSsaLiveness> {
+ public:
+  EnvUsePosition(HEnvironment* environment,
+                 size_t input_index,
+                 size_t position,
+                 EnvUsePosition* next)
+      : environment_(environment),
+        input_index_(input_index),
+        position_(position),
+        next_(next) {
+    DCHECK(environment != nullptr);
+    DCHECK(next_ == nullptr || next->GetPosition() >= GetPosition());
+  }
+
+  size_t GetPosition() const { return position_; }
+
+  EnvUsePosition* GetNext() const { return next_; }
+  void SetNext(EnvUsePosition* next) { next_ = next; }
+
+  HEnvironment* GetEnvironment() const { return environment_; }
+  size_t GetInputIndex() const { return input_index_; }
+
+  void Dump(std::ostream& stream) const {
+    stream << position_;
+  }
+
+  EnvUsePosition* Dup(ArenaAllocator* allocator) const {
+    return new (allocator) EnvUsePosition(
+        environment_, input_index_, position_,
+        next_ == nullptr ? nullptr : next_->Dup(allocator));
+  }
+
+ private:
+  HEnvironment* const environment_;
+  const size_t input_index_;
+  const size_t position_;
+  EnvUsePosition* next_;
+
+  DISALLOW_COPY_AND_ASSIGN(EnvUsePosition);
+};
+
 class SafepointPosition : public ArenaObject<kArenaAllocSsaLiveness> {
  public:
   explicit SafepointPosition(HInstruction* instruction)
@@ -227,7 +269,7 @@
     DCHECK(first_env_use_ == nullptr) << "A temporary cannot have environment user";
     size_t position = instruction->GetLifetimePosition();
     first_use_ = new (allocator_) UsePosition(
-        instruction, /* environment */ nullptr, temp_index, position, first_use_);
+        instruction, temp_index, position, first_use_);
     AddRange(position, position + 1);
   }
 
@@ -276,7 +318,7 @@
       }
       DCHECK(first_use_->GetPosition() + 1 == position);
       UsePosition* new_use = new (allocator_) UsePosition(
-          instruction, nullptr /* environment */, input_index, position, cursor->GetNext());
+          instruction, input_index, position, cursor->GetNext());
       cursor->SetNext(new_use);
       if (first_range_->GetEnd() == first_use_->GetPosition()) {
         first_range_->end_ = position;
@@ -285,11 +327,11 @@
     }
 
     if (is_environment) {
-      first_env_use_ = new (allocator_) UsePosition(
-          nullptr /* instruction */, environment, input_index, position, first_env_use_);
+      first_env_use_ = new (allocator_) EnvUsePosition(
+          environment, input_index, position, first_env_use_);
     } else {
       first_use_ = new (allocator_) UsePosition(
-          instruction, nullptr /* environment */, input_index, position, first_use_);
+          instruction, input_index, position, first_use_);
     }
 
     if (is_environment && !keep_alive) {
@@ -328,10 +370,10 @@
       AddBackEdgeUses(*block);
     }
     first_use_ = new (allocator_) UsePosition(
-        instruction, /* environment */ nullptr, input_index, block->GetLifetimeEnd(), first_use_);
+        instruction, input_index, block->GetLifetimeEnd(), first_use_);
   }
 
-  void AddRange(size_t start, size_t end) {
+  ALWAYS_INLINE void AddRange(size_t start, size_t end) {
     if (first_range_ == nullptr) {
       first_range_ = last_range_ = range_search_start_ =
           new (allocator_) LiveRange(start, end, first_range_);
@@ -538,7 +580,7 @@
     return first_use_;
   }
 
-  UsePosition* GetFirstEnvironmentUse() const {
+  EnvUsePosition* GetFirstEnvironmentUse() const {
     return first_env_use_;
   }
 
@@ -676,7 +718,7 @@
       current = current->GetNext();
     }
     stream << "}, uses: { ";
-    UsePosition* use = first_use_;
+    const UsePosition* use = first_use_;
     if (use != nullptr) {
       do {
         use->Dump(stream);
@@ -684,12 +726,12 @@
       } while ((use = use->GetNext()) != nullptr);
     }
     stream << "}, { ";
-    use = first_env_use_;
-    if (use != nullptr) {
+    const EnvUsePosition* env_use = first_env_use_;
+    if (env_use != nullptr) {
       do {
-        use->Dump(stream);
+        env_use->Dump(stream);
         stream << " ";
-      } while ((use = use->GetNext()) != nullptr);
+      } while ((env_use = env_use->GetNext()) != nullptr);
     }
     stream << "}";
     stream << " is_fixed: " << is_fixed_ << ", is_split: " << IsSplit();
@@ -1015,12 +1057,7 @@
       DCHECK(last_in_new_list == nullptr ||
              back_edge_use_position > last_in_new_list->GetPosition());
 
-      UsePosition* new_use = new (allocator_) UsePosition(
-          /* user */ nullptr,
-          /* environment */ nullptr,
-          UsePosition::kNoInput,
-          back_edge_use_position,
-          /* next */ nullptr);
+      UsePosition* new_use = new (allocator_) UsePosition(back_edge_use_position);
 
       if (last_in_new_list != nullptr) {
         // Going outward. The latest created use needs to point to the new use.
@@ -1056,7 +1093,7 @@
 
   // Uses of this interval. Note that this linked list is shared amongst siblings.
   UsePosition* first_use_;
-  UsePosition* first_env_use_;
+  EnvUsePosition* first_env_use_;
 
   // The instruction type this interval corresponds to.
   const Primitive::Type type_;
@@ -1210,8 +1247,7 @@
 
   // Returns whether `instruction` in an HEnvironment held by `env_holder`
   // should be kept live by the HEnvironment.
-  static bool ShouldBeLiveForEnvironment(HInstruction* env_holder,
-                                         HInstruction* instruction) {
+  static bool ShouldBeLiveForEnvironment(HInstruction* env_holder, HInstruction* instruction) {
     if (instruction == nullptr) return false;
     // A value that's not live in compiled code may still be needed in interpreter,
     // due to code motion, etc.
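
The net effect of this header refactoring: one node type with a nullable environment_ discriminator becomes two unrelated node types, so the DCHECK(!use->GetIsEnvironment()) removed from the resolver is now enforced by the type system, because the plain use list simply cannot carry environment uses. A skeleton of the resulting shape (illustrative names; void* stands in for the ART pointer types):

    #include <cstddef>

    struct InstrUse {             // cf. UsePosition
      const void* user;           // HInstruction* in ART.
      size_t input_index;
      size_t position;
      InstrUse* next;             // Forward-linked, arena-allocated in ART.
    };

    struct EnvUse {               // cf. EnvUsePosition
      const void* environment;    // HEnvironment* in ART; never null.
      size_t input_index;
      size_t position;
      EnvUse* next;
    };
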
diff --git a/compiler/optimizing/ssa_liveness_analysis_test.cc b/compiler/optimizing/ssa_liveness_analysis_test.cc
new file mode 100644
index 0000000..1916c73
--- /dev/null
+++ b/compiler/optimizing/ssa_liveness_analysis_test.cc
@@ -0,0 +1,232 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arch/instruction_set.h"
+#include "arch/instruction_set_features.h"
+#include "base/arena_allocator.h"
+#include "base/arena_containers.h"
+#include "driver/compiler_options.h"
+#include "code_generator.h"
+#include "nodes.h"
+#include "optimizing_unit_test.h"
+#include "ssa_liveness_analysis.h"
+
+namespace art {
+
+class SsaLivenessAnalysisTest : public testing::Test {
+ public:
+  SsaLivenessAnalysisTest()
+      : pool_(),
+        allocator_(&pool_),
+        graph_(CreateGraph(&allocator_)),
+        compiler_options_(),
+        instruction_set_(kRuntimeISA) {
+    std::string error_msg;
+    instruction_set_features_ =
+        InstructionSetFeatures::FromVariant(instruction_set_, "default", &error_msg);
+    codegen_ = CodeGenerator::Create(graph_,
+                                     instruction_set_,
+                                     *instruction_set_features_,
+                                     compiler_options_);
+    CHECK(codegen_ != nullptr) << instruction_set_ << " is not a supported target architecture.";
+    // Create entry block.
+    entry_ = new (&allocator_) HBasicBlock(graph_);
+    graph_->AddBlock(entry_);
+    graph_->SetEntryBlock(entry_);
+  }
+
+ protected:
+  HBasicBlock* CreateSuccessor(HBasicBlock* block) {
+    HGraph* graph = block->GetGraph();
+    HBasicBlock* successor = new (&allocator_) HBasicBlock(graph);
+    graph->AddBlock(successor);
+    block->AddSuccessor(successor);
+    return successor;
+  }
+
+  ArenaPool pool_;
+  ArenaAllocator allocator_;
+  HGraph* graph_;
+  CompilerOptions compiler_options_;
+  InstructionSet instruction_set_;
+  std::unique_ptr<const InstructionSetFeatures> instruction_set_features_;
+  std::unique_ptr<CodeGenerator> codegen_;
+  HBasicBlock* entry_;
+};
+
+TEST_F(SsaLivenessAnalysisTest, TestReturnArg) {
+  HInstruction* arg = new (&allocator_) HParameterValue(
+      graph_->GetDexFile(), dex::TypeIndex(0), 0, Primitive::kPrimInt);
+  entry_->AddInstruction(arg);
+
+  HBasicBlock* block = CreateSuccessor(entry_);
+  HInstruction* ret = new (&allocator_) HReturn(arg);
+  block->AddInstruction(ret);
+  block->AddInstruction(new (&allocator_) HExit());
+
+  graph_->BuildDominatorTree();
+  SsaLivenessAnalysis ssa_analysis(graph_, codegen_.get());
+  ssa_analysis.Analyze();
+
+  std::ostringstream arg_dump;
+  arg->GetLiveInterval()->Dump(arg_dump);
+  EXPECT_STREQ("ranges: { [2,6) }, uses: { 6 }, { } is_fixed: 0, is_split: 0 is_low: 0 is_high: 0",
+               arg_dump.str().c_str());
+}
+
+TEST_F(SsaLivenessAnalysisTest, TestAput) {
+  HInstruction* array = new (&allocator_) HParameterValue(
+      graph_->GetDexFile(), dex::TypeIndex(0), 0, Primitive::kPrimNot);
+  HInstruction* index = new (&allocator_) HParameterValue(
+      graph_->GetDexFile(), dex::TypeIndex(1), 1, Primitive::kPrimInt);
+  HInstruction* value = new (&allocator_) HParameterValue(
+      graph_->GetDexFile(), dex::TypeIndex(2), 2, Primitive::kPrimInt);
+  HInstruction* extra_arg1 = new (&allocator_) HParameterValue(
+      graph_->GetDexFile(), dex::TypeIndex(3), 3, Primitive::kPrimInt);
+  HInstruction* extra_arg2 = new (&allocator_) HParameterValue(
+      graph_->GetDexFile(), dex::TypeIndex(4), 4, Primitive::kPrimNot);
+  ArenaVector<HInstruction*> args({ array, index, value, extra_arg1, extra_arg2 },
+                                  allocator_.Adapter());
+  for (HInstruction* insn : args) {
+    entry_->AddInstruction(insn);
+  }
+
+  HBasicBlock* block = CreateSuccessor(entry_);
+  HInstruction* null_check = new (&allocator_) HNullCheck(array, 0);
+  block->AddInstruction(null_check);
+  HEnvironment* null_check_env = new (&allocator_) HEnvironment(&allocator_,
+                                                                /* number_of_vregs */ 5,
+                                                                /* method */ nullptr,
+                                                                /* dex_pc */ 0u,
+                                                                null_check);
+  null_check_env->CopyFrom(args);
+  null_check->SetRawEnvironment(null_check_env);
+  HInstruction* length = new (&allocator_) HArrayLength(array, 0);
+  block->AddInstruction(length);
+  HInstruction* bounds_check = new (&allocator_) HBoundsCheck(index, length, /* dex_pc */ 0u);
+  block->AddInstruction(bounds_check);
+  HEnvironment* bounds_check_env = new (&allocator_) HEnvironment(&allocator_,
+                                                                  /* number_of_vregs */ 5,
+                                                                  /* method */ nullptr,
+                                                                  /* dex_pc */ 0u,
+                                                                  bounds_check);
+  bounds_check_env->CopyFrom(args);
+  bounds_check->SetRawEnvironment(bounds_check_env);
+  HInstruction* array_set =
+      new (&allocator_) HArraySet(array, index, value, Primitive::kPrimInt, /* dex_pc */ 0);
+  block->AddInstruction(array_set);
+
+  graph_->BuildDominatorTree();
+  SsaLivenessAnalysis ssa_analysis(graph_, codegen_.get());
+  ssa_analysis.Analyze();
+
+  EXPECT_FALSE(graph_->IsDebuggable());
+  EXPECT_EQ(18u, bounds_check->GetLifetimePosition());
+  static const char* const expected[] = {
+      "ranges: { [2,21) }, uses: { 15 17 21 }, { 15 19 } is_fixed: 0, is_split: 0 is_low: 0 "
+          "is_high: 0",
+      "ranges: { [4,21) }, uses: { 19 21 }, { 15 19 } is_fixed: 0, is_split: 0 is_low: 0 "
+          "is_high: 0",
+      "ranges: { [6,21) }, uses: { 21 }, { 15 19 } is_fixed: 0, is_split: 0 is_low: 0 "
+          "is_high: 0",
+      // Environment uses do not keep the non-reference argument alive.
+      "ranges: { [8,10) }, uses: { }, { 15 19 } is_fixed: 0, is_split: 0 is_low: 0 is_high: 0",
+      // Environment uses keep the reference argument alive.
+      "ranges: { [10,19) }, uses: { }, { 15 19 } is_fixed: 0, is_split: 0 is_low: 0 is_high: 0",
+  };
+  ASSERT_EQ(arraysize(expected), args.size());
+  size_t arg_index = 0u;
+  for (HInstruction* arg : args) {
+    std::ostringstream arg_dump;
+    arg->GetLiveInterval()->Dump(arg_dump);
+    EXPECT_STREQ(expected[arg_index], arg_dump.str().c_str()) << arg_index;
+    ++arg_index;
+  }
+}
+
+TEST_F(SsaLivenessAnalysisTest, TestDeoptimize) {
+  HInstruction* array = new (&allocator_) HParameterValue(
+      graph_->GetDexFile(), dex::TypeIndex(0), 0, Primitive::kPrimNot);
+  HInstruction* index = new (&allocator_) HParameterValue(
+      graph_->GetDexFile(), dex::TypeIndex(1), 1, Primitive::kPrimInt);
+  HInstruction* value = new (&allocator_) HParameterValue(
+      graph_->GetDexFile(), dex::TypeIndex(2), 2, Primitive::kPrimInt);
+  HInstruction* extra_arg1 = new (&allocator_) HParameterValue(
+      graph_->GetDexFile(), dex::TypeIndex(3), 3, Primitive::kPrimInt);
+  HInstruction* extra_arg2 = new (&allocator_) HParameterValue(
+      graph_->GetDexFile(), dex::TypeIndex(4), 4, Primitive::kPrimNot);
+  ArenaVector<HInstruction*> args({ array, index, value, extra_arg1, extra_arg2 },
+                                  allocator_.Adapter());
+  for (HInstruction* insn : args) {
+    entry_->AddInstruction(insn);
+  }
+
+  HBasicBlock* block = CreateSuccessor(entry_);
+  HInstruction* null_check = new (&allocator_) HNullCheck(array, 0);
+  block->AddInstruction(null_check);
+  HEnvironment* null_check_env = new (&allocator_) HEnvironment(&allocator_,
+                                                                /* number_of_vregs */ 5,
+                                                                /* method */ nullptr,
+                                                                /* dex_pc */ 0u,
+                                                                null_check);
+  null_check_env->CopyFrom(args);
+  null_check->SetRawEnvironment(null_check_env);
+  HInstruction* length = new (&allocator_) HArrayLength(array, 0);
+  block->AddInstruction(length);
+  // Use HAboveOrEqual+HDeoptimize as the bounds check.
+  HInstruction* ae = new (&allocator_) HAboveOrEqual(index, length);
+  block->AddInstruction(ae);
+  HInstruction* deoptimize = new (&allocator_) HDeoptimize(ae, /* dex_pc */ 0u);
+  block->AddInstruction(deoptimize);
+  HEnvironment* deoptimize_env = new (&allocator_) HEnvironment(&allocator_,
+                                                                /* number_of_vregs */ 5,
+                                                                /* method */ nullptr,
+                                                                /* dex_pc */ 0u,
+                                                                deoptimize);
+  deoptimize_env->CopyFrom(args);
+  deoptimize->SetRawEnvironment(deoptimize_env);
+  HInstruction* array_set =
+      new (&allocator_) HArraySet(array, index, value, Primitive::kPrimInt, /* dex_pc */ 0);
+  block->AddInstruction(array_set);
+
+  graph_->BuildDominatorTree();
+  SsaLivenessAnalysis ssa_analysis(graph_, codegen_.get());
+  ssa_analysis.Analyze();
+
+  EXPECT_FALSE(graph_->IsDebuggable());
+  EXPECT_EQ(20u, deoptimize->GetLifetimePosition());
+  static const char* const expected[] = {
+      "ranges: { [2,23) }, uses: { 15 17 23 }, { 15 21 } is_fixed: 0, is_split: 0 is_low: 0 "
+          "is_high: 0",
+      "ranges: { [4,23) }, uses: { 19 23 }, { 15 21 } is_fixed: 0, is_split: 0 is_low: 0 "
+          "is_high: 0",
+      "ranges: { [6,23) }, uses: { 23 }, { 15 21 } is_fixed: 0, is_split: 0 is_low: 0 is_high: 0",
+      // Environment use in HDeoptimize keeps even the non-reference argument alive.
+      "ranges: { [8,21) }, uses: { }, { 15 21 } is_fixed: 0, is_split: 0 is_low: 0 is_high: 0",
+      // Environment uses keep the reference argument alive.
+      "ranges: { [10,21) }, uses: { }, { 15 21 } is_fixed: 0, is_split: 0 is_low: 0 is_high: 0",
+  };
+  ASSERT_EQ(arraysize(expected), args.size());
+  size_t arg_index = 0u;
+  for (HInstruction* arg : args) {
+    std::ostringstream arg_dump;
+    arg->GetLiveInterval()->Dump(arg_dump);
+    EXPECT_STREQ(expected[arg_index], arg_dump.str().c_str()) << arg_index;
+    ++arg_index;
+  }
+}
+
+}  // namespace art
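
Reading the expected strings in these tests takes one convention, which can be reverse-engineered from the test's own assertions (e.g. EXPECT_EQ(18u, bounds_check->GetLifetimePosition())): lifetime positions advance by two per instruction, and a use is recorded at the user's position plus one. For TestAput that works out to:

    parameters:    array = 2, index = 4, value = 6, extra_arg1 = 8, extra_arg2 = 10
    second block:  null_check = 14, length = 16, bounds_check = 18, array_set = 20

so `array` spans [2,21) with uses {15, 17, 21} (null_check, length, array_set) and environment uses {15, 19} (the null_check and bounds_check environments).
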
diff --git a/compiler/optimizing/stack_map_stream.cc b/compiler/optimizing/stack_map_stream.cc
index eeae96e..4d12ad6 100644
--- a/compiler/optimizing/stack_map_stream.cc
+++ b/compiler/optimizing/stack_map_stream.cc
@@ -16,8 +16,6 @@
 
 #include "stack_map_stream.h"
 
-#include <unordered_map>
-
 #include "art_method-inl.h"
 #include "base/stl_util.h"
 #include "optimizing/optimizing_compiler.h"
@@ -526,7 +524,7 @@
 
 size_t StackMapStream::PrepareRegisterMasks() {
   register_masks_.resize(stack_maps_.size(), 0u);
-  std::unordered_map<uint32_t, size_t> dedupe;
+  ArenaUnorderedMap<uint32_t, size_t> dedupe(allocator_->Adapter(kArenaAllocStackMapStream));
   for (StackMapEntry& stack_map : stack_maps_) {
     const size_t index = dedupe.size();
     stack_map.register_mask_index = dedupe.emplace(stack_map.register_mask, index).first->second;
@@ -541,10 +539,11 @@
   stack_masks_.resize(byte_entry_size * stack_maps_.size(), 0u);
   // For deduplicating we store the stack masks as byte packed for simplicity. We can bit pack later
   // when copying out from stack_masks_.
-  std::unordered_map<MemoryRegion,
-                     size_t,
-                     FNVHash<MemoryRegion>,
-                     MemoryRegion::ContentEquals> dedup(stack_maps_.size());
+  ArenaUnorderedMap<MemoryRegion,
+                    size_t,
+                    FNVHash<MemoryRegion>,
+                    MemoryRegion::ContentEquals> dedup(
+                        stack_maps_.size(), allocator_->Adapter(kArenaAllocStackMapStream));
   for (StackMapEntry& stack_map : stack_maps_) {
     size_t index = dedup.size();
     MemoryRegion stack_mask(stack_masks_.data() + index * byte_entry_size, byte_entry_size);
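
Both dedupe loops above use the same compact idiom: try to emplace the key with "next fresh index" as its value; emplace() returns the existing entry on a duplicate, so the expression yields the canonical index either way. A standalone rendering:

    #include <cassert>
    #include <cstdint>
    #include <unordered_map>
    #include <vector>

    std::vector<size_t> DedupIndices(const std::vector<uint32_t>& masks) {
      std::unordered_map<uint32_t, size_t> dedupe;
      std::vector<size_t> indices;
      for (uint32_t mask : masks) {
        const size_t index = dedupe.size();  // Index to use if `mask` is new.
        indices.push_back(dedupe.emplace(mask, index).first->second);
      }
      return indices;
    }

    int main() {
      std::vector<size_t> idx = DedupIndices({5, 7, 5, 9});
      assert(idx[0] == 0 && idx[1] == 1 && idx[2] == 0 && idx[3] == 2);
    }
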
diff --git a/compiler/utils/arm/assembler_arm_vixl.cc b/compiler/utils/arm/assembler_arm_vixl.cc
index e5eef37..6afc3dd 100644
--- a/compiler/utils/arm/assembler_arm_vixl.cc
+++ b/compiler/utils/arm/assembler_arm_vixl.cc
@@ -230,6 +230,7 @@
   if (!CanHoldStoreOffsetThumb(type, offset)) {
     CHECK_NE(base.GetCode(), kIpCode);
     if ((reg.GetCode() != kIpCode) &&
+        (!vixl_masm_.GetScratchRegisterList()->IsEmpty()) &&
         ((type != kStoreWordPair) || (reg.GetCode() + 1 != kIpCode))) {
       tmp_reg = temps.Acquire();
     } else {
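
The added IsEmpty() test keeps the store helper from Acquire()ing out of an exhausted scratch-register pool, which would assert inside the macro assembler; with the guard, the code falls through to the else branch instead. Schematically (a sketch of the guard, not the VIXL API):

    #include <cassert>
    #include <vector>

    struct ScratchPool {
      std::vector<int> regs;
      bool IsEmpty() const { return regs.empty(); }
      int Acquire() {  // Asserts when the pool is exhausted.
        assert(!regs.empty());
        int r = regs.back();
        regs.pop_back();
        return r;
      }
    };

    // Prefer a scratch register; otherwise use a caller-provided fallback.
    int PickTempReg(ScratchPool& temps, int fallback_reg) {
      return temps.IsEmpty() ? fallback_reg : temps.Acquire();
    }
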
diff --git a/compiler/utils/arm/assembler_arm_vixl.h b/compiler/utils/arm/assembler_arm_vixl.h
index 322f6c4..e81e767 100644
--- a/compiler/utils/arm/assembler_arm_vixl.h
+++ b/compiler/utils/arm/assembler_arm_vixl.h
@@ -135,6 +135,16 @@
   // jumping within 2KB range. For B(cond, label), because the supported branch range is 256
   // bytes, we use the far_target hint to try to use 16-bit T1 encoding for short range jumps.
   void B(vixl32::Condition cond, vixl32::Label* label, bool is_far_target = true);
+
+  // Use a literal pool load when a double constant doesn't fit the VMOV immediate encoding.
+  void Vmov(vixl32::DRegister rd, double imm) {
+    if (vixl::VFP::IsImmFP64(imm)) {
+      MacroAssembler::Vmov(rd, imm);
+    } else {
+      MacroAssembler::Vldr(rd, imm);
+    }
+  }
+  using MacroAssembler::Vmov;
 };
 
 class ArmVIXLAssembler FINAL : public Assembler {
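
The Vmov() wrapper above hinges on a predicate for "fits the VFP immediate form". As an assumed re-derivation of the rule that vixl::VFP::IsImmFP64 implements: an FP64 immediate is encodable iff it equals ±(16..31)/16 × 2^e with e in [-3, 4], i.e. a 4-bit fraction and a 3-bit exponent. A standalone check along those lines (a sketch, not the VIXL implementation):

    #include <cmath>

    // Hypothetical stand-in for vixl::VFP::IsImmFP64 (assumed semantics).
    bool FitsVmovImmediate(double imm) {
      if (!std::isfinite(imm) || imm == 0.0) return false;  // +-0 not encodable.
      int exp;
      double m = 2.0 * std::frexp(std::fabs(imm), &exp);  // m in [1, 2).
      int e = exp - 1;
      if (e < -3 || e > 4) return false;                  // Exponent out of range.
      double f = (m - 1.0) * 16.0;                        // 4-bit fraction field.
      return f == std::floor(f);
    }

With such a predicate the dispatch is exactly the two-way choice in the header: single-instruction immediate form when encodable, literal pool load (Vldr) otherwise.
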
diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc
index 5e83e82..2e2231b 100644
--- a/compiler/utils/mips/assembler_mips.cc
+++ b/compiler/utils/mips/assembler_mips.cc
@@ -3475,8 +3475,8 @@
   CHECK(dest.IsCoreRegister() && base.AsMips().IsCoreRegister());
   LoadFromOffset(kLoadWord, dest.AsCoreRegister(),
                  base.AsMips().AsCoreRegister(), offs.Int32Value());
-  if (kPoisonHeapReferences && unpoison_reference) {
-    Subu(dest.AsCoreRegister(), ZERO, dest.AsCoreRegister());
+  if (unpoison_reference) {
+    MaybeUnpoisonHeapReference(dest.AsCoreRegister());
   }
 }
 
diff --git a/compiler/utils/mips/assembler_mips.h b/compiler/utils/mips/assembler_mips.h
index 2fca185..1a5a23d 100644
--- a/compiler/utils/mips/assembler_mips.h
+++ b/compiler/utils/mips/assembler_mips.h
@@ -501,8 +501,10 @@
                            bool is_float = false);
 
  private:
+  // This will be used as an argument for loads/stores
+  // when there is no need for implicit null checks.
   struct NoImplicitNullChecker {
-    void operator()() {}
+    void operator()() const {}
   };
 
  public:
@@ -727,6 +729,38 @@
   void Pop(Register rd);
   void PopAndReturn(Register rd, Register rt);
 
+  //
+  // Heap poisoning.
+  //
+
+  // Poison a heap reference contained in `src` and store it in `dst`.
+  void PoisonHeapReference(Register dst, Register src) {
+    // dst = -src.
+    Subu(dst, ZERO, src);
+  }
+  // Poison a heap reference contained in `reg`.
+  void PoisonHeapReference(Register reg) {
+    // reg = -reg.
+    PoisonHeapReference(reg, reg);
+  }
+  // Unpoison a heap reference contained in `reg`.
+  void UnpoisonHeapReference(Register reg) {
+    // reg = -reg.
+    Subu(reg, ZERO, reg);
+  }
+  // Poison a heap reference contained in `reg` if heap poisoning is enabled.
+  void MaybePoisonHeapReference(Register reg) {
+    if (kPoisonHeapReferences) {
+      PoisonHeapReference(reg);
+    }
+  }
+  // Unpoison a heap reference contained in `reg` if heap poisoning is enabled.
+  void MaybeUnpoisonHeapReference(Register reg) {
+    if (kPoisonHeapReferences) {
+      UnpoisonHeapReference(reg);
+    }
+  }
+
   void Bind(Label* label) OVERRIDE {
     Bind(down_cast<MipsLabel*>(label));
   }
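
On 32-bit MIPS, poisoning is plain two's-complement negation, which is its own inverse; that is why PoisonHeapReference and UnpoisonHeapReference emit the same Subu. A quick standalone check:

    #include <cassert>
    #include <cstdint>

    uint32_t Poison(uint32_t ref) { return 0u - ref; }    // Subu(dst, ZERO, src).
    uint32_t Unpoison(uint32_t ref) { return 0u - ref; }  // Same operation.

    int main() {
      uint32_t ref = 0x12345678u;
      assert(Unpoison(Poison(ref)) == ref);  // Negation is an involution.
    }
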
diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc
index 998f2c7..39eb589 100644
--- a/compiler/utils/mips64/assembler_mips64.cc
+++ b/compiler/utils/mips64/assembler_mips64.cc
@@ -488,6 +488,11 @@
   EmitI(0xf, rs, rt, imm16);
 }
 
+void Mips64Assembler::Daui(GpuRegister rt, GpuRegister rs, uint16_t imm16) {
+  CHECK_NE(rs, ZERO);
+  EmitI(0x1d, rs, rt, imm16);
+}
+
 void Mips64Assembler::Dahi(GpuRegister rs, uint16_t imm16) {
   EmitI(1, rs, static_cast<GpuRegister>(6), imm16);
 }
@@ -2015,80 +2020,18 @@
   Bcond(label, kCondT, static_cast<GpuRegister>(ft), ZERO);
 }
 
-void Mips64Assembler::LoadFromOffset(LoadOperandType type, GpuRegister reg, GpuRegister base,
+void Mips64Assembler::LoadFromOffset(LoadOperandType type,
+                                     GpuRegister reg,
+                                     GpuRegister base,
                                      int32_t offset) {
-  if (!IsInt<16>(offset) ||
-      (type == kLoadDoubleword && !IsAligned<kMips64DoublewordSize>(offset) &&
-       !IsInt<16>(static_cast<int32_t>(offset + kMips64WordSize)))) {
-    LoadConst32(AT, offset & ~(kMips64DoublewordSize - 1));
-    Daddu(AT, AT, base);
-    base = AT;
-    offset &= (kMips64DoublewordSize - 1);
-  }
-
-  switch (type) {
-    case kLoadSignedByte:
-      Lb(reg, base, offset);
-      break;
-    case kLoadUnsignedByte:
-      Lbu(reg, base, offset);
-      break;
-    case kLoadSignedHalfword:
-      Lh(reg, base, offset);
-      break;
-    case kLoadUnsignedHalfword:
-      Lhu(reg, base, offset);
-      break;
-    case kLoadWord:
-      CHECK_ALIGNED(offset, kMips64WordSize);
-      Lw(reg, base, offset);
-      break;
-    case kLoadUnsignedWord:
-      CHECK_ALIGNED(offset, kMips64WordSize);
-      Lwu(reg, base, offset);
-      break;
-    case kLoadDoubleword:
-      if (!IsAligned<kMips64DoublewordSize>(offset)) {
-        CHECK_ALIGNED(offset, kMips64WordSize);
-        Lwu(reg, base, offset);
-        Lwu(TMP2, base, offset + kMips64WordSize);
-        Dinsu(reg, TMP2, 32, 32);
-      } else {
-        Ld(reg, base, offset);
-      }
-      break;
-  }
+  LoadFromOffset<>(type, reg, base, offset);
 }
 
-void Mips64Assembler::LoadFpuFromOffset(LoadOperandType type, FpuRegister reg, GpuRegister base,
+void Mips64Assembler::LoadFpuFromOffset(LoadOperandType type,
+                                        FpuRegister reg,
+                                        GpuRegister base,
                                         int32_t offset) {
-  if (!IsInt<16>(offset) ||
-      (type == kLoadDoubleword && !IsAligned<kMips64DoublewordSize>(offset) &&
-       !IsInt<16>(static_cast<int32_t>(offset + kMips64WordSize)))) {
-    LoadConst32(AT, offset & ~(kMips64DoublewordSize - 1));
-    Daddu(AT, AT, base);
-    base = AT;
-    offset &= (kMips64DoublewordSize - 1);
-  }
-
-  switch (type) {
-    case kLoadWord:
-      CHECK_ALIGNED(offset, kMips64WordSize);
-      Lwc1(reg, base, offset);
-      break;
-    case kLoadDoubleword:
-      if (!IsAligned<kMips64DoublewordSize>(offset)) {
-        CHECK_ALIGNED(offset, kMips64WordSize);
-        Lwc1(reg, base, offset);
-        Lw(TMP2, base, offset + kMips64WordSize);
-        Mthc1(TMP2, reg);
-      } else {
-        Ldc1(reg, base, offset);
-      }
-      break;
-    default:
-      LOG(FATAL) << "UNREACHABLE";
-  }
+  LoadFpuFromOffset<>(type, reg, base, offset);
 }
 
 void Mips64Assembler::EmitLoad(ManagedRegister m_dst, GpuRegister src_register, int32_t src_offset,
@@ -2118,72 +2061,18 @@
   }
 }
 
-void Mips64Assembler::StoreToOffset(StoreOperandType type, GpuRegister reg, GpuRegister base,
+void Mips64Assembler::StoreToOffset(StoreOperandType type,
+                                    GpuRegister reg,
+                                    GpuRegister base,
                                     int32_t offset) {
-  if (!IsInt<16>(offset) ||
-      (type == kStoreDoubleword && !IsAligned<kMips64DoublewordSize>(offset) &&
-       !IsInt<16>(static_cast<int32_t>(offset + kMips64WordSize)))) {
-    LoadConst32(AT, offset & ~(kMips64DoublewordSize - 1));
-    Daddu(AT, AT, base);
-    base = AT;
-    offset &= (kMips64DoublewordSize - 1);
-  }
-
-  switch (type) {
-    case kStoreByte:
-      Sb(reg, base, offset);
-      break;
-    case kStoreHalfword:
-      Sh(reg, base, offset);
-      break;
-    case kStoreWord:
-      CHECK_ALIGNED(offset, kMips64WordSize);
-      Sw(reg, base, offset);
-      break;
-    case kStoreDoubleword:
-      if (!IsAligned<kMips64DoublewordSize>(offset)) {
-        CHECK_ALIGNED(offset, kMips64WordSize);
-        Sw(reg, base, offset);
-        Dsrl32(TMP2, reg, 0);
-        Sw(TMP2, base, offset + kMips64WordSize);
-      } else {
-        Sd(reg, base, offset);
-      }
-      break;
-    default:
-      LOG(FATAL) << "UNREACHABLE";
-  }
+  StoreToOffset<>(type, reg, base, offset);
 }
 
-void Mips64Assembler::StoreFpuToOffset(StoreOperandType type, FpuRegister reg, GpuRegister base,
+void Mips64Assembler::StoreFpuToOffset(StoreOperandType type,
+                                       FpuRegister reg,
+                                       GpuRegister base,
                                        int32_t offset) {
-  if (!IsInt<16>(offset) ||
-      (type == kStoreDoubleword && !IsAligned<kMips64DoublewordSize>(offset) &&
-       !IsInt<16>(static_cast<int32_t>(offset + kMips64WordSize)))) {
-    LoadConst32(AT, offset & ~(kMips64DoublewordSize - 1));
-    Daddu(AT, AT, base);
-    base = AT;
-    offset &= (kMips64DoublewordSize - 1);
-  }
-
-  switch (type) {
-    case kStoreWord:
-      CHECK_ALIGNED(offset, kMips64WordSize);
-      Swc1(reg, base, offset);
-      break;
-    case kStoreDoubleword:
-      if (!IsAligned<kMips64DoublewordSize>(offset)) {
-        CHECK_ALIGNED(offset, kMips64WordSize);
-        Mfhc1(TMP2, reg);
-        Swc1(reg, base, offset);
-        Sw(TMP2, base, offset + kMips64WordSize);
-      } else {
-        Sdc1(reg, base, offset);
-      }
-      break;
-    default:
-      LOG(FATAL) << "UNREACHABLE";
-  }
+  StoreFpuToOffset<>(type, reg, base, offset);
 }
 
 static dwarf::Reg DWARFReg(GpuRegister reg) {
@@ -2367,12 +2256,8 @@
   CHECK(dest.IsGpuRegister() && base.AsMips64().IsGpuRegister());
   LoadFromOffset(kLoadUnsignedWord, dest.AsGpuRegister(),
                  base.AsMips64().AsGpuRegister(), offs.Int32Value());
-  if (kPoisonHeapReferences && unpoison_reference) {
-    // TODO: review
-    // Negate the 32-bit ref
-    Dsubu(dest.AsGpuRegister(), ZERO, dest.AsGpuRegister());
-    // And constrain it to 32 bits (zero-extend into bits 32 through 63) as on Arm64 and x86/64
-    Dext(dest.AsGpuRegister(), dest.AsGpuRegister(), 0, 32);
+  if (unpoison_reference) {
+    MaybeUnpoisonHeapReference(dest.AsGpuRegister());
   }
 }
 
diff --git a/compiler/utils/mips64/assembler_mips64.h b/compiler/utils/mips64/assembler_mips64.h
index a0a1db6..b98db65 100644
--- a/compiler/utils/mips64/assembler_mips64.h
+++ b/compiler/utils/mips64/assembler_mips64.h
@@ -512,6 +512,7 @@
   void Ldpc(GpuRegister rs, uint32_t imm18);  // MIPS64
   void Lui(GpuRegister rt, uint16_t imm16);
   void Aui(GpuRegister rt, GpuRegister rs, uint16_t imm16);
+  void Daui(GpuRegister rt, GpuRegister rs, uint16_t imm16);  // MIPS64
   void Dahi(GpuRegister rs, uint16_t imm16);  // MIPS64
   void Dati(GpuRegister rs, uint16_t imm16);  // MIPS64
   void Sync(uint32_t stype);
@@ -654,6 +655,44 @@
   void Addiu32(GpuRegister rt, GpuRegister rs, int32_t value);
   void Daddiu64(GpuRegister rt, GpuRegister rs, int64_t value, GpuRegister rtmp = AT);  // MIPS64
 
+  //
+  // Heap poisoning.
+  //
+
+  // Poison a heap reference contained in `src` and store it in `dst`.
+  void PoisonHeapReference(GpuRegister dst, GpuRegister src) {
+    // dst = -src.
+    // Negate the 32-bit ref.
+    Dsubu(dst, ZERO, src);
+    // And constrain it to 32 bits (zero-extend into bits 32 through 63) as on Arm64 and x86/64.
+    Dext(dst, dst, 0, 32);
+  }
+  // Poison a heap reference contained in `reg`.
+  void PoisonHeapReference(GpuRegister reg) {
+    // reg = -reg.
+    PoisonHeapReference(reg, reg);
+  }
+  // Unpoison a heap reference contained in `reg`.
+  void UnpoisonHeapReference(GpuRegister reg) {
+    // reg = -reg.
+    // Negate the 32-bit ref.
+    Dsubu(reg, ZERO, reg);
+    // And constrain it to 32 bits (zero-extend into bits 32 through 63) as on Arm64 and x86/64.
+    Dext(reg, reg, 0, 32);
+  }
+  // Poison a heap reference contained in `reg` if heap poisoning is enabled.
+  void MaybePoisonHeapReference(GpuRegister reg) {
+    if (kPoisonHeapReferences) {
+      PoisonHeapReference(reg);
+    }
+  }
+  // Unpoison a heap reference contained in `reg` if heap poisoning is enabled.
+  void MaybeUnpoisonHeapReference(GpuRegister reg) {
+    if (kPoisonHeapReferences) {
+      UnpoisonHeapReference(reg);
+    }
+  }
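+  // A sketch of the round trip with a hypothetical value: poisoning computes
+  // -ref modulo 2^32 and clears bits 32-63, e.g. 0x12345678 becomes
+  // 0x00000000EDCBA988; applying the same negation restores the original:
+  //   PoisonHeapReference(T0);    // T0: 0x12345678 -> 0xEDCBA988
+  //   UnpoisonHeapReference(T0);  // T0: 0xEDCBA988 -> 0x12345678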
+
   void Bind(Label* label) OVERRIDE {
     Bind(down_cast<Mips64Label*>(label));
   }
@@ -733,6 +772,271 @@
   void Bc1nez(FpuRegister ft, Mips64Label* label);
 
   void EmitLoad(ManagedRegister m_dst, GpuRegister src_register, int32_t src_offset, size_t size);
+
+ private:
+  // This will be used as an argument for loads/stores
+  // when there is no need for implicit null checks.
+  struct NoImplicitNullChecker {
+    void operator()() const {}
+  };
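+  // Code generators may instead pass any callable here (e.g. a lambda that
+  // records an implicit null check at the current code offset); it is invoked
+  // right after the first instruction that dereferences `base`.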
+
+ public:
+  template <typename ImplicitNullChecker = NoImplicitNullChecker>
+  void StoreConstToOffset(StoreOperandType type,
+                          int64_t value,
+                          GpuRegister base,
+                          int32_t offset,
+                          GpuRegister temp,
+                          ImplicitNullChecker null_checker = NoImplicitNullChecker()) {
+    // We permit `base` and `temp` to coincide (in which case the `base`
+    // register may be overwritten in the process), but `temp` must not be AT,
+    // which is reserved for holding the adjusted base.
+    CHECK_NE(temp, AT);  // Must not use AT as temp, so as not to overwrite the adjusted base.
+    if (!IsInt<16>(offset) ||
+        (type == kStoreDoubleword && !IsAligned<kMips64DoublewordSize>(offset) &&
+         !IsInt<16>(static_cast<int32_t>(offset + kMips64WordSize)))) {
+      LoadConst32(AT, offset & ~(kMips64DoublewordSize - 1));
+      Daddu(AT, AT, base);
+      base = AT;
+      offset &= (kMips64DoublewordSize - 1);
+    }
+    GpuRegister reg;
+    // If the adjustment left `base` unchanged and equal to `temp`, we can't use `temp`
+    // to load and hold the value but we can use AT instead as AT hasn't been used yet.
+    // Otherwise, `temp` can be used for the value. And if `temp` is the same as the
+    // original `base` (that is, `base` prior to the adjustment), the original `base`
+    // register will be overwritten.
+    if (base == temp) {
+      temp = AT;
+    }
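+    // For example, with base == temp == T8 and a small offset, the value is
+    // materialized in AT and stored at 0($t8); with a large offset, AT holds
+    // the adjusted base instead and the value goes through T8, overwriting
+    // the caller's base, as permitted above (see the StoreConstToOffset test).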
+
+    if (type == kStoreDoubleword && IsAligned<kMips64DoublewordSize>(offset)) {
+      if (value == 0) {
+        reg = ZERO;
+      } else {
+        reg = temp;
+        LoadConst64(reg, value);
+      }
+      Sd(reg, base, offset);
+      null_checker();
+    } else {
+      uint32_t low = Low32Bits(value);
+      uint32_t high = High32Bits(value);
+      if (low == 0) {
+        reg = ZERO;
+      } else {
+        reg = temp;
+        LoadConst32(reg, low);
+      }
+      switch (type) {
+        case kStoreByte:
+          Sb(reg, base, offset);
+          break;
+        case kStoreHalfword:
+          Sh(reg, base, offset);
+          break;
+        case kStoreWord:
+          Sw(reg, base, offset);
+          break;
+        case kStoreDoubleword:
+          // Not aligned to kMips64DoublewordSize; store the two halves separately.
+          CHECK_ALIGNED(offset, kMips64WordSize);
+          Sw(reg, base, offset);
+          null_checker();
+          if (high == 0) {
+            reg = ZERO;
+          } else {
+            reg = temp;
+            if (high != low) {
+              LoadConst32(reg, high);
+            }
+          }
+          Sw(reg, base, offset + kMips64WordSize);
+          break;
+        default:
+          LOG(FATAL) << "UNREACHABLE";
+      }
+      if (type != kStoreDoubleword) {
+        null_checker();
+      }
+    }
+  }
+
+  template <typename ImplicitNullChecker = NoImplicitNullChecker>
+  void LoadFromOffset(LoadOperandType type,
+                      GpuRegister reg,
+                      GpuRegister base,
+                      int32_t offset,
+                      ImplicitNullChecker null_checker = NoImplicitNullChecker()) {
+    if (!IsInt<16>(offset) ||
+        (type == kLoadDoubleword && !IsAligned<kMips64DoublewordSize>(offset) &&
+         !IsInt<16>(static_cast<int32_t>(offset + kMips64WordSize)))) {
+      LoadConst32(AT, offset & ~(kMips64DoublewordSize - 1));
+      Daddu(AT, AT, base);
+      base = AT;
+      offset &= (kMips64DoublewordSize - 1);
+    }
+
+    switch (type) {
+      case kLoadSignedByte:
+        Lb(reg, base, offset);
+        break;
+      case kLoadUnsignedByte:
+        Lbu(reg, base, offset);
+        break;
+      case kLoadSignedHalfword:
+        Lh(reg, base, offset);
+        break;
+      case kLoadUnsignedHalfword:
+        Lhu(reg, base, offset);
+        break;
+      case kLoadWord:
+        CHECK_ALIGNED(offset, kMips64WordSize);
+        Lw(reg, base, offset);
+        break;
+      case kLoadUnsignedWord:
+        CHECK_ALIGNED(offset, kMips64WordSize);
+        Lwu(reg, base, offset);
+        break;
+      case kLoadDoubleword:
+        if (!IsAligned<kMips64DoublewordSize>(offset)) {
+          CHECK_ALIGNED(offset, kMips64WordSize);
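+          // Split the 64-bit load into two aligned word loads: Lwu
+          // zero-extends the low word and Dinsu inserts the high word
+          // into bits 32-63 of `reg`.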
+          Lwu(reg, base, offset);
+          null_checker();
+          Lwu(TMP2, base, offset + kMips64WordSize);
+          Dinsu(reg, TMP2, 32, 32);
+        } else {
+          Ld(reg, base, offset);
+          null_checker();
+        }
+        break;
+    }
+    if (type != kLoadDoubleword) {
+      null_checker();
+    }
+  }
+
+  template <typename ImplicitNullChecker = NoImplicitNullChecker>
+  void LoadFpuFromOffset(LoadOperandType type,
+                         FpuRegister reg,
+                         GpuRegister base,
+                         int32_t offset,
+                         ImplicitNullChecker null_checker = NoImplicitNullChecker()) {
+    if (!IsInt<16>(offset) ||
+        (type == kLoadDoubleword && !IsAligned<kMips64DoublewordSize>(offset) &&
+         !IsInt<16>(static_cast<int32_t>(offset + kMips64WordSize)))) {
+      LoadConst32(AT, offset & ~(kMips64DoublewordSize - 1));
+      Daddu(AT, AT, base);
+      base = AT;
+      offset &= (kMips64DoublewordSize - 1);
+    }
+
+    switch (type) {
+      case kLoadWord:
+        CHECK_ALIGNED(offset, kMips64WordSize);
+        Lwc1(reg, base, offset);
+        null_checker();
+        break;
+      case kLoadDoubleword:
+        if (!IsAligned<kMips64DoublewordSize>(offset)) {
+          CHECK_ALIGNED(offset, kMips64WordSize);
+          Lwc1(reg, base, offset);
+          null_checker();
+          Lw(TMP2, base, offset + kMips64WordSize);
+          Mthc1(TMP2, reg);
+        } else {
+          Ldc1(reg, base, offset);
+          null_checker();
+        }
+        break;
+      default:
+        LOG(FATAL) << "UNREACHABLE";
+    }
+  }
+
+  template <typename ImplicitNullChecker = NoImplicitNullChecker>
+  void StoreToOffset(StoreOperandType type,
+                     GpuRegister reg,
+                     GpuRegister base,
+                     int32_t offset,
+                     ImplicitNullChecker null_checker = NoImplicitNullChecker()) {
+    if (!IsInt<16>(offset) ||
+        (type == kStoreDoubleword && !IsAligned<kMips64DoublewordSize>(offset) &&
+         !IsInt<16>(static_cast<int32_t>(offset + kMips64WordSize)))) {
+      LoadConst32(AT, offset & ~(kMips64DoublewordSize - 1));
+      Daddu(AT, AT, base);
+      base = AT;
+      offset &= (kMips64DoublewordSize - 1);
+    }
+
+    switch (type) {
+      case kStoreByte:
+        Sb(reg, base, offset);
+        break;
+      case kStoreHalfword:
+        Sh(reg, base, offset);
+        break;
+      case kStoreWord:
+        CHECK_ALIGNED(offset, kMips64WordSize);
+        Sw(reg, base, offset);
+        break;
+      case kStoreDoubleword:
+        if (!IsAligned<kMips64DoublewordSize>(offset)) {
+          CHECK_ALIGNED(offset, kMips64WordSize);
+          Sw(reg, base, offset);
+          null_checker();
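+          // Dsrl32 moves bits 32-63 of `reg` into the low word of TMP2 so
+          // the upper half can be stored with a second aligned Sw.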
+          Dsrl32(TMP2, reg, 0);
+          Sw(TMP2, base, offset + kMips64WordSize);
+        } else {
+          Sd(reg, base, offset);
+          null_checker();
+        }
+        break;
+      default:
+        LOG(FATAL) << "UNREACHABLE";
+    }
+    if (type != kStoreDoubleword) {
+      null_checker();
+    }
+  }
+
+  template <typename ImplicitNullChecker = NoImplicitNullChecker>
+  void StoreFpuToOffset(StoreOperandType type,
+                        FpuRegister reg,
+                        GpuRegister base,
+                        int32_t offset,
+                        ImplicitNullChecker null_checker = NoImplicitNullChecker()) {
+    if (!IsInt<16>(offset) ||
+        (type == kStoreDoubleword && !IsAligned<kMips64DoublewordSize>(offset) &&
+         !IsInt<16>(static_cast<int32_t>(offset + kMips64WordSize)))) {
+      LoadConst32(AT, offset & ~(kMips64DoublewordSize - 1));
+      Daddu(AT, AT, base);
+      base = AT;
+      offset &= (kMips64DoublewordSize - 1);
+    }
+
+    switch (type) {
+      case kStoreWord:
+        CHECK_ALIGNED(offset, kMips64WordSize);
+        Swc1(reg, base, offset);
+        null_checker();
+        break;
+      case kStoreDoubleword:
+        if (!IsAligned<kMips64DoublewordSize>(offset)) {
+          CHECK_ALIGNED(offset, kMips64WordSize);
+          Mfhc1(TMP2, reg);
+          Swc1(reg, base, offset);
+          null_checker();
+          Sw(TMP2, base, offset + kMips64WordSize);
+        } else {
+          Sdc1(reg, base, offset);
+          null_checker();
+        }
+        break;
+      default:
+        LOG(FATAL) << "UNREACHABLE";
+    }
+  }
+
   void LoadFromOffset(LoadOperandType type, GpuRegister reg, GpuRegister base, int32_t offset);
   void LoadFpuFromOffset(LoadOperandType type, FpuRegister reg, GpuRegister base, int32_t offset);
   void StoreToOffset(StoreOperandType type, GpuRegister reg, GpuRegister base, int32_t offset);
diff --git a/compiler/utils/mips64/assembler_mips64_test.cc b/compiler/utils/mips64/assembler_mips64_test.cc
index 74b8f06..879807a 100644
--- a/compiler/utils/mips64/assembler_mips64_test.cc
+++ b/compiler/utils/mips64/assembler_mips64_test.cc
@@ -1269,6 +1269,24 @@
   DriverStr(RepeatRIb(&mips64::Mips64Assembler::Lui, 16, "lui ${reg}, {imm}"), "lui");
 }
 
+TEST_F(AssemblerMIPS64Test, Daui) {
+  std::vector<mips64::GpuRegister*> reg1_registers = GetRegisters();
+  std::vector<mips64::GpuRegister*> reg2_registers = GetRegisters();
+  reg2_registers.erase(reg2_registers.begin());  // reg2 can't be ZERO, remove it.
+  std::vector<int64_t> imms = CreateImmediateValuesBits(/* imm_bits */ 16, /* as_uint */ true);
+  WarnOnCombinations(reg1_registers.size() * reg2_registers.size() * imms.size());
+  std::ostringstream expected;
+  for (mips64::GpuRegister* reg1 : reg1_registers) {
+    for (mips64::GpuRegister* reg2 : reg2_registers) {
+      for (int64_t imm : imms) {
+        __ Daui(*reg1, *reg2, imm);
+        expected << "daui $" << *reg1 << ", $" << *reg2 << ", " << imm << "\n";
+      }
+    }
+  }
+  DriverStr(expected.str(), "daui");
+}
+
 TEST_F(AssemblerMIPS64Test, Dahi) {
   DriverStr(RepeatRIb(&mips64::Mips64Assembler::Dahi, 16, "dahi ${reg}, ${reg}, {imm}"), "dahi");
 }
@@ -2160,6 +2178,82 @@
   DriverStr(expected, "StoreFpuToOffset");
 }
 
+TEST_F(AssemblerMIPS64Test, StoreConstToOffset) {
+  __ StoreConstToOffset(mips64::kStoreByte, 0xFF, mips64::A1, +0, mips64::T8);
+  __ StoreConstToOffset(mips64::kStoreHalfword, 0xFFFF, mips64::A1, +0, mips64::T8);
+  __ StoreConstToOffset(mips64::kStoreWord, 0x12345678, mips64::A1, +0, mips64::T8);
+  __ StoreConstToOffset(mips64::kStoreDoubleword, 0x123456789ABCDEF0, mips64::A1, +0, mips64::T8);
+
+  __ StoreConstToOffset(mips64::kStoreByte, 0, mips64::A1, +0, mips64::T8);
+  __ StoreConstToOffset(mips64::kStoreHalfword, 0, mips64::A1, +0, mips64::T8);
+  __ StoreConstToOffset(mips64::kStoreWord, 0, mips64::A1, +0, mips64::T8);
+  __ StoreConstToOffset(mips64::kStoreDoubleword, 0, mips64::A1, +0, mips64::T8);
+
+  __ StoreConstToOffset(mips64::kStoreDoubleword, 0x1234567812345678, mips64::A1, +0, mips64::T8);
+  __ StoreConstToOffset(mips64::kStoreDoubleword, 0x1234567800000000, mips64::A1, +0, mips64::T8);
+  __ StoreConstToOffset(mips64::kStoreDoubleword, 0x0000000012345678, mips64::A1, +0, mips64::T8);
+
+  __ StoreConstToOffset(mips64::kStoreWord, 0, mips64::T8, +0, mips64::T8);
+  __ StoreConstToOffset(mips64::kStoreWord, 0x12345678, mips64::T8, +0, mips64::T8);
+
+  __ StoreConstToOffset(mips64::kStoreWord, 0, mips64::A1, -0xFFF0, mips64::T8);
+  __ StoreConstToOffset(mips64::kStoreWord, 0x12345678, mips64::A1, +0xFFF0, mips64::T8);
+
+  __ StoreConstToOffset(mips64::kStoreWord, 0, mips64::T8, -0xFFF0, mips64::T8);
+  __ StoreConstToOffset(mips64::kStoreWord, 0x12345678, mips64::T8, +0xFFF0, mips64::T8);
+
+  const char* expected =
+      "ori $t8, $zero, 0xFF\n"
+      "sb $t8, 0($a1)\n"
+      "ori $t8, $zero, 0xFFFF\n"
+      "sh $t8, 0($a1)\n"
+      "lui $t8, 0x1234\n"
+      "ori $t8, $t8,0x5678\n"
+      "sw $t8, 0($a1)\n"
+      "lui $t8, 0x9abc\n"
+      "ori $t8, $t8,0xdef0\n"
+      "dahi $t8, $t8, 0x5679\n"
+      "dati $t8, $t8, 0x1234\n"
+      "sd $t8, 0($a1)\n"
+      "sb $zero, 0($a1)\n"
+      "sh $zero, 0($a1)\n"
+      "sw $zero, 0($a1)\n"
+      "sd $zero, 0($a1)\n"
+      "lui $t8, 0x1234\n"
+      "ori $t8, $t8,0x5678\n"
+      "dins $t8, $t8, 0x20, 0x20\n"
+      "sd $t8, 0($a1)\n"
+      "lui $t8, 0x246\n"
+      "ori $t8, $t8, 0x8acf\n"
+      "dsll32 $t8, $t8, 0x3\n"
+      "sd $t8, 0($a1)\n"
+      "lui $t8, 0x1234\n"
+      "ori $t8, $t8, 0x5678\n"
+      "sd $t8, 0($a1)\n"
+      "sw $zero, 0($t8)\n"
+      "lui $at,0x1234\n"
+      "ori $at, $at, 0x5678\n"
+      "sw  $at, 0($t8)\n"
+      "lui $at, 0xffff\n"
+      "ori $at, $at, 0x10\n"
+      "daddu $at, $at, $a1\n"
+      "sw $zero, 0($at)\n"
+      "li $at, 0xfff0\n"
+      "daddu $at, $at, $a1\n"
+      "lui $t8, 0x1234\n"
+      "ori $t8, $t8, 0x5678\n"
+      "sw  $t8, 0($at)\n"
+      "lui $at, 0xffff\n"
+      "ori $at, $at, 0x10\n"
+      "daddu $at, $at, $t8\n"
+      "sw $zero, 0($at)\n"
+      "li $at, 0xfff0\n"
+      "daddu $at, $at, $t8\n"
+      "lui $t8, 0x1234\n"
+      "ori $t8, $t8, 0x5678\n"
+      "sw  $t8, 0($at)\n";
+  DriverStr(expected, "StoreConstToOffset");
+}
 //////////////////////////////
 // Loading/adding Constants //
 //////////////////////////////
diff --git a/compiler/utils/swap_space.cc b/compiler/utils/swap_space.cc
index 1a8f567..a1eb08e 100644
--- a/compiler/utils/swap_space.cc
+++ b/compiler/utils/swap_space.cc
@@ -36,17 +36,17 @@
 static void DumpFreeMap(const FreeBySizeSet& free_by_size) {
   size_t last_size = static_cast<size_t>(-1);
   for (const auto& entry : free_by_size) {
-    if (last_size != entry.first) {
-      last_size = entry.first;
+    if (last_size != entry.size) {
+      last_size = entry.size;
       LOG(INFO) << "Size " << last_size;
     }
-    LOG(INFO) << "  0x" << std::hex << entry.second->Start()
-        << " size=" << std::dec << entry.second->size;
+    LOG(INFO) << "  0x" << std::hex << entry.free_by_start_entry->Start()
+        << " size=" << std::dec << entry.free_by_start_entry->size;
   }
 }
 
 void SwapSpace::RemoveChunk(FreeBySizeSet::const_iterator free_by_size_pos) {
-  auto free_by_start_pos = free_by_size_pos->second;
+  auto free_by_start_pos = free_by_size_pos->free_by_start_entry;
   free_by_size_.erase(free_by_size_pos);
   free_by_start_.erase(free_by_start_pos);
 }
@@ -89,7 +89,7 @@
   // Calculate over free_by_size.
   size_t sum1 = 0;
   for (const auto& entry : free_by_size) {
-    sum1 += entry.second->size;
+    sum1 += entry.free_by_start_entry->size;
   }
 
   // Calculate over free_by_start.
@@ -110,27 +110,52 @@
 
   // Check the free list for something that fits.
   // TODO: Smarter implementation. Global biggest chunk, ...
-  SpaceChunk old_chunk;
   auto it = free_by_start_.empty()
       ? free_by_size_.end()
       : free_by_size_.lower_bound(FreeBySizeEntry { size, free_by_start_.begin() });
   if (it != free_by_size_.end()) {
-    old_chunk = *it->second;
-    RemoveChunk(it);
+    auto entry = it->free_by_start_entry;
+    SpaceChunk old_chunk = *entry;
+    if (old_chunk.size == size) {
+      RemoveChunk(it);
+    } else {
+      // Try to avoid deallocating and allocating the std::set<> nodes.
+      // This would be much simpler if we could use replace() from Boost.Bimap.
+
+      // The free_by_start_ set contains disjoint intervals ordered by `ptr`.
+      // Shrinking the interval does not affect the ordering.
+      it->free_by_start_entry->ptr += size;
+      it->free_by_start_entry->size -= size;
+
+      // The free_by_size_ set is ordered by `size` and then by `free_by_start_entry->ptr`.
+      // Adjusting the `ptr` above does not change that ordering but decreasing `size` can
+      // push the node before the previous node(s).
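+      // For example, shrinking a 96-byte entry to 32 bytes would order it
+      // before an existing 64-byte entry, so the node may only be shrunk in
+      // place when it still compares after its predecessor.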
+      if (it == free_by_size_.begin()) {
+        it->size -= size;
+      } else {
+        auto prev = it;
+        --prev;
+        FreeBySizeEntry new_value(old_chunk.size - size, entry);
+        if (free_by_size_.key_comp()(*prev, new_value)) {
+          it->size -= size;
+        } else {
+          // Changing in place would break the std::set<> ordering, we need to remove and insert.
+          free_by_size_.erase(it);
+          free_by_size_.insert(new_value);
+        }
+      }
+    }
+    return old_chunk.ptr;
   } else {
     // Not a big enough free chunk, need to increase file size.
-    old_chunk = NewFileChunk(size);
+    SpaceChunk new_chunk = NewFileChunk(size);
+    if (new_chunk.size != size) {
+      // Insert the remainder.
+      SpaceChunk remainder = { new_chunk.ptr + size, new_chunk.size - size };
+      InsertChunk(remainder);
+    }
+    return new_chunk.ptr;
   }
-
-  void* ret = old_chunk.ptr;
-
-  if (old_chunk.size != size) {
-    // Insert the remainder.
-    SpaceChunk new_chunk = { old_chunk.ptr + size, old_chunk.size - size };
-    InsertChunk(new_chunk);
-  }
-
-  return ret;
 }
 
 SwapSpace::SpaceChunk SwapSpace::NewFileChunk(size_t min_size) {
diff --git a/compiler/utils/swap_space.h b/compiler/utils/swap_space.h
index 9600907..c286b82 100644
--- a/compiler/utils/swap_space.h
+++ b/compiler/utils/swap_space.h
@@ -45,8 +45,10 @@
  private:
   // Chunk of space.
   struct SpaceChunk {
-    uint8_t* ptr;
-    size_t size;
+    // We need mutable members as we keep these objects in a std::set<> (providing only const
+    // access) but we modify these members while carefully preserving the std::set<> ordering.
+    mutable uint8_t* ptr;
+    mutable size_t size;
 
     uintptr_t Start() const {
       return reinterpret_cast<uintptr_t>(ptr);
@@ -66,13 +68,21 @@
   typedef std::set<SpaceChunk, SortChunkByPtr> FreeByStartSet;
 
   // Map size to an iterator to free_by_start_'s entry.
-  typedef std::pair<size_t, FreeByStartSet::const_iterator> FreeBySizeEntry;
+  struct FreeBySizeEntry {
+    FreeBySizeEntry(size_t sz, FreeByStartSet::const_iterator entry)
+        : size(sz), free_by_start_entry(entry) { }
+
+    // We need mutable members as we keep these objects in a std::set<> (providing only const
+    // access) but we modify these members while carefully preserving the std::set<> ordering.
+    mutable size_t size;
+    mutable FreeByStartSet::const_iterator free_by_start_entry;
+  };
   struct FreeBySizeComparator {
     bool operator()(const FreeBySizeEntry& lhs, const FreeBySizeEntry& rhs) {
-      if (lhs.first != rhs.first) {
-        return lhs.first < rhs.first;
+      if (lhs.size != rhs.size) {
+        return lhs.size < rhs.size;
       } else {
-        return lhs.second->Start() < rhs.second->Start();
+        return lhs.free_by_start_entry->Start() < rhs.free_by_start_entry->Start();
       }
     }
   };
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index 6eab302..5307dc0 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -958,6 +958,14 @@
 }
 
 
+void X86Assembler::cvtdq2ps(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x0F);
+  EmitUint8(0x5B);
+  EmitXmmRegisterOperand(dst, src);
+}
+
+
 void X86Assembler::cvtdq2pd(XmmRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0xF3);
@@ -1161,6 +1169,32 @@
 }
 
 
+void X86Assembler::andnpd(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitUint8(0x0F);
+  EmitUint8(0x55);
+  EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86Assembler::andnps(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x0F);
+  EmitUint8(0x55);
+  EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86Assembler::pandn(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitUint8(0x0F);
+  EmitUint8(0xDF);
+  EmitXmmRegisterOperand(dst, src);
+}
+
+
 void X86Assembler::orpd(XmmRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x66);
@@ -1187,6 +1221,43 @@
 }
 
 
+void X86Assembler::pcmpeqb(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitUint8(0x0F);
+  EmitUint8(0x74);
+  EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86Assembler::pcmpeqw(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitUint8(0x0F);
+  EmitUint8(0x75);
+  EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86Assembler::pcmpeqd(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitUint8(0x0F);
+  EmitUint8(0x76);
+  EmitXmmRegisterOperand(dst, src);
+}
+
+
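+// Note: unlike the SSE2 pcmpeq{b,w,d} forms above, pcmpeqq is an SSE4.1
+// instruction and requires the three-byte 0x0F 0x38 opcode escape.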
+void X86Assembler::pcmpeqq(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitUint8(0x0F);
+  EmitUint8(0x38);
+  EmitUint8(0x29);
+  EmitXmmRegisterOperand(dst, src);
+}
+
+
 void X86Assembler::shufpd(XmmRegister dst, XmmRegister src, const Immediate& imm) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x66);
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index 2999599..f52cf16 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -457,6 +457,7 @@
   void cvttss2si(Register dst, XmmRegister src);
   void cvttsd2si(Register dst, XmmRegister src);
 
+  void cvtdq2ps(XmmRegister dst, XmmRegister src);
   void cvtdq2pd(XmmRegister dst, XmmRegister src);
 
   void comiss(XmmRegister a, XmmRegister b);
@@ -486,10 +487,19 @@
   void andps(XmmRegister dst, const Address& src);
   void pand(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
 
+  void andnpd(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
+  void andnps(XmmRegister dst, XmmRegister src);
+  void pandn(XmmRegister dst, XmmRegister src);
+
   void orpd(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
   void orps(XmmRegister dst, XmmRegister src);
   void por(XmmRegister dst, XmmRegister src);
 
+  void pcmpeqb(XmmRegister dst, XmmRegister src);
+  void pcmpeqw(XmmRegister dst, XmmRegister src);
+  void pcmpeqd(XmmRegister dst, XmmRegister src);
+  void pcmpeqq(XmmRegister dst, XmmRegister src);
+
   void shufpd(XmmRegister dst, XmmRegister src, const Immediate& imm);
   void shufps(XmmRegister dst, XmmRegister src, const Immediate& imm);
   void pshufd(XmmRegister dst, XmmRegister src, const Immediate& imm);
diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc
index a74bea2..2304907 100644
--- a/compiler/utils/x86/assembler_x86_test.cc
+++ b/compiler/utils/x86/assembler_x86_test.cc
@@ -322,6 +322,14 @@
   DriverStr(RepeatRI(&x86::X86Assembler::roll, 1U, "roll ${imm}, %{reg}"), "rolli");
 }
 
+TEST_F(AssemblerX86Test, Cvtdq2ps) {
+  DriverStr(RepeatFF(&x86::X86Assembler::cvtdq2ps, "cvtdq2ps %{reg2}, %{reg1}"), "cvtdq2ps");
+}
+
+TEST_F(AssemblerX86Test, Cvtdq2pd) {
+  DriverStr(RepeatFF(&x86::X86Assembler::cvtdq2pd, "cvtdq2pd %{reg2}, %{reg1}"), "cvtdq2pd");
+}
+
 TEST_F(AssemblerX86Test, ComissAddr) {
   GetAssembler()->comiss(x86::XmmRegister(x86::XMM0), x86::Address(x86::EAX, 0));
   const char* expected = "comiss 0(%EAX), %xmm0\n";
@@ -573,6 +581,18 @@
   DriverStr(RepeatFF(&x86::X86Assembler::pand, "pand %{reg2}, %{reg1}"), "pand");
 }
 
+TEST_F(AssemblerX86Test, AndnPD) {
+  DriverStr(RepeatFF(&x86::X86Assembler::andnpd, "andnpd %{reg2}, %{reg1}"), "andnpd");
+}
+
+TEST_F(AssemblerX86Test, AndnPS) {
+  DriverStr(RepeatFF(&x86::X86Assembler::andnps, "andnps %{reg2}, %{reg1}"), "andnps");
+}
+
+TEST_F(AssemblerX86Test, PAndn) {
+  DriverStr(RepeatFF(&x86::X86Assembler::pandn, "pandn %{reg2}, %{reg1}"), "pandn");
+}
+
 TEST_F(AssemblerX86Test, OrPD) {
   DriverStr(RepeatFF(&x86::X86Assembler::orpd, "orpd %{reg2}, %{reg1}"), "orpd");
 }
@@ -585,6 +605,22 @@
   DriverStr(RepeatFF(&x86::X86Assembler::por, "por %{reg2}, %{reg1}"), "por");
 }
 
+TEST_F(AssemblerX86Test, PCmpeqB) {
+  DriverStr(RepeatFF(&x86::X86Assembler::pcmpeqb, "pcmpeqb %{reg2}, %{reg1}"), "cmpeqb");
+}
+
+TEST_F(AssemblerX86Test, PCmpeqW) {
+  DriverStr(RepeatFF(&x86::X86Assembler::pcmpeqw, "pcmpeqw %{reg2}, %{reg1}"), "cmpeqw");
+}
+
+TEST_F(AssemblerX86Test, PCmpeqD) {
+  DriverStr(RepeatFF(&x86::X86Assembler::pcmpeqd, "pcmpeqd %{reg2}, %{reg1}"), "cmpeqd");
+}
+
+TEST_F(AssemblerX86Test, PCmpeqQ) {
+  DriverStr(RepeatFF(&x86::X86Assembler::pcmpeqq, "pcmpeqq %{reg2}, %{reg1}"), "cmpeqq");
+}
+
 TEST_F(AssemblerX86Test, ShufPS) {
   DriverStr(RepeatFFI(&x86::X86Assembler::shufps, 1, "shufps ${imm}, %{reg2}, %{reg1}"), "shufps");
 }
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index 458204a..d20a696 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -1153,6 +1153,15 @@
 }
 
 
+void X86_64Assembler::cvtdq2ps(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0x5B);
+  EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+
 void X86_64Assembler::cvtdq2pd(XmmRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0xF3);
@@ -1366,6 +1375,32 @@
   EmitXmmRegisterOperand(dst.LowBits(), src);
 }
 
+void X86_64Assembler::andnpd(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0x55);
+  EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::andnps(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0x55);
+  EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::pandn(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0xDF);
+  EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
 void X86_64Assembler::orpd(XmmRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x66);
@@ -1392,6 +1427,43 @@
   EmitXmmRegisterOperand(dst.LowBits(), src);
 }
 
+void X86_64Assembler::pcmpeqb(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0x74);
+  EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::pcmpeqw(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0x75);
+  EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::pcmpeqd(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0x76);
+  EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::pcmpeqq(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0x38);
+  EmitUint8(0x29);
+  EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
 void X86_64Assembler::shufpd(XmmRegister dst, XmmRegister src, const Immediate& imm) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x66);
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index 0dc11d8..08e17e8 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -486,6 +486,7 @@
   void cvttsd2si(CpuRegister dst, XmmRegister src);  // Note: this is the r32 version.
   void cvttsd2si(CpuRegister dst, XmmRegister src, bool is64bit);
 
+  void cvtdq2ps(XmmRegister dst, XmmRegister src);
   void cvtdq2pd(XmmRegister dst, XmmRegister src);
 
   void comiss(XmmRegister a, XmmRegister b);
@@ -514,10 +515,19 @@
   void andps(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
   void pand(XmmRegister dst, XmmRegister src);
 
+  void andnpd(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
+  void andnps(XmmRegister dst, XmmRegister src);
+  void pandn(XmmRegister dst, XmmRegister src);
+
   void orpd(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
   void orps(XmmRegister dst, XmmRegister src);
   void por(XmmRegister dst, XmmRegister src);
 
+  void pcmpeqb(XmmRegister dst, XmmRegister src);
+  void pcmpeqw(XmmRegister dst, XmmRegister src);
+  void pcmpeqd(XmmRegister dst, XmmRegister src);
+  void pcmpeqq(XmmRegister dst, XmmRegister src);
+
   void shufpd(XmmRegister dst, XmmRegister src, const Immediate& imm);
   void shufps(XmmRegister dst, XmmRegister src, const Immediate& imm);
   void pshufd(XmmRegister dst, XmmRegister src, const Immediate& imm);
diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc
index fe94497..20062fd 100644
--- a/compiler/utils/x86_64/assembler_x86_64_test.cc
+++ b/compiler/utils/x86_64/assembler_x86_64_test.cc
@@ -1205,6 +1205,10 @@
   DriverStr(RepeatFF(&x86_64::X86_64Assembler::cvtsd2ss, "cvtsd2ss %{reg2}, %{reg1}"), "cvtsd2ss");
 }
 
+TEST_F(AssemblerX86_64Test, Cvtdq2ps) {
+  DriverStr(RepeatFF(&x86_64::X86_64Assembler::cvtdq2ps, "cvtdq2ps %{reg2}, %{reg1}"), "cvtdq2ps");
+}
+
 TEST_F(AssemblerX86_64Test, Cvtdq2pd) {
   DriverStr(RepeatFF(&x86_64::X86_64Assembler::cvtdq2pd, "cvtdq2pd %{reg2}, %{reg1}"), "cvtdq2pd");
 }
@@ -1265,6 +1269,18 @@
   DriverStr(RepeatFF(&x86_64::X86_64Assembler::pand, "pand %{reg2}, %{reg1}"), "pand");
 }
 
+TEST_F(AssemblerX86_64Test, andnpd) {
+  DriverStr(RepeatFF(&x86_64::X86_64Assembler::andnpd, "andnpd %{reg2}, %{reg1}"), "andnpd");
+}
+
+TEST_F(AssemblerX86_64Test, andnps) {
+  DriverStr(RepeatFF(&x86_64::X86_64Assembler::andnps, "andnps %{reg2}, %{reg1}"), "andnps");
+}
+
+TEST_F(AssemblerX86_64Test, Pandn) {
+  DriverStr(RepeatFF(&x86_64::X86_64Assembler::pandn, "pandn %{reg2}, %{reg1}"), "pandn");
+}
+
 TEST_F(AssemblerX86_64Test, Orps) {
   DriverStr(RepeatFF(&x86_64::X86_64Assembler::orps, "orps %{reg2}, %{reg1}"), "orps");
 }
@@ -1277,6 +1293,22 @@
   DriverStr(RepeatFF(&x86_64::X86_64Assembler::por, "por %{reg2}, %{reg1}"), "por");
 }
 
+TEST_F(AssemblerX86_64Test, PCmpeqb) {
+  DriverStr(RepeatFF(&x86_64::X86_64Assembler::pcmpeqb, "pcmpeqb %{reg2}, %{reg1}"), "pcmpeqb");
+}
+
+TEST_F(AssemblerX86_64Test, PCmpeqw) {
+  DriverStr(RepeatFF(&x86_64::X86_64Assembler::pcmpeqw, "pcmpeqw %{reg2}, %{reg1}"), "pcmpeqw");
+}
+
+TEST_F(AssemblerX86_64Test, PCmpeqd) {
+  DriverStr(RepeatFF(&x86_64::X86_64Assembler::pcmpeqd, "pcmpeqd %{reg2}, %{reg1}"), "pcmpeqd");
+}
+
+TEST_F(AssemblerX86_64Test, PCmpeqq) {
+  DriverStr(RepeatFF(&x86_64::X86_64Assembler::pcmpeqq, "pcmpeqq %{reg2}, %{reg1}"), "pcmpeqq");
+}
+
 TEST_F(AssemblerX86_64Test, Shufps) {
   DriverStr(RepeatFFI(&x86_64::X86_64Assembler::shufps, 1, "shufps ${imm}, %{reg2}, %{reg1}"), "shufps");
 }
diff --git a/compiler/verifier_deps_test.cc b/compiler/verifier_deps_test.cc
index c892b25..1a1d163 100644
--- a/compiler/verifier_deps_test.cc
+++ b/compiler/verifier_deps_test.cc
@@ -246,9 +246,13 @@
   }
 
   bool HasUnverifiedClass(const std::string& cls) {
-    const DexFile::TypeId* type_id = primary_dex_file_->FindTypeId(cls.c_str());
+    return HasUnverifiedClass(cls, *primary_dex_file_);
+  }
+
+  bool HasUnverifiedClass(const std::string& cls, const DexFile& dex_file) {
+    const DexFile::TypeId* type_id = dex_file.FindTypeId(cls.c_str());
     DCHECK(type_id != nullptr);
-    dex::TypeIndex index = primary_dex_file_->GetIndexForTypeId(*type_id);
+    dex::TypeIndex index = dex_file.GetIndexForTypeId(*type_id);
     for (const auto& dex_dep : verifier_deps_->dex_deps_) {
       for (dex::TypeIndex entry : dex_dep.second->unverified_classes_) {
         if (index == entry) {
@@ -710,12 +714,12 @@
 
 TEST_F(VerifierDepsTest, StaticField_Resolved_DeclaredInReferenced) {
   ASSERT_TRUE(VerifyMethod("StaticField_Resolved_DeclaredInReferenced"));
-  ASSERT_TRUE(HasClass("Ljava/lang/System;", true, "public final"));
+  ASSERT_TRUE(HasClass("Ljava/lang/System;", true, "public"));
   ASSERT_TRUE(HasField("Ljava/lang/System;",
                        "out",
                        "Ljava/io/PrintStream;",
                        true,
-                       "public final static",
+                       "public static",
                        "Ljava/lang/System;"));
 }
 
@@ -723,13 +727,13 @@
   ASSERT_TRUE(VerifyMethod("StaticField_Resolved_DeclaredInSuperclass1"));
   ASSERT_TRUE(HasClass("Ljava/util/SimpleTimeZone;", true, "public"));
   ASSERT_TRUE(HasField(
-      "Ljava/util/SimpleTimeZone;", "LONG", "I", true, "public final static", "Ljava/util/TimeZone;"));
+      "Ljava/util/SimpleTimeZone;", "LONG", "I", true, "public static", "Ljava/util/TimeZone;"));
 }
 
 TEST_F(VerifierDepsTest, StaticField_Resolved_DeclaredInSuperclass2) {
   ASSERT_TRUE(VerifyMethod("StaticField_Resolved_DeclaredInSuperclass2"));
   ASSERT_TRUE(HasField(
-      "LMySimpleTimeZone;", "SHORT", "I", true, "public final static", "Ljava/util/TimeZone;"));
+      "LMySimpleTimeZone;", "SHORT", "I", true, "public static", "Ljava/util/TimeZone;"));
 }
 
 TEST_F(VerifierDepsTest, StaticField_Resolved_DeclaredInInterface1) {
@@ -739,7 +743,7 @@
                        "PI_ENABLE_OUTPUT_ESCAPING",
                        "Ljava/lang/String;",
                        true,
-                       "public final static",
+                       "public static",
                        "Ljavax/xml/transform/Result;"));
 }
 
@@ -749,7 +753,7 @@
                        "PI_ENABLE_OUTPUT_ESCAPING",
                        "Ljava/lang/String;",
                        true,
-                       "public final static",
+                       "public static",
                        "Ljavax/xml/transform/Result;"));
 }
 
@@ -759,7 +763,7 @@
                        "PI_ENABLE_OUTPUT_ESCAPING",
                        "Ljava/lang/String;",
                        true,
-                       "public final static",
+                       "public static",
                        "Ljavax/xml/transform/Result;"));
 }
 
@@ -769,13 +773,13 @@
                        "ELEMENT_NODE",
                        "S",
                        true,
-                       "public final static",
+                       "public static",
                        "Lorg/w3c/dom/Node;"));
 }
 
 TEST_F(VerifierDepsTest, StaticField_Unresolved_ReferrerInBoot) {
   ASSERT_TRUE(VerifyMethod("StaticField_Unresolved_ReferrerInBoot"));
-  ASSERT_TRUE(HasClass("Ljava/util/TimeZone;", true, "public abstract"));
+  ASSERT_TRUE(HasClass("Ljava/util/TimeZone;", true, "public"));
   ASSERT_TRUE(HasField("Ljava/util/TimeZone;", "x", "I", false));
 }
 
@@ -847,7 +851,7 @@
 
 TEST_F(VerifierDepsTest, InvokeStatic_Resolved_DeclaredInSuperclass1) {
   ASSERT_TRUE(VerifyMethod("InvokeStatic_Resolved_DeclaredInSuperclass1"));
-  ASSERT_TRUE(HasClass("Ljavax/net/ssl/SSLSocket;", true, "public abstract"));
+  ASSERT_TRUE(HasClass("Ljavax/net/ssl/SSLSocket;", true, "public"));
   ASSERT_TRUE(HasMethod("direct",
                         "Ljavax/net/ssl/SSLSocket;",
                         "setSocketImplFactory",
@@ -870,7 +874,7 @@
 
 TEST_F(VerifierDepsTest, InvokeStatic_DeclaredInInterface1) {
   ASSERT_TRUE(VerifyMethod("InvokeStatic_DeclaredInInterface1"));
-  ASSERT_TRUE(HasClass("Ljava/util/Map$Entry;", true, "public abstract interface"));
+  ASSERT_TRUE(HasClass("Ljava/util/Map$Entry;", true, "public interface"));
   ASSERT_TRUE(HasMethod("direct",
                         "Ljava/util/Map$Entry;",
                         "comparingByKey",
@@ -892,7 +896,7 @@
 
 TEST_F(VerifierDepsTest, InvokeStatic_Unresolved1) {
   ASSERT_FALSE(VerifyMethod("InvokeStatic_Unresolved1"));
-  ASSERT_TRUE(HasClass("Ljavax/net/ssl/SSLSocket;", true, "public abstract"));
+  ASSERT_TRUE(HasClass("Ljavax/net/ssl/SSLSocket;", true, "public"));
   ASSERT_TRUE(HasMethod("direct", "Ljavax/net/ssl/SSLSocket;", "x", "()V", false));
 }
 
@@ -910,7 +914,7 @@
 
 TEST_F(VerifierDepsTest, InvokeDirect_Resolved_DeclaredInSuperclass1) {
   ASSERT_FALSE(VerifyMethod("InvokeDirect_Resolved_DeclaredInSuperclass1"));
-  ASSERT_TRUE(HasClass("Ljavax/net/ssl/SSLSocket;", true, "public abstract"));
+  ASSERT_TRUE(HasClass("Ljavax/net/ssl/SSLSocket;", true, "public"));
   ASSERT_TRUE(HasMethod("direct",
                         "Ljavax/net/ssl/SSLSocket;",
                         "checkOldImpl",
@@ -928,7 +932,7 @@
 
 TEST_F(VerifierDepsTest, InvokeDirect_Unresolved1) {
   ASSERT_FALSE(VerifyMethod("InvokeDirect_Unresolved1"));
-  ASSERT_TRUE(HasClass("Ljavax/net/ssl/SSLSocket;", true, "public abstract"));
+  ASSERT_TRUE(HasClass("Ljavax/net/ssl/SSLSocket;", true, "public"));
   ASSERT_TRUE(HasMethod("direct", "Ljavax/net/ssl/SSLSocket;", "x", "()V", false));
 }
 
@@ -983,7 +987,7 @@
                         "size",
                         "()I",
                         true,
-                        "public abstract",
+                        "public",
                         "Ljava/util/Set;"));
 }
 
@@ -1012,13 +1016,13 @@
 
 TEST_F(VerifierDepsTest, InvokeInterface_Resolved_DeclaredInReferenced) {
   ASSERT_TRUE(VerifyMethod("InvokeInterface_Resolved_DeclaredInReferenced"));
-  ASSERT_TRUE(HasClass("Ljava/lang/Runnable;", true, "public abstract interface"));
+  ASSERT_TRUE(HasClass("Ljava/lang/Runnable;", true, "public interface"));
   ASSERT_TRUE(HasMethod("interface",
                         "Ljava/lang/Runnable;",
                         "run",
                         "()V",
                         true,
-                        "public abstract",
+                        "public",
                         "Ljava/lang/Runnable;"));
 }
 
@@ -1034,7 +1038,7 @@
                         "run",
                         "()V",
                         true,
-                        "public abstract",
+                        "public",
                         "Ljava/lang/Runnable;"));
 }
 
@@ -1045,13 +1049,13 @@
                         "isEmpty",
                         "()Z",
                         true,
-                        "public abstract",
+                        "public",
                         "Ljava/util/Set;"));
 }
 
 TEST_F(VerifierDepsTest, InvokeInterface_Unresolved1) {
   ASSERT_FALSE(VerifyMethod("InvokeInterface_Unresolved1"));
-  ASSERT_TRUE(HasClass("Ljava/lang/Runnable;", true, "public abstract interface"));
+  ASSERT_TRUE(HasClass("Ljava/lang/Runnable;", true, "public interface"));
   ASSERT_TRUE(HasMethod("interface", "Ljava/lang/Runnable;", "x", "()V", false));
 }
 
@@ -1062,20 +1066,20 @@
 
 TEST_F(VerifierDepsTest, InvokeSuper_ThisAssignable) {
   ASSERT_TRUE(VerifyMethod("InvokeSuper_ThisAssignable"));
-  ASSERT_TRUE(HasClass("Ljava/lang/Runnable;", true, "public abstract interface"));
+  ASSERT_TRUE(HasClass("Ljava/lang/Runnable;", true, "public interface"));
   ASSERT_TRUE(HasAssignable("Ljava/lang/Runnable;", "Ljava/lang/Thread;", true));
   ASSERT_TRUE(HasMethod("interface",
                         "Ljava/lang/Runnable;",
                         "run",
                         "()V",
                         true,
-                        "public abstract",
+                        "public",
                         "Ljava/lang/Runnable;"));
 }
 
 TEST_F(VerifierDepsTest, InvokeSuper_ThisNotAssignable) {
   ASSERT_FALSE(VerifyMethod("InvokeSuper_ThisNotAssignable"));
-  ASSERT_TRUE(HasClass("Ljava/lang/Integer;", true, "public final"));
+  ASSERT_TRUE(HasClass("Ljava/lang/Integer;", true, "public"));
   ASSERT_TRUE(HasAssignable("Ljava/lang/Integer;", "Ljava/lang/Thread;", false));
   ASSERT_TRUE(HasMethod(
       "virtual", "Ljava/lang/Integer;", "intValue", "()I", true, "public", "Ljava/lang/Integer;"));
@@ -1083,12 +1087,12 @@
 
 TEST_F(VerifierDepsTest, ArgumentType_ResolvedReferenceArray) {
   ASSERT_TRUE(VerifyMethod("ArgumentType_ResolvedReferenceArray"));
-  ASSERT_TRUE(HasClass("[Ljava/lang/Thread;", true, "public final abstract"));
+  ASSERT_TRUE(HasClass("[Ljava/lang/Thread;", true, "public"));
 }
 
 TEST_F(VerifierDepsTest, NewArray_Resolved) {
   ASSERT_TRUE(VerifyMethod("NewArray_Resolved"));
-  ASSERT_TRUE(HasClass("[Ljava/lang/IllegalStateException;", true, "public final abstract"));
+  ASSERT_TRUE(HasClass("[Ljava/lang/IllegalStateException;", true, "public"));
 }
 
 TEST_F(VerifierDepsTest, EncodeDecode) {
@@ -1141,7 +1145,7 @@
   // Test that a class with hard failure is recorded.
   ASSERT_TRUE(HasUnverifiedClass("LMyVerificationFailure;"));
   // Test that a class with unresolved super is recorded.
-  ASSERT_FALSE(HasUnverifiedClass("LMyClassWithNoSuper;"));
+  ASSERT_TRUE(HasUnverifiedClass("LMyClassWithNoSuper;"));
   // Test that a class with unresolved super and hard failure is recorded.
   ASSERT_TRUE(HasUnverifiedClass("LMyClassWithNoSuperButFailures;"));
 }
@@ -1511,5 +1515,18 @@
   }
 }
 
+TEST_F(VerifierDepsTest, MultiDexVerification) {
+  VerifyDexFile("VerifierDepsMulti");
+  ASSERT_EQ(NumberOfCompiledDexFiles(), 2u);
+
+  ASSERT_TRUE(HasUnverifiedClass("LMySoftVerificationFailure;", *dex_files_[1]));
+  ASSERT_TRUE(HasUnverifiedClass("LMySub1SoftVerificationFailure;", *dex_files_[0]));
+  ASSERT_TRUE(HasUnverifiedClass("LMySub2SoftVerificationFailure;", *dex_files_[0]));
+
+  std::vector<uint8_t> buffer;
+  verifier_deps_->Encode(dex_files_, &buffer);
+  ASSERT_FALSE(buffer.empty());
+}
+
 }  // namespace verifier
 }  // namespace art
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index e6b7930..92a12c8 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -328,11 +328,6 @@
   UsageError("");
   UsageError("  --dump-timing: display a breakdown of where time was spent");
   UsageError("");
-  UsageError("  --include-patch-information: Include patching information so the generated code");
-  UsageError("      can have its base address moved without full recompilation.");
-  UsageError("");
-  UsageError("  --no-include-patch-information: Do not include patching information.");
-  UsageError("");
   UsageError("  -g");
   UsageError("  --generate-debug-info: Generate debug information for native debugging,");
   UsageError("      such as stack unwinding information, ELF symbols and DWARF sections.");
@@ -424,7 +419,13 @@
         shutting_down_(false) {
     const char* reason = "dex2oat watch dog thread startup";
     CHECK_WATCH_DOG_PTHREAD_CALL(pthread_mutex_init, (&mutex_, nullptr), reason);
-    CHECK_WATCH_DOG_PTHREAD_CALL(pthread_cond_init, (&cond_, nullptr), reason);
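+    // Use a monotonic clock for the watchdog's timed wait so that wall-clock
+    // adjustments can neither fire it early nor stall it. Mac OS lacks
+    // pthread_condattr_setclock, so it keeps the default (realtime) clock.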
+#ifndef __APPLE__
+    pthread_condattr_t condattr;
+    CHECK_WATCH_DOG_PTHREAD_CALL(pthread_condattr_init, (&condattr), reason);
+    CHECK_WATCH_DOG_PTHREAD_CALL(pthread_condattr_setclock, (&condattr, CLOCK_MONOTONIC), reason);
+    CHECK_WATCH_DOG_PTHREAD_CALL(pthread_cond_init, (&cond_, &condattr), reason);
+    CHECK_WATCH_DOG_PTHREAD_CALL(pthread_condattr_destroy, (&condattr), reason);
+#endif
     CHECK_WATCH_DOG_PTHREAD_CALL(pthread_attr_init, (&attr_), reason);
     CHECK_WATCH_DOG_PTHREAD_CALL(pthread_create, (&pthread_, &attr_, &CallBack, this), reason);
     CHECK_WATCH_DOG_PTHREAD_CALL(pthread_attr_destroy, (&attr_), reason);
@@ -482,7 +483,11 @@
 
   void Wait() {
     timespec timeout_ts;
+#if defined(__APPLE__)
     InitTimeSpec(true, CLOCK_REALTIME, timeout_in_milliseconds_, 0, &timeout_ts);
+#else
+    InitTimeSpec(true, CLOCK_MONOTONIC, timeout_in_milliseconds_, 0, &timeout_ts);
+#endif
     const char* reason = "dex2oat watch dog thread waiting";
     CHECK_WATCH_DOG_PTHREAD_CALL(pthread_mutex_lock, (&mutex_), reason);
     while (!shutting_down_) {
@@ -1416,25 +1421,15 @@
     if (profile_compilation_info_ != nullptr && IsAppImage()) {
       Runtime* runtime = Runtime::Current();
       CHECK(runtime != nullptr);
-      std::set<DexCacheResolvedClasses> resolved_classes(
-          profile_compilation_info_->GetResolvedClasses());
-
       // Filter out class path classes since we don't want to include these in the image.
       std::unordered_set<std::string> dex_files_locations;
       for (const DexFile* dex_file : dex_files_) {
         dex_files_locations.insert(dex_file->GetLocation());
       }
-      for (auto it = resolved_classes.begin(); it != resolved_classes.end(); ) {
-        if (dex_files_locations.find(it->GetDexLocation()) == dex_files_locations.end()) {
-          VLOG(compiler) << "Removed profile samples for non-app dex file " << it->GetDexLocation();
-          it = resolved_classes.erase(it);
-        } else {
-          ++it;
-        }
-      }
-
+      std::set<DexCacheResolvedClasses> resolved_classes(
+          profile_compilation_info_->GetResolvedClasses(dex_files_locations));
       image_classes_.reset(new std::unordered_set<std::string>(
-          runtime->GetClassLinker()->GetClassDescriptorsForProfileKeys(resolved_classes)));
+          runtime->GetClassLinker()->GetClassDescriptorsForResolvedClasses(resolved_classes)));
       VLOG(compiler) << "Loaded " << image_classes_->size()
                      << " image class descriptors from profile";
       if (VLOG_IS_ON(compiler)) {
@@ -1491,7 +1486,7 @@
         TimingLogger::ScopedTiming t3("Loading image checksum", timings_);
         std::vector<gc::space::ImageSpace*> image_spaces =
             Runtime::Current()->GetHeap()->GetBootImageSpaces();
-        image_file_location_oat_checksum_ = OatFileAssistant::CalculateCombinedImageChecksum();
+        image_file_location_oat_checksum_ = image_spaces[0]->GetImageHeader().GetOatChecksum();
         image_file_location_oat_data_begin_ =
             reinterpret_cast<uintptr_t>(image_spaces[0]->GetImageHeader().GetOatDataBegin());
         image_patch_delta_ = image_spaces[0]->GetImageHeader().GetPatchDelta();
@@ -1541,10 +1536,10 @@
         std::unique_ptr<MemMap> opened_dex_files_map;
         std::vector<std::unique_ptr<const DexFile>> opened_dex_files;
         // No need to verify the dex file for:
-        // 1) kSpeedProfile, since it includes dexlayout, which does the verification.
+        // 1) dexlayout, since it performs the verification itself. Moreover, the output
+        // might not pass verification anyway, because the dex checksum is not updated.
         // 2) when we have a vdex file, which means it was already verified.
-        bool verify = compiler_options_->GetCompilerFilter() != CompilerFilter::kSpeedProfile &&
-            (input_vdex_file_ == nullptr);
+        const bool verify = !DoDexLayoutOptimizations() && (input_vdex_file_ == nullptr);
         if (!oat_writers_[i]->WriteAndOpenDexFiles(
             kIsVdexEnabled ? vdex_files_[i].get() : oat_files_[i].get(),
             rodata_.back(),
@@ -1912,6 +1907,14 @@
                                              oat_writer->GetOatDataOffset(),
                                              oat_writer->GetOatSize());
         }
+
+        if (IsBootImage()) {
+          // Have the image_file_location_oat_checksum_ for boot oat files
+          // depend on the contents of all the boot oat files. This way only
+          // the primary image checksum needs to be checked to determine
+          // whether any of the images are out of date.
+          image_file_location_oat_checksum_ ^= oat_writer->GetOatHeader().GetChecksum();
+        }
       }
 
       for (size_t i = 0, size = oat_files_.size(); i != size; ++i) {
@@ -1958,7 +1961,6 @@
 
         elf_writer->WriteDynamicSection();
         elf_writer->WriteDebugInfo(oat_writer->GetMethodDebugInfo());
-        elf_writer->WritePatchLocations(oat_writer->GetAbsolutePatchLocations());
 
         if (!elf_writer->End()) {
           LOG(ERROR) << "Failed to write ELF file " << oat_file->GetPath();
@@ -2094,12 +2096,24 @@
     return is_host_;
   }
 
-  bool UseProfileGuidedCompilation() const {
+  bool UseProfile() const {
     return profile_file_fd_ != -1 || !profile_file_.empty();
   }
 
+  bool DoProfileGuidedOptimizations() const {
+    return UseProfile() && compiler_options_->GetCompilerFilter() != CompilerFilter::kVerifyProfile;
+  }
+
+  bool DoDexLayoutOptimizations() const {
+    return DoProfileGuidedOptimizations();
+  }
+
+  bool HasInputVdexFile() const {
+    return input_vdex_file_ != nullptr || input_vdex_fd_ != -1 || !input_vdex_.empty();
+  }
+
   bool LoadProfile() {
-    DCHECK(UseProfileGuidedCompilation());
+    DCHECK(UseProfile());
 
     profile_compilation_info_.reset(new ProfileCompilationInfo());
     ScopedFlock flock;
@@ -2356,7 +2370,7 @@
                                                      compiler_options_.get(),
                                                      oat_file.get()));
       elf_writers_.back()->Start();
-      bool do_dexlayout = compiler_options_->GetCompilerFilter() == CompilerFilter::kSpeedProfile;
+      const bool do_dexlayout = DoDexLayoutOptimizations();
       oat_writers_.emplace_back(new OatWriter(
           IsBootImage(), timings_, do_dexlayout ? profile_compilation_info_.get() : nullptr));
     }
@@ -2803,6 +2817,9 @@
 
   // When given --host, finish early without stripping.
   if (dex2oat.IsHost()) {
+    if (!dex2oat.FlushCloseOutputFiles()) {
+      return EXIT_FAILURE;
+    }
     dex2oat.DumpTiming();
     return EXIT_SUCCESS;
   }
@@ -2873,13 +2890,20 @@
 
   // If needed, process profile information for profile guided compilation.
   // This operation involves I/O.
-  if (dex2oat->UseProfileGuidedCompilation()) {
+  if (dex2oat->UseProfile()) {
     if (!dex2oat->LoadProfile()) {
       LOG(ERROR) << "Failed to process profile file";
       return EXIT_FAILURE;
     }
   }
 
+  if (dex2oat->DoDexLayoutOptimizations()) {
+    if (dex2oat->HasInputVdexFile()) {
+      LOG(ERROR) << "Dexlayout is incompatible with an input VDEX";
+      return EXIT_FAILURE;
+    }
+  }
+
   art::MemMap::Init();  // For ZipEntry::ExtractToMemMap, and vdex.
 
   // Check early that the result of compilation can be written
diff --git a/dex2oat/dex2oat_test.cc b/dex2oat/dex2oat_test.cc
index 6881f75..289b8ab 100644
--- a/dex2oat/dex2oat_test.cc
+++ b/dex2oat/dex2oat_test.cc
@@ -37,6 +37,8 @@
 
 namespace art {
 
+using android::base::StringPrintf;
+
 class Dex2oatTest : public Dex2oatEnvironmentTest {
  public:
   virtual void TearDown() OVERRIDE {
@@ -52,10 +54,19 @@
                            const std::string& odex_location,
                            CompilerFilter::Filter filter,
                            const std::vector<std::string>& extra_args = {},
-                           bool expect_success = true) {
+                           bool expect_success = true,
+                           bool use_fd = false) {
+    std::unique_ptr<File> oat_file;
     std::vector<std::string> args;
     args.push_back("--dex-file=" + dex_location);
-    args.push_back("--oat-file=" + odex_location);
+    if (use_fd) {
+      oat_file.reset(OS::CreateEmptyFile(odex_location.c_str()));
+      CHECK(oat_file != nullptr) << odex_location;
+      args.push_back("--oat-fd=" + std::to_string(oat_file->Fd()));
+      args.push_back("--oat-location=" + odex_location);
+    } else {
+      args.push_back("--oat-file=" + odex_location);
+    }
     args.push_back("--compiler-filter=" + CompilerFilter::NameOfFilter(filter));
     args.push_back("--runtime-arg");
     args.push_back("-Xnorelocate");
@@ -64,6 +75,9 @@
 
     std::string error_msg;
     bool success = Dex2Oat(args, &error_msg);
+    if (oat_file != nullptr) {
+      ASSERT_EQ(oat_file->FlushClose(), 0) << "Could not flush and close oat file";
+    }
 
     if (expect_success) {
       ASSERT_TRUE(success) << error_msg << std::endl << output_;
@@ -554,6 +568,12 @@
   RunTest(CompilerFilter::kSpeed, true, { "--very-large-app-threshold=100" });
 }
 
+// Regression test for b/35665292.
+TEST_F(Dex2oatVeryLargeTest, SpeedProfileNoProfile) {
+  // Test that dex2oat doesn't crash with speed-profile but no input profile.
+  RunTest(CompilerFilter::kSpeedProfile, false);
+}
+
 class Dex2oatLayoutTest : public Dex2oatTest {
  protected:
   void CheckFilter(CompilerFilter::Filter input ATTRIBUTE_UNUSED,
@@ -564,40 +584,152 @@
   // Emits a profile with a single dex file with the given location and type indices 1..num_classes.
   void GenerateProfile(const std::string& test_profile,
                        const std::string& dex_location,
+                       size_t num_classes,
                        uint32_t checksum) {
     int profile_test_fd = open(test_profile.c_str(), O_CREAT | O_TRUNC | O_WRONLY, 0644);
     CHECK_GE(profile_test_fd, 0);
 
     ProfileCompilationInfo info;
     std::string profile_key = ProfileCompilationInfo::GetProfileDexFileKey(dex_location);
-    info.AddClassIndex(profile_key, checksum, dex::TypeIndex(1));
+    for (size_t i = 0; i < num_classes; ++i) {
+      info.AddClassIndex(profile_key, checksum, dex::TypeIndex(1 + i));
+    }
     bool result = info.Save(profile_test_fd);
     close(profile_test_fd);
     ASSERT_TRUE(result);
   }
 
-  void RunTest() {
-    std::string dex_location = GetScratchDir() + "/DexNoOat.jar";
-    std::string profile_location = GetScratchDir() + "/primary.prof";
-    std::string odex_location = GetOdexDir() + "/DexOdexNoOat.odex";
-
-    Copy(GetDexSrc2(), dex_location);
+  void CompileProfileOdex(const std::string& dex_location,
+                          const std::string& odex_location,
+                          const std::string& app_image_file_name,
+                          bool use_fd,
+                          size_t num_profile_classes,
+                          const std::vector<std::string>& extra_args = {},
+                          bool expect_success = true) {
+    const std::string profile_location = GetScratchDir() + "/primary.prof";
     const char* location = dex_location.c_str();
     std::string error_msg;
     std::vector<std::unique_ptr<const DexFile>> dex_files;
     ASSERT_TRUE(DexFile::Open(location, location, true, &error_msg, &dex_files));
     EXPECT_EQ(dex_files.size(), 1U);
     std::unique_ptr<const DexFile>& dex_file = dex_files[0];
-    GenerateProfile(profile_location, dex_location, dex_file->GetLocationChecksum());
+    GenerateProfile(profile_location,
+                    dex_location,
+                    num_profile_classes,
+                    dex_file->GetLocationChecksum());
+    std::vector<std::string> copy(extra_args);
+    copy.push_back("--profile-file=" + profile_location);
+    std::unique_ptr<File> app_image_file;
+    if (!app_image_file_name.empty()) {
+      if (use_fd) {
+        app_image_file.reset(OS::CreateEmptyFile(app_image_file_name.c_str()));
+        copy.push_back("--app-image-fd=" + std::to_string(app_image_file->Fd()));
+      } else {
+        copy.push_back("--app-image-file=" + app_image_file_name);
+      }
+    }
+    GenerateOdexForTest(dex_location,
+                        odex_location,
+                        CompilerFilter::kSpeedProfile,
+                        copy,
+                        expect_success,
+                        use_fd);
+    if (app_image_file != nullptr) {
+      ASSERT_EQ(app_image_file->FlushCloseOrErase(), 0) << "Could not flush and close art file";
+    }
+  }
 
-    const std::vector<std::string>& extra_args = { "--profile-file=" + profile_location };
-    GenerateOdexForTest(dex_location, odex_location, CompilerFilter::kSpeedProfile, extra_args);
+  uint64_t GetImageSize(const std::string& image_file_name) {
+    EXPECT_FALSE(image_file_name.empty());
+    std::unique_ptr<File> file(OS::OpenFileForReading(image_file_name.c_str()));
+    CHECK(file != nullptr);
+    ImageHeader image_header;
+    const bool success = file->ReadFully(&image_header, sizeof(image_header));
+    CHECK(success);
+    CHECK(image_header.IsValid());
+    ReaderMutexLock mu(Thread::Current(), *Locks::mutator_lock_);
+    return image_header.GetImageSize();
+  }
 
+  void RunTest(bool app_image) {
+    std::string dex_location = GetScratchDir() + "/DexNoOat.jar";
+    std::string odex_location = GetOdexDir() + "/DexOdexNoOat.odex";
+    std::string app_image_file = app_image ? (GetOdexDir() + "/DexOdexNoOat.art") : "";
+    Copy(GetDexSrc2(), dex_location);
+
+    uint64_t image_file_empty_profile = 0;
+    if (app_image) {
+      CompileProfileOdex(dex_location,
+                         odex_location,
+                         app_image_file,
+                         /* use_fd */ false,
+                         /* num_profile_classes */ 0);
+      CheckValidity();
+      ASSERT_TRUE(success_);
+      // Don't check the result since CheckResult relies on the class being in the profile.
+      image_file_empty_profile = GetImageSize(app_image_file);
+      EXPECT_GT(image_file_empty_profile, 0u);
+    }
+
+    // Small profile.
+    CompileProfileOdex(dex_location,
+                       odex_location,
+                       app_image_file,
+                       /* use_fd */ false,
+                       /* num_profile_classes */ 1);
     CheckValidity();
     ASSERT_TRUE(success_);
-    CheckResult(dex_location, odex_location);
+    CheckResult(dex_location, odex_location, app_image_file);
+
+    if (app_image) {
+      // Test that the profile made a difference by adding more classes.
+      const uint64_t image_file_small_profile = GetImageSize(app_image_file);
+      CHECK_LT(image_file_empty_profile, image_file_small_profile);
+    }
   }
-  void CheckResult(const std::string& dex_location, const std::string& odex_location) {
+
+  void RunTestVDex() {
+    std::string dex_location = GetScratchDir() + "/DexNoOat.jar";
+    std::string odex_location = GetOdexDir() + "/DexOdexNoOat.odex";
+    std::string vdex_location = GetOdexDir() + "/DexOdexNoOat.vdex";
+    std::string app_image_file_name = GetOdexDir() + "/DexOdexNoOat.art";
+    Copy(GetDexSrc2(), dex_location);
+
+    std::unique_ptr<File> vdex_file1(OS::CreateEmptyFile(vdex_location.c_str()));
+    CHECK(vdex_file1 != nullptr) << vdex_location;
+    ScratchFile vdex_file2;
+    {
+      std::string input_vdex = "--input-vdex-fd=-1";
+      std::string output_vdex = StringPrintf("--output-vdex-fd=%d", vdex_file1->Fd());
+      CompileProfileOdex(dex_location,
+                         odex_location,
+                         app_image_file_name,
+                         /* use_fd */ true,
+                         /* num_profile_classes */ 1,
+                         { input_vdex, output_vdex });
+      EXPECT_GT(vdex_file1->GetLength(), 0u);
+    }
+    {
+      // Test that dex2oat fails gracefully when dexlayout is given an input vdex.
+      std::string input_vdex = StringPrintf("--input-vdex-fd=%d", vdex_file1->Fd());
+      std::string output_vdex = StringPrintf("--output-vdex-fd=%d", vdex_file2.GetFd());
+      CompileProfileOdex(dex_location,
+                         odex_location,
+                         app_image_file_name,
+                         /* use_fd */ true,
+                         /* num_profile_classes */ 1,
+                         { input_vdex, output_vdex },
+                         /* expect_success */ false);
+      EXPECT_EQ(vdex_file2.GetFile()->GetLength(), 0u);
+    }
+    ASSERT_EQ(vdex_file1->FlushCloseOrErase(), 0) << "Could not flush and close vdex file";
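+    // The second compile above is expected to fail: dexlayout cannot consume
+    // an input vdex, so dex2oat should reject the combination gracefully.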
+    CheckValidity();
+    ASSERT_FALSE(success_);
+  }
+
+  void CheckResult(const std::string& dex_location,
+                   const std::string& odex_location,
+                   const std::string& app_image_file_name) {
     // Host/target independent checks.
     std::string error_msg;
     std::unique_ptr<OatFile> odex_file(OatFile::Open(odex_location.c_str(),
@@ -633,29 +765,47 @@
     }
 
     EXPECT_EQ(odex_file->GetCompilerFilter(), CompilerFilter::kSpeedProfile);
+
+    if (!app_image_file_name.empty()) {
+      // Go peek at the image header to make sure it was large enough to contain the class.
+      std::unique_ptr<File> file(OS::OpenFileForReading(app_image_file_name.c_str()));
+      ImageHeader image_header;
+      bool success = file->ReadFully(&image_header, sizeof(image_header));
+      ASSERT_TRUE(success);
+      ASSERT_TRUE(image_header.IsValid());
+      EXPECT_GT(image_header.GetImageSection(ImageHeader::kSectionObjects).Size(), 0u);
+    }
   }
 
-    // Check whether the dex2oat run was really successful.
-    void CheckValidity() {
-      if (kIsTargetBuild) {
-        CheckTargetValidity();
-      } else {
-        CheckHostValidity();
-      }
+  // Check whether the dex2oat run was really successful.
+  void CheckValidity() {
+    if (kIsTargetBuild) {
+      CheckTargetValidity();
+    } else {
+      CheckHostValidity();
     }
+  }
 
-    void CheckTargetValidity() {
-      // TODO: Ignore for now.
-    }
+  void CheckTargetValidity() {
+    // TODO: Ignore for now.
+  }
 
-    // On the host, we can get the dex2oat output. Here, look for "dex2oat took."
-    void CheckHostValidity() {
-      EXPECT_NE(output_.find("dex2oat took"), std::string::npos) << output_;
-    }
-  };
+  // On the host, we can get the dex2oat output. Here, look for "dex2oat took."
+  void CheckHostValidity() {
+    EXPECT_NE(output_.find("dex2oat took"), std::string::npos) << output_;
+  }
+};
 
 TEST_F(Dex2oatLayoutTest, TestLayout) {
-  RunTest();
+  RunTest(/* app_image */ false);
+}
+
+TEST_F(Dex2oatLayoutTest, TestLayoutAppImage) {
+  RunTest(/* app_image */ true);
+}
+
+TEST_F(Dex2oatLayoutTest, TestVdexLayout) {
+  RunTestVDex();
 }
 
 class Dex2oatWatchdogTest : public Dex2oatTest {
diff --git a/dexlayout/Android.bp b/dexlayout/Android.bp
index 9ee9ebd..cf523ec 100644
--- a/dexlayout/Android.bp
+++ b/dexlayout/Android.bp
@@ -19,6 +19,7 @@
         "dexlayout.cc",
         "dex_ir.cc",
         "dex_ir_builder.cc",
+	"dex_verify.cc",
         "dex_visualize.cc",
         "dex_writer.cc",
     ],
diff --git a/dexlayout/dex_ir.cc b/dexlayout/dex_ir.cc
index 2d9bbfd..4228503 100644
--- a/dexlayout/dex_ir.cc
+++ b/dexlayout/dex_ir.cc
@@ -56,7 +56,7 @@
                     entry.end_address_, entry.reg_)));
 }
 
-static uint32_t GetCodeItemSize(const DexFile& dex_file, const DexFile::CodeItem& disk_code_item) {
+static uint32_t GetCodeItemSize(const DexFile::CodeItem& disk_code_item) {
   uintptr_t code_item_start = reinterpret_cast<uintptr_t>(&disk_code_item);
   uint32_t insns_size = disk_code_item.insns_size_in_code_units_;
   uint32_t tries_size = disk_code_item.tries_size_;
@@ -64,23 +64,18 @@
     uintptr_t insns_end = reinterpret_cast<uintptr_t>(&disk_code_item.insns_[insns_size]);
     return insns_end - code_item_start;
   } else {
-    uint32_t last_handler_off = 0;
-    for (uint32_t i = 0; i < tries_size; ++i) {
-      // Iterate over the try items to find the last catch handler.
-      const DexFile::TryItem* disk_try_item = dex_file.GetTryItems(disk_code_item, i);
-      uint16_t handler_off = disk_try_item->handler_off_;
-      if (handler_off > last_handler_off) {
-        last_handler_off = handler_off;
+    // Get the start of the handler data.
+    const uint8_t* handler_data = DexFile::GetCatchHandlerData(disk_code_item, 0);
+    uint32_t handlers_size = DecodeUnsignedLeb128(&handler_data);
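+    // Encoded handler list layout (dex format):
+    //   handlers_size             : uleb128
+    //   per handler: size         : sleb128 (size <= 0 means a catch-all follows)
+    //                (type, addr) : abs(size) uleb128 pairs
+    //                catch_all    : one uleb128 addr, present only when size <= 0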
+    // Manually read each handler.
+    for (uint32_t i = 0; i < handlers_size; ++i) {
+      int32_t uleb128_count = DecodeSignedLeb128(&handler_data) * 2;
+      if (uleb128_count <= 0) {
+        uleb128_count = -uleb128_count + 1;
       }
-    }
-    // Decode the final handler to see where it ends.
-    const uint8_t* handler_data = DexFile::GetCatchHandlerData(disk_code_item, last_handler_off);
-    int32_t uleb128_count = DecodeSignedLeb128(&handler_data) * 2;
-    if (uleb128_count <= 0) {
-      uleb128_count = -uleb128_count + 1;
-    }
-    for (int32_t i = 0; i < uleb128_count; ++i) {
-      DecodeUnsignedLeb128(&handler_data);
+      for (int32_t j = 0; j < uleb128_count; ++j) {
+        DecodeUnsignedLeb128(&handler_data);
+      }
     }
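+    // handler_data now points one past the last encoded handler, so the
+    // distance from code_item_start is the total size of the code item.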
     return reinterpret_cast<uintptr_t>(handler_data) - code_item_start;
   }
@@ -616,6 +611,7 @@
       for (std::unique_ptr<const CatchHandler>& existing_handlers : *handler_list) {
         if (handler_off == existing_handlers->GetListOffset()) {
           handlers = existing_handlers.get();
+          break;
         }
       }
       if (handlers == nullptr) {
@@ -634,8 +630,52 @@
       TryItem* try_item = new TryItem(start_addr, insn_count, handlers);
       tries->push_back(std::unique_ptr<const TryItem>(try_item));
     }
+    // Manually walk the catch handler list and add any handlers that no try item references.
+    const uint8_t* handlers_base = DexFile::GetCatchHandlerData(disk_code_item, 0);
+    const uint8_t* handlers_data = handlers_base;
+    uint32_t handlers_size = DecodeUnsignedLeb128(&handlers_data);
+    while (handlers_size > handler_list->size()) {
+      bool already_added = false;
+      uint16_t handler_off = handlers_data - handlers_base;
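+      // Handler offsets are measured from the start of the encoded handler
+      // list, the same base as the handler_off_ stored in each try item.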
+      for (std::unique_ptr<const CatchHandler>& existing_handlers : *handler_list) {
+        if (handler_off == existing_handlers->GetListOffset()) {
+          already_added = true;
+          break;
+        }
+      }
+      int32_t size = DecodeSignedLeb128(&handlers_data);
+      bool has_catch_all = size <= 0;
+      if (has_catch_all) {
+        size = -size;
+      }
+      if (already_added) {
+        for (int32_t i = 0; i < size; i++) {
+          DecodeUnsignedLeb128(&handlers_data);
+          DecodeUnsignedLeb128(&handlers_data);
+        }
+        if (has_catch_all) {
+          DecodeUnsignedLeb128(&handlers_data);
+        }
+        continue;
+      }
+      TypeAddrPairVector* addr_pairs = new TypeAddrPairVector();
+      for (int32_t i = 0; i < size; i++) {
+        const TypeId* type_id = GetTypeIdOrNullPtr(DecodeUnsignedLeb128(&handlers_data));
+        uint32_t addr = DecodeUnsignedLeb128(&handlers_data);
+        addr_pairs->push_back(
+            std::unique_ptr<const TypeAddrPair>(new TypeAddrPair(type_id, addr)));
+      }
+      if (has_catch_all) {
+        uint32_t addr = DecodeUnsignedLeb128(&handlers_data);
+        addr_pairs->push_back(
+            std::unique_ptr<const TypeAddrPair>(new TypeAddrPair(nullptr, addr)));
+      }
+      const CatchHandler* handler = new CatchHandler(has_catch_all, handler_off, addr_pairs);
+      handler_list->push_back(std::unique_ptr<const CatchHandler>(handler));
+    }
   }
-  uint32_t size = GetCodeItemSize(dex_file, disk_code_item);
+
+  uint32_t size = GetCodeItemSize(disk_code_item);
   CodeItem* code_item = new CodeItem(
       registers_size, ins_size, outs_size, debug_info, insns_size, insns, tries, handler_list);
   code_item->SetSize(size);
diff --git a/dexlayout/dex_ir.h b/dexlayout/dex_ir.h
index 96afb90..78ddde8 100644
--- a/dexlayout/dex_ir.h
+++ b/dexlayout/dex_ir.h
@@ -748,8 +748,7 @@
   const TypeId* ClassType() const { return class_type_; }
   uint32_t GetAccessFlags() const { return access_flags_; }
   const TypeId* Superclass() const { return superclass_; }
-  const TypeIdVector* Interfaces()
-      { return interfaces_ == nullptr ? nullptr : interfaces_->GetTypeList(); }
+  const TypeList* Interfaces() { return interfaces_; }
   uint32_t InterfacesOffset() { return interfaces_ == nullptr ? 0 : interfaces_->GetOffset(); }
   const StringId* SourceFile() const { return source_file_; }
   AnnotationsDirectoryItem* Annotations() const { return annotations_; }
@@ -781,7 +780,7 @@
   uint32_t GetAddress() const { return address_; }
 
  private:
-  const TypeId* type_id_;
+  const TypeId* type_id_;  // This can be nullptr.
   uint32_t address_;
 
   DISALLOW_COPY_AND_ASSIGN(TypeAddrPair);
diff --git a/dexlayout/dex_verify.cc b/dexlayout/dex_verify.cc
new file mode 100644
index 0000000..5458129
--- /dev/null
+++ b/dexlayout/dex_verify.cc
@@ -0,0 +1,1120 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Implementation file of dex ir verifier.
+ *
+ * Compares two dex files at the IR level, allowing differences in layout, but not in data.
+ */
+
+#include "dex_verify.h"
+
+#include <inttypes.h>
+
+#include "android-base/stringprintf.h"
+
+namespace art {
+
+using android::base::StringPrintf;
+
+bool VerifyOutputDexFile(dex_ir::Header* orig_header,
+                         dex_ir::Header* output_header,
+                         std::string* error_msg) {
+  dex_ir::Collections& orig = orig_header->GetCollections();
+  dex_ir::Collections& output = output_header->GetCollections();
+
+  // Compare all id sections. They have a defined order that can't be changed by dexlayout.
+  if (!VerifyIds(orig.StringIds(), output.StringIds(), "string ids", error_msg) ||
+      !VerifyIds(orig.TypeIds(), output.TypeIds(), "type ids", error_msg) ||
+      !VerifyIds(orig.ProtoIds(), output.ProtoIds(), "proto ids", error_msg) ||
+      !VerifyIds(orig.FieldIds(), output.FieldIds(), "field ids", error_msg) ||
+      !VerifyIds(orig.MethodIds(), output.MethodIds(), "method ids", error_msg)) {
+    return false;
+  }
+  // Compare class defs. The order may have been changed by dexlayout.
+  if (!VerifyClassDefs(orig.ClassDefs(), output.ClassDefs(), error_msg)) {
+    return false;
+  }
+  return true;
+}
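+
+// Hypothetical usage after a dexlayout pass (names illustrative):
+//   std::string error_msg;
+//   if (!VerifyOutputDexFile(orig_header, output_header, &error_msg)) {
+//     LOG(ERROR) << "dexlayout verification failed: " << error_msg;
+//   }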
+
+template<class T> bool VerifyIds(std::vector<std::unique_ptr<T>>& orig,
+                                 std::vector<std::unique_ptr<T>>& output,
+                                 const char* section_name,
+                                 std::string* error_msg) {
+  if (orig.size() != output.size()) {
+    *error_msg = StringPrintf(
+        "Mismatched size for %s section: %zu vs %zu.", section_name, orig.size(), output.size());
+    return false;
+  }
+  for (size_t i = 0; i < orig.size(); ++i) {
+    if (!VerifyId(orig[i].get(), output[i].get(), error_msg)) {
+      return false;
+    }
+  }
+  return true;
+}
+
+bool VerifyId(dex_ir::StringId* orig, dex_ir::StringId* output, std::string* error_msg) {
+  if (strcmp(orig->Data(), output->Data()) != 0) {
+    *error_msg = StringPrintf("Mismatched string data for string id %u at offset %x: %s vs %s.",
+                              orig->GetIndex(),
+                              orig->GetOffset(),
+                              orig->Data(),
+                              output->Data());
+    return false;
+  }
+  return true;
+}
+
+bool VerifyId(dex_ir::TypeId* orig, dex_ir::TypeId* output, std::string* error_msg) {
+  if (orig->GetStringId()->GetIndex() != output->GetStringId()->GetIndex()) {
+    *error_msg = StringPrintf("Mismatched string index for type id %u at offset %x: %u vs %u.",
+                              orig->GetIndex(),
+                              orig->GetOffset(),
+                              orig->GetStringId()->GetIndex(),
+                              output->GetStringId()->GetIndex());
+    return false;
+  }
+  return true;
+}
+
+bool VerifyId(dex_ir::ProtoId* orig, dex_ir::ProtoId* output, std::string* error_msg) {
+  if (orig->Shorty()->GetIndex() != output->Shorty()->GetIndex()) {
+    *error_msg = StringPrintf("Mismatched string index for proto id %u at offset %x: %u vs %u.",
+                              orig->GetIndex(),
+                              orig->GetOffset(),
+                              orig->Shorty()->GetIndex(),
+                              output->Shorty()->GetIndex());
+    return false;
+  }
+  if (orig->ReturnType()->GetIndex() != output->ReturnType()->GetIndex()) {
+    *error_msg = StringPrintf("Mismatched type index for proto id %u at offset %x: %u vs %u.",
+                              orig->GetIndex(),
+                              orig->GetOffset(),
+                              orig->ReturnType()->GetIndex(),
+                              output->ReturnType()->GetIndex());
+    return false;
+  }
+  if (!VerifyTypeList(orig->Parameters(), output->Parameters())) {
+    *error_msg = StringPrintf("Mismatched type list for proto id %u at offset %x.",
+                              orig->GetIndex(),
+                              orig->GetOffset());
+    return false;
+  }
+  return true;
+}
+
+bool VerifyId(dex_ir::FieldId* orig, dex_ir::FieldId* output, std::string* error_msg) {
+  if (orig->Class()->GetIndex() != output->Class()->GetIndex()) {
+    *error_msg =
+        StringPrintf("Mismatched class type index for field id %u at offset %x: %u vs %u.",
+                     orig->GetIndex(),
+                     orig->GetOffset(),
+                     orig->Class()->GetIndex(),
+                     output->Class()->GetIndex());
+    return false;
+  }
+  if (orig->Type()->GetIndex() != output->Type()->GetIndex()) {
+    *error_msg = StringPrintf("Mismatched type index for field id %u at offset %x: %u vs %u.",
+                              orig->GetIndex(),
+                              orig->GetOffset(),
+                              orig->Type()->GetIndex(),
+                              output->Type()->GetIndex());
+    return false;
+  }
+  if (orig->Name()->GetIndex() != output->Name()->GetIndex()) {
+    *error_msg = StringPrintf("Mismatched string index for field id %u at offset %x: %u vs %u.",
+                              orig->GetIndex(),
+                              orig->GetOffset(),
+                              orig->Name()->GetIndex(),
+                              output->Name()->GetIndex());
+    return false;
+  }
+  return true;
+}
+
+bool VerifyId(dex_ir::MethodId* orig, dex_ir::MethodId* output, std::string* error_msg) {
+  if (orig->Class()->GetIndex() != output->Class()->GetIndex()) {
+    *error_msg = StringPrintf("Mismatched type index for method id %u at offset %x: %u vs %u.",
+                              orig->GetIndex(),
+                              orig->GetOffset(),
+                              orig->Class()->GetIndex(),
+                              output->Class()->GetIndex());
+    return false;
+  }
+  if (orig->Proto()->GetIndex() != output->Proto()->GetIndex()) {
+    *error_msg = StringPrintf("Mismatched proto index for method id %u at offset %x: %u vs %u.",
+                              orig->GetIndex(),
+                              orig->GetOffset(),
+                              orig->Proto()->GetIndex(),
+                              output->Proto()->GetIndex());
+    return false;
+  }
+  if (orig->Name()->GetIndex() != output->Name()->GetIndex()) {
+    *error_msg =
+        StringPrintf("Mismatched string index for method id %u at offset %x: %u vs %u.",
+                     orig->GetIndex(),
+                     orig->GetOffset(),
+                     orig->Name()->GetIndex(),
+                     output->Name()->GetIndex());
+    return false;
+  }
+  return true;
+}
+
+struct ClassDefCompare {
+  bool operator()(dex_ir::ClassDef* lhs, dex_ir::ClassDef* rhs) const {
+    return lhs->ClassType()->GetIndex() < rhs->ClassType()->GetIndex();
+  }
+};
+
+// The class defs may have a new order due to dexlayout. Use the class's class_idx to uniquely
+// identify them and sort them for comparison.
+bool VerifyClassDefs(std::vector<std::unique_ptr<dex_ir::ClassDef>>& orig,
+                     std::vector<std::unique_ptr<dex_ir::ClassDef>>& output,
+                     std::string* error_msg) {
+  if (orig.size() != output.size()) {
+    *error_msg = StringPrintf(
+        "Mismatched size for class defs section: %zu vs %zu.", orig.size(), output.size());
+    return false;
+  }
+  // Store the class defs into sets sorted by the class's type index.
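+  // Type indices are unique per class def in a valid dex file, so the sorted
+  // sets line up element by element even if dexlayout reordered the section.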
+  std::set<dex_ir::ClassDef*, ClassDefCompare> orig_set;
+  std::set<dex_ir::ClassDef*, ClassDefCompare> output_set;
+  for (size_t i = 0; i < orig.size(); ++i) {
+    orig_set.insert(orig[i].get());
+    output_set.insert(output[i].get());
+  }
+  auto orig_iter = orig_set.begin();
+  auto output_iter = output_set.begin();
+  while (orig_iter != orig_set.end() && output_iter != output_set.end()) {
+    if (!VerifyClassDef(*orig_iter, *output_iter, error_msg)) {
+      return false;
+    }
+    orig_iter++;
+    output_iter++;
+  }
+  return true;
+}
+
+bool VerifyClassDef(dex_ir::ClassDef* orig, dex_ir::ClassDef* output, std::string* error_msg) {
+  if (orig->ClassType()->GetIndex() != output->ClassType()->GetIndex()) {
+    *error_msg =
+        StringPrintf("Mismatched class type index for class def %u at offset %x: %u vs %u.",
+                     orig->GetIndex(),
+                     orig->GetOffset(),
+                     orig->ClassType()->GetIndex(),
+                     output->ClassType()->GetIndex());
+    return false;
+  }
+  if (orig->GetAccessFlags() != output->GetAccessFlags()) {
+    *error_msg =
+        StringPrintf("Mismatched access flags for class def %u at offset %x: %x vs %x.",
+                     orig->GetIndex(),
+                     orig->GetOffset(),
+                     orig->GetAccessFlags(),
+                     output->GetAccessFlags());
+    return false;
+  }
+  uint32_t orig_super = orig->Superclass() == nullptr ? 0 : orig->Superclass()->GetIndex();
+  uint32_t output_super = output->Superclass() == nullptr ? 0 : output->Superclass()->GetIndex();
+  if (orig_super != output_super) {
+    *error_msg =
+        StringPrintf("Mismatched super class for class def %u at offset %x: %u vs %u.",
+                     orig->GetIndex(),
+                     orig->GetOffset(),
+                     orig_super,
+                     output_super);
+    return false;
+  }
+  if (!VerifyTypeList(orig->Interfaces(), output->Interfaces())) {
+    *error_msg = StringPrintf("Mismatched type list for class def %u at offset %x.",
+                              orig->GetIndex(),
+                              orig->GetOffset());
+    return false;
+  }
+  const char* orig_source = orig->SourceFile() == nullptr ? "" : orig->SourceFile()->Data();
+  const char* output_source = output->SourceFile() == nullptr ? "" : output->SourceFile()->Data();
+  if (strcmp(orig_source, output_source) != 0) {
+    *error_msg = StringPrintf("Mismatched source file for class def %u at offset %x: %s vs %s.",
+                              orig->GetIndex(),
+                              orig->GetOffset(),
+                              orig_source,
+                              output_source);
+    return false;
+  }
+  if (!VerifyAnnotationsDirectory(orig->Annotations(), output->Annotations(), error_msg)) {
+    return false;
+  }
+  if (!VerifyClassData(orig->GetClassData(), output->GetClassData(), error_msg)) {
+    return false;
+  }
+  return VerifyEncodedArray(orig->StaticValues(), output->StaticValues(), error_msg);
+}
+
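+// Type lists (interface lists, proto parameter lists) are order-sensitive, so
+// they are compared element by element rather than as sets.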
+bool VerifyTypeList(const dex_ir::TypeList* orig, const dex_ir::TypeList* output) {
+  if (orig == nullptr || output == nullptr) {
+    return orig == output;
+  }
+  const dex_ir::TypeIdVector* orig_list = orig->GetTypeList();
+  const dex_ir::TypeIdVector* output_list = output->GetTypeList();
+  if (orig_list->size() != output_list->size()) {
+    return false;
+  }
+  for (size_t i = 0; i < orig_list->size(); ++i) {
+    if ((*orig_list)[i]->GetIndex() != (*output_list)[i]->GetIndex()) {
+      return false;
+    }
+  }
+  return true;
+}
+
+bool VerifyAnnotationsDirectory(dex_ir::AnnotationsDirectoryItem* orig,
+                                dex_ir::AnnotationsDirectoryItem* output,
+                                std::string* error_msg) {
+  if (orig == nullptr || output == nullptr) {
+    if (orig != output) {
+      *error_msg = "Found unexpected empty annotations directory.";
+      return false;
+    }
+    return true;
+  }
+  if (!VerifyAnnotationSet(orig->GetClassAnnotation(), output->GetClassAnnotation(), error_msg)) {
+    return false;
+  }
+  if (!VerifyFieldAnnotations(orig->GetFieldAnnotations(),
+                              output->GetFieldAnnotations(),
+                              orig->GetOffset(),
+                              error_msg)) {
+    return false;
+  }
+  if (!VerifyMethodAnnotations(orig->GetMethodAnnotations(),
+                               output->GetMethodAnnotations(),
+                               orig->GetOffset(),
+                               error_msg)) {
+    return false;
+  }
+  return VerifyParameterAnnotations(orig->GetParameterAnnotations(),
+                                    output->GetParameterAnnotations(),
+                                    orig->GetOffset(),
+                                    error_msg);
+}
+
+bool VerifyFieldAnnotations(dex_ir::FieldAnnotationVector* orig,
+                            dex_ir::FieldAnnotationVector* output,
+                            uint32_t orig_offset,
+                            std::string* error_msg) {
+  if (orig == nullptr || output == nullptr) {
+    if (orig != output) {
+      *error_msg = StringPrintf(
+          "Found unexpected empty field annotations for annotations directory at offset %x.",
+          orig_offset);
+      return false;
+    }
+    return true;
+  }
+  if (orig->size() != output->size()) {
+    *error_msg = StringPrintf(
+        "Mismatched field annotations size for annotations directory at offset %x: %zu vs %zu.",
+        orig_offset,
+        orig->size(),
+        output->size());
+    return false;
+  }
+  for (size_t i = 0; i < orig->size(); ++i) {
+    dex_ir::FieldAnnotation* orig_field = (*orig)[i].get();
+    dex_ir::FieldAnnotation* output_field = (*output)[i].get();
+    if (orig_field->GetFieldId()->GetIndex() != output_field->GetFieldId()->GetIndex()) {
+      *error_msg = StringPrintf(
+          "Mismatched field annotation index for annotations directory at offset %x: %u vs %u.",
+          orig_offset,
+          orig_field->GetFieldId()->GetIndex(),
+          output_field->GetFieldId()->GetIndex());
+      return false;
+    }
+    if (!VerifyAnnotationSet(orig_field->GetAnnotationSetItem(),
+                             output_field->GetAnnotationSetItem(),
+                             error_msg)) {
+      return false;
+    }
+  }
+  return true;
+}
+
+bool VerifyMethodAnnotations(dex_ir::MethodAnnotationVector* orig,
+                             dex_ir::MethodAnnotationVector* output,
+                             uint32_t orig_offset,
+                             std::string* error_msg) {
+  if (orig == nullptr || output == nullptr) {
+    if (orig != output) {
+      *error_msg = StringPrintf(
+          "Found unexpected empty method annotations for annotations directory at offset %x.",
+          orig_offset);
+      return false;
+    }
+    return true;
+  }
+  if (orig->size() != output->size()) {
+    *error_msg = StringPrintf(
+        "Mismatched method annotations size for annotations directory at offset %x: %zu vs %zu.",
+        orig_offset,
+        orig->size(),
+        output->size());
+    return false;
+  }
+  for (size_t i = 0; i < orig->size(); ++i) {
+    dex_ir::MethodAnnotation* orig_method = (*orig)[i].get();
+    dex_ir::MethodAnnotation* output_method = (*output)[i].get();
+    if (orig_method->GetMethodId()->GetIndex() != output_method->GetMethodId()->GetIndex()) {
+      *error_msg = StringPrintf(
+          "Mismatched method annotation index for annotations directory at offset %x: %u vs %u.",
+          orig_offset,
+          orig_method->GetMethodId()->GetIndex(),
+          output_method->GetMethodId()->GetIndex());
+      return false;
+    }
+    if (!VerifyAnnotationSet(orig_method->GetAnnotationSetItem(),
+                             output_method->GetAnnotationSetItem(),
+                             error_msg)) {
+      return false;
+    }
+  }
+  return true;
+}
+
+bool VerifyParameterAnnotations(dex_ir::ParameterAnnotationVector* orig,
+                                dex_ir::ParameterAnnotationVector* output,
+                                uint32_t orig_offset,
+                                std::string* error_msg) {
+  if (orig == nullptr || output == nullptr) {
+    if (orig != output) {
+      *error_msg = StringPrintf(
+          "Found unexpected empty parameter annotations for annotations directory at offset %x.",
+          orig_offset);
+      return false;
+    }
+    return true;
+  }
+  if (orig->size() != output->size()) {
+    *error_msg = StringPrintf(
+        "Mismatched parameter annotations size for annotations directory at offset %x: %zu vs %zu.",
+        orig_offset,
+        orig->size(),
+        output->size());
+    return false;
+  }
+  for (size_t i = 0; i < orig->size(); ++i) {
+    dex_ir::ParameterAnnotation* orig_param = (*orig)[i].get();
+    dex_ir::ParameterAnnotation* output_param = (*output)[i].get();
+    if (orig_param->GetMethodId()->GetIndex() != output_param->GetMethodId()->GetIndex()) {
+      *error_msg = StringPrintf(
+          "Mismatched parameter annotation index for annotations directory at offset %x: %u vs %u.",
+          orig_offset,
+          orig_param->GetMethodId()->GetIndex(),
+          output_param->GetMethodId()->GetIndex());
+      return false;
+    }
+    if (!VerifyAnnotationSetRefList(orig_param->GetAnnotations(),
+                                    output_param->GetAnnotations(),
+                                    error_msg)) {
+      return false;
+    }
+  }
+  return true;
+}
+
+bool VerifyAnnotationSetRefList(dex_ir::AnnotationSetRefList* orig,
+                                dex_ir::AnnotationSetRefList* output,
+                                std::string* error_msg) {
+  std::vector<dex_ir::AnnotationSetItem*>* orig_items = orig->GetItems();
+  std::vector<dex_ir::AnnotationSetItem*>* output_items = output->GetItems();
+  if (orig_items->size() != output_items->size()) {
+    *error_msg = StringPrintf(
+        "Mismatched annotation set ref list size at offset %x: %zu vs %zu.",
+        orig->GetOffset(),
+        orig_items->size(),
+        output_items->size());
+    return false;
+  }
+  for (size_t i = 0; i < orig_items->size(); ++i) {
+    if (!VerifyAnnotationSet((*orig_items)[i], (*output_items)[i], error_msg)) {
+      return false;
+    }
+  }
+  return true;
+}
+
+bool VerifyAnnotationSet(dex_ir::AnnotationSetItem* orig,
+                         dex_ir::AnnotationSetItem* output,
+                         std::string* error_msg) {
+  if (orig == nullptr || output == nullptr) {
+    if (orig != output) {
+      *error_msg = "Found unexpected empty annotation set.";
+      return false;
+    }
+    return true;
+  }
+  std::vector<dex_ir::AnnotationItem*>* orig_items = orig->GetItems();
+  std::vector<dex_ir::AnnotationItem*>* output_items = output->GetItems();
+  if (orig_items->size() != output_items->size()) {
+    *error_msg = StringPrintf("Mismatched size for annotation set at offset %x: %zu vs %zu.",
+                              orig->GetOffset(),
+                              orig_items->size(),
+                              output_items->size());
+    return false;
+  }
+  for (size_t i = 0; i < orig_items->size(); ++i) {
+    if (!VerifyAnnotation((*orig_items)[i], (*output_items)[i], error_msg)) {
+      return false;
+    }
+  }
+  return true;
+}
+
+bool VerifyAnnotation(dex_ir::AnnotationItem* orig,
+                      dex_ir::AnnotationItem* output,
+                      std::string* error_msg) {
+  if (orig->GetVisibility() != output->GetVisibility()) {
+    *error_msg = StringPrintf("Mismatched visibility for annotation at offset %x: %u vs %u.",
+                              orig->GetOffset(),
+                              orig->GetVisibility(),
+                              output->GetVisibility());
+    return false;
+  }
+  return VerifyEncodedAnnotation(orig->GetAnnotation(),
+                                 output->GetAnnotation(),
+                                 orig->GetOffset(),
+                                 error_msg);
+}
+
+bool VerifyEncodedAnnotation(dex_ir::EncodedAnnotation* orig,
+                             dex_ir::EncodedAnnotation* output,
+                             uint32_t orig_offset,
+                             std::string* error_msg) {
+  if (orig->GetType()->GetIndex() != output->GetType()->GetIndex()) {
+    *error_msg = StringPrintf(
+        "Mismatched encoded annotation type for annotation at offset %x: %u vs %u.",
+        orig_offset,
+        orig->GetType()->GetIndex(),
+        output->GetType()->GetIndex());
+    return false;
+  }
+  dex_ir::AnnotationElementVector* orig_elements = orig->GetAnnotationElements();
+  dex_ir::AnnotationElementVector* output_elements = output->GetAnnotationElements();
+  if (orig_elements->size() != output_elements->size()) {
+    *error_msg = StringPrintf(
+        "Mismatched encoded annotation size for annotation at offset %x: %zu vs %zu.",
+        orig_offset,
+        orig_elements->size(),
+        output_elements->size());
+    return false;
+  }
+  for (size_t i = 0; i < orig_elements->size(); ++i) {
+    if (!VerifyAnnotationElement((*orig_elements)[i].get(),
+                                 (*output_elements)[i].get(),
+                                 orig_offset,
+                                 error_msg)) {
+      return false;
+    }
+  }
+  return true;
+}
+
+bool VerifyAnnotationElement(dex_ir::AnnotationElement* orig,
+                             dex_ir::AnnotationElement* output,
+                             uint32_t orig_offset,
+                             std::string* error_msg) {
+  if (orig->GetName()->GetIndex() != output->GetName()->GetIndex()) {
+    *error_msg = StringPrintf(
+        "Mismatched annotation element name for annotation at offset %x: %u vs %u.",
+        orig_offset,
+        orig->GetName()->GetIndex(),
+        output->GetName()->GetIndex());
+    return false;
+  }
+  return VerifyEncodedValue(orig->GetValue(), output->GetValue(), orig_offset, error_msg);
+}
+
+bool VerifyEncodedValue(dex_ir::EncodedValue* orig,
+                        dex_ir::EncodedValue* output,
+                        uint32_t orig_offset,
+                        std::string* error_msg) {
+  if (orig->Type() != output->Type()) {
+    *error_msg = StringPrintf(
+        "Mismatched encoded value type for annotation or encoded array at offset %x: %d vs %d.",
+        orig_offset,
+        orig->Type(),
+        output->Type());
+    return false;
+  }
+  switch (orig->Type()) {
+    case DexFile::kDexAnnotationByte:
+      if (orig->GetByte() != output->GetByte()) {
+        *error_msg = StringPrintf("Mismatched encoded byte for annotation at offset %x: %d vs %d.",
+                                  orig_offset,
+                                  orig->GetByte(),
+                                  output->GetByte());
+        return false;
+      }
+      break;
+    case DexFile::kDexAnnotationShort:
+      if (orig->GetShort() != output->GetShort()) {
+        *error_msg = StringPrintf("Mismatched encoded short for annotation at offset %x: %d vs %d.",
+                                  orig_offset,
+                                  orig->GetShort(),
+                                  output->GetShort());
+        return false;
+      }
+      break;
+    case DexFile::kDexAnnotationChar:
+      if (orig->GetChar() != output->GetChar()) {
+        *error_msg = StringPrintf("Mismatched encoded char for annotation at offset %x: %c vs %c.",
+                                  orig_offset,
+                                  orig->GetChar(),
+                                  output->GetChar());
+        return false;
+      }
+      break;
+    case DexFile::kDexAnnotationInt:
+      if (orig->GetInt() != output->GetInt()) {
+        *error_msg = StringPrintf("Mismatched encoded int for annotation at offset %x: %d vs %d.",
+                                  orig_offset,
+                                  orig->GetInt(),
+                                  output->GetInt());
+        return false;
+      }
+      break;
+    case DexFile::kDexAnnotationLong:
+      if (orig->GetLong() != output->GetLong()) {
+        *error_msg = StringPrintf(
+            "Mismatched encoded long for annotation at offset %x: %" PRId64 " vs %" PRId64 ".",
+            orig_offset,
+            orig->GetLong(),
+            output->GetLong());
+        return false;
+      }
+      break;
+    case DexFile::kDexAnnotationFloat:
+      // The float value is encoded, so compare as if it's an int.
+      if (orig->GetInt() != output->GetInt()) {
+        *error_msg = StringPrintf(
+            "Mismatched encoded float for annotation at offset %x: %x (encoded) vs %x (encoded).",
+            orig_offset,
+            orig->GetInt(),
+            output->GetInt());
+        return false;
+      }
+      break;
+    case DexFile::kDexAnnotationDouble:
+      // The double value is encoded, so compare as if it's a long.
+      if (orig->GetLong() != output->GetLong()) {
+        *error_msg = StringPrintf(
+            "Mismatched encoded double for annotation at offset %x: %" PRIx64
+            " (encoded) vs %" PRIx64 " (encoded).",
+            orig_offset,
+            orig->GetLong(),
+            output->GetLong());
+        return false;
+      }
+      break;
+    case DexFile::kDexAnnotationString:
+      if (orig->GetStringId()->GetIndex() != output->GetStringId()->GetIndex()) {
+        *error_msg = StringPrintf(
+            "Mismatched encoded string for annotation at offset %x: %s vs %s.",
+            orig_offset,
+            orig->GetStringId()->Data(),
+            output->GetStringId()->Data());
+        return false;
+      }
+      break;
+    case DexFile::kDexAnnotationType:
+      if (orig->GetTypeId()->GetIndex() != output->GetTypeId()->GetIndex()) {
+        *error_msg = StringPrintf("Mismatched encoded type for annotation at offset %x: %u vs %u.",
+                                  orig_offset,
+                                  orig->GetTypeId()->GetIndex(),
+                                  output->GetTypeId()->GetIndex());
+        return false;
+      }
+      break;
+    case DexFile::kDexAnnotationField:
+    case DexFile::kDexAnnotationEnum:
+      if (orig->GetFieldId()->GetIndex() != output->GetFieldId()->GetIndex()) {
+        *error_msg = StringPrintf("Mismatched encoded field for annotation at offset %x: %u vs %u.",
+                                  orig_offset,
+                                  orig->GetFieldId()->GetIndex(),
+                                  output->GetFieldId()->GetIndex());
+        return false;
+      }
+      break;
+    case DexFile::kDexAnnotationMethod:
+      if (orig->GetMethodId()->GetIndex() != output->GetMethodId()->GetIndex()) {
+        *error_msg = StringPrintf(
+            "Mismatched encoded method for annotation at offset %x: %u vs %u.",
+            orig_offset,
+            orig->GetMethodId()->GetIndex(),
+            output->GetMethodId()->GetIndex());
+        return false;
+      }
+      break;
+    case DexFile::kDexAnnotationArray:
+      if (!VerifyEncodedArray(orig->GetEncodedArray(), output->GetEncodedArray(), error_msg)) {
+        return false;
+      }
+      break;
+    case DexFile::kDexAnnotationAnnotation:
+      if (!VerifyEncodedAnnotation(orig->GetEncodedAnnotation(),
+                                   output->GetEncodedAnnotation(),
+                                   orig_offset,
+                                   error_msg)) {
+        return false;
+      }
+      break;
+    case DexFile::kDexAnnotationNull:
+      break;
+    case DexFile::kDexAnnotationBoolean:
+      if (orig->GetBoolean() != output->GetBoolean()) {
+        *error_msg = StringPrintf(
+            "Mismatched encoded boolean for annotation at offset %x: %d vs %d.",
+            orig_offset,
+            orig->GetBoolean(),
+            output->GetBoolean());
+        return false;
+      }
+      break;
+    default:
+      break;
+  }
+  return true;
+}
+
+bool VerifyEncodedArray(dex_ir::EncodedArrayItem* orig,
+                        dex_ir::EncodedArrayItem* output,
+                        std::string* error_msg) {
+  if (orig == nullptr || output == nullptr) {
+    if (orig != output) {
+      *error_msg = "Found unexpected empty encoded array.";
+      return false;
+    }
+    return true;
+  }
+  dex_ir::EncodedValueVector* orig_vector = orig->GetEncodedValues();
+  dex_ir::EncodedValueVector* output_vector = output->GetEncodedValues();
+  if (orig_vector->size() != output_vector->size()) {
+    *error_msg = StringPrintf("Mismatched size for encoded array at offset %x: %zu vs %zu.",
+                              orig->GetOffset(),
+                              orig_vector->size(),
+                              output_vector->size());
+    return false;
+  }
+  for (size_t i = 0; i < orig_vector->size(); ++i) {
+    if (!VerifyEncodedValue((*orig_vector)[i].get(),
+                            (*output_vector)[i].get(),
+                            orig->GetOffset(),
+                            error_msg)) {
+      return false;
+    }
+  }
+  return true;
+}
+
+bool VerifyClassData(dex_ir::ClassData* orig, dex_ir::ClassData* output, std::string* error_msg) {
+  if (orig == nullptr || output == nullptr) {
+    if (orig != output) {
+      *error_msg = "Found unexpected empty class data.";
+      return false;
+    }
+    return true;
+  }
+  if (!VerifyFields(orig->StaticFields(), output->StaticFields(), orig->GetOffset(), error_msg)) {
+    return false;
+  }
+  if (!VerifyFields(orig->InstanceFields(),
+                    output->InstanceFields(),
+                    orig->GetOffset(),
+                    error_msg)) {
+    return false;
+  }
+  if (!VerifyMethods(orig->DirectMethods(),
+                     output->DirectMethods(),
+                     orig->GetOffset(),
+                     error_msg)) {
+    return false;
+  }
+  return VerifyMethods(orig->VirtualMethods(),
+                       output->VirtualMethods(),
+                       orig->GetOffset(),
+                       error_msg);
+}
+
+bool VerifyFields(dex_ir::FieldItemVector* orig,
+                  dex_ir::FieldItemVector* output,
+                  uint32_t orig_offset,
+                  std::string* error_msg) {
+  if (orig->size() != output->size()) {
+    *error_msg = StringPrintf("Mismatched fields size for class data at offset %x: %zu vs %zu.",
+                              orig_offset,
+                              orig->size(),
+                              output->size());
+    return false;
+  }
+  for (size_t i = 0; i < orig->size(); ++i) {
+    dex_ir::FieldItem* orig_field = (*orig)[i].get();
+    dex_ir::FieldItem* output_field = (*output)[i].get();
+    if (orig_field->GetFieldId()->GetIndex() != output_field->GetFieldId()->GetIndex()) {
+      *error_msg = StringPrintf("Mismatched field index for class data at offset %x: %u vs %u.",
+                                orig_offset,
+                                orig_field->GetFieldId()->GetIndex(),
+                                output_field->GetFieldId()->GetIndex());
+      return false;
+    }
+    if (orig_field->GetAccessFlags() != output_field->GetAccessFlags()) {
+      *error_msg = StringPrintf(
+          "Mismatched field access flags for class data at offset %x: %u vs %u.",
+          orig_offset,
+          orig_field->GetAccessFlags(),
+          output_field->GetAccessFlags());
+      return false;
+    }
+  }
+  return true;
+}
+
+bool VerifyMethods(dex_ir::MethodItemVector* orig,
+                   dex_ir::MethodItemVector* output,
+                   uint32_t orig_offset,
+                   std::string* error_msg) {
+  if (orig->size() != output->size()) {
+    *error_msg = StringPrintf("Mismatched methods size for class data at offset %x: %zu vs %zu.",
+                              orig_offset,
+                              orig->size(),
+                              output->size());
+    return false;
+  }
+  for (size_t i = 0; i < orig->size(); ++i) {
+    dex_ir::MethodItem* orig_method = (*orig)[i].get();
+    dex_ir::MethodItem* output_method = (*output)[i].get();
+    if (orig_method->GetMethodId()->GetIndex() != output_method->GetMethodId()->GetIndex()) {
+      *error_msg = StringPrintf("Mismatched method index for class data at offset %x: %u vs %u.",
+                                orig_offset,
+                                orig_method->GetMethodId()->GetIndex(),
+                                output_method->GetMethodId()->GetIndex());
+      return false;
+    }
+    if (orig_method->GetAccessFlags() != output_method->GetAccessFlags()) {
+      *error_msg = StringPrintf(
+          "Mismatched method access flags for class data at offset %x: %u vs %u.",
+          orig_offset,
+          orig_method->GetAccessFlags(),
+          output_method->GetAccessFlags());
+      return false;
+    }
+    if (!VerifyCode(orig_method->GetCodeItem(), output_method->GetCodeItem(), error_msg)) {
+      return false;
+    }
+  }
+  return true;
+}
+
+bool VerifyCode(dex_ir::CodeItem* orig, dex_ir::CodeItem* output, std::string* error_msg) {
+  if (orig == nullptr || output == nullptr) {
+    if (orig != output) {
+      *error_msg = "Found unexpected empty code item.";
+      return false;
+    }
+    return true;
+  }
+  if (orig->RegistersSize() != output->RegistersSize()) {
+    *error_msg = StringPrintf("Mismatched registers size for code item at offset %x: %u vs %u.",
+                              orig->GetOffset(),
+                              orig->RegistersSize(),
+                              output->RegistersSize());
+    return false;
+  }
+  if (orig->InsSize() != output->InsSize()) {
+    *error_msg = StringPrintf("Mismatched ins size for code item at offset %x: %u vs %u.",
+                              orig->GetOffset(),
+                              orig->InsSize(),
+                              output->InsSize());
+    return false;
+  }
+  if (orig->OutsSize() != output->OutsSize()) {
+    *error_msg = StringPrintf("Mismatched outs size for code item at offset %x: %u vs %u.",
+                              orig->GetOffset(),
+                              orig->OutsSize(),
+                              output->OutsSize());
+    return false;
+  }
+  if (orig->TriesSize() != output->TriesSize()) {
+    *error_msg = StringPrintf("Mismatched tries size for code item at offset %x: %u vs %u.",
+                              orig->GetOffset(),
+                              orig->TriesSize(),
+                              output->TriesSize());
+    return false;
+  }
+  if (!VerifyDebugInfo(orig->DebugInfo(), output->DebugInfo(), error_msg)) {
+    return false;
+  }
+  if (orig->InsnsSize() != output->InsnsSize()) {
+    *error_msg = StringPrintf("Mismatched insns size for code item at offset %x: %u vs %u.",
+                              orig->GetOffset(),
+                              orig->InsnsSize(),
+                              output->InsnsSize());
+    return false;
+  }
+  // InsnsSize() counts 16-bit code units, so convert to bytes for memcmp.
+  if (memcmp(orig->Insns(), output->Insns(), orig->InsnsSize() * sizeof(uint16_t)) != 0) {
+    *error_msg = StringPrintf("Mismatched insns for code item at offset %x.",
+                              orig->GetOffset());
+    return false;
+  }
+  if (!VerifyTries(orig->Tries(), output->Tries(), orig->GetOffset(), error_msg)) {
+    return false;
+  }
+  return VerifyHandlers(orig->Handlers(), output->Handlers(), orig->GetOffset(), error_msg);
+}
+
+bool VerifyDebugInfo(dex_ir::DebugInfoItem* orig,
+                     dex_ir::DebugInfoItem* output,
+                     std::string* error_msg) {
+  if (orig == nullptr || output == nullptr) {
+    if (orig != output) {
+      *error_msg = "Found unexpected empty debug info.";
+      return false;
+    }
+    return true;
+  }
+  if (!VerifyPositionInfo(orig->GetPositionInfo(),
+                          output->GetPositionInfo(),
+                          orig->GetOffset(),
+                          error_msg)) {
+    return false;
+  }
+  return VerifyLocalInfo(orig->GetLocalInfo(),
+                         output->GetLocalInfo(),
+                         orig->GetOffset(),
+                         error_msg);
+}
+
+bool VerifyPositionInfo(dex_ir::PositionInfoVector& orig,
+                        dex_ir::PositionInfoVector& output,
+                        uint32_t orig_offset,
+                        std::string* error_msg) {
+  if (orig.size() != output.size()) {
+    *error_msg = StringPrintf(
+        "Mismatched number of positions for debug info at offset %x: %zu vs %zu.",
+        orig_offset,
+        orig.size(),
+        output.size());
+    return false;
+  }
+  for (size_t i = 0; i < orig.size(); ++i) {
+    if (orig[i]->address_ != output[i]->address_) {
+      *error_msg = StringPrintf(
+          "Mismatched position address for debug info at offset %x: %u vs %u.",
+          orig_offset,
+          orig[i]->address_,
+          output[i]->address_);
+      return false;
+    }
+    if (orig[i]->line_ != output[i]->line_) {
+      *error_msg = StringPrintf("Mismatched position line for debug info at offset %x: %u vs %u.",
+                                orig_offset,
+                                orig[i]->line_,
+                                output[i]->line_);
+      return false;
+    }
+  }
+  return true;
+}
+
+bool VerifyLocalInfo(dex_ir::LocalInfoVector& orig,
+                     dex_ir::LocalInfoVector& output,
+                     uint32_t orig_offset,
+                     std::string* error_msg) {
+  if (orig.size() != output.size()) {
+    *error_msg = StringPrintf(
+        "Mismatched number of locals for debug info at offset %x: %zu vs %zu.",
+        orig_offset,
+        orig.size(),
+        output.size());
+    return false;
+  }
+  for (size_t i = 0; i < orig.size(); ++i) {
+    if (orig[i]->name_ != output[i]->name_) {
+      *error_msg = StringPrintf("Mismatched local name for debug info at offset %x: %s vs %s.",
+                                orig_offset,
+                                orig[i]->name_.c_str(),
+                                output[i]->name_.c_str());
+      return false;
+    }
+    if (orig[i]->descriptor_ != output[i]->descriptor_) {
+      *error_msg = StringPrintf(
+          "Mismatched local descriptor for debug info at offset %x: %s vs %s.",
+          orig_offset,
+          orig[i]->descriptor_.c_str(),
+          output[i]->descriptor_.c_str());
+      return false;
+    }
+    if (orig[i]->signature_ != output[i]->signature_) {
+      *error_msg = StringPrintf("Mismatched local signature for debug info at offset %x: %s vs %s.",
+                                orig_offset,
+                                orig[i]->signature_.c_str(),
+                                output[i]->signature_.c_str());
+      return false;
+    }
+    if (orig[i]->start_address_ != output[i]->start_address_) {
+      *error_msg = StringPrintf(
+          "Mismatched local start address for debug info at offset %x: %u vs %u.",
+          orig_offset,
+          orig[i]->start_address_,
+          output[i]->start_address_);
+      return false;
+    }
+    if (orig[i]->end_address_ != output[i]->end_address_) {
+      *error_msg = StringPrintf(
+          "Mismatched local end address for debug info at offset %x: %u vs %u.",
+          orig_offset,
+          orig[i]->end_address_,
+          output[i]->end_address_);
+      return false;
+    }
+    if (orig[i]->reg_ != output[i]->reg_) {
+      *error_msg = StringPrintf("Mismatched local reg for debug info at offset %x: %u vs %u.",
+                                orig_offset,
+                                orig[i]->reg_,
+                                output[i]->reg_);
+      return false;
+    }
+  }
+  return true;
+}
+
+bool VerifyTries(dex_ir::TryItemVector* orig,
+                 dex_ir::TryItemVector* output,
+                 uint32_t orig_offset,
+                 std::string* error_msg) {
+  if (orig == nullptr || output == nullptr) {
+    if (orig != output) {
+      *error_msg = "Found unexpected empty try items.";
+      return false;
+    }
+    return true;
+  }
+  if (orig->size() != output->size()) {
+    *error_msg = StringPrintf("Mismatched tries size for code item at offset %x: %zu vs %zu.",
+                              orig_offset,
+                              orig->size(),
+                              output->size());
+    return false;
+  }
+  for (size_t i = 0; i < orig->size(); ++i) {
+    const dex_ir::TryItem* orig_try = (*orig)[i].get();
+    const dex_ir::TryItem* output_try = (*output)[i].get();
+    if (orig_try->StartAddr() != output_try->StartAddr()) {
+      *error_msg = StringPrintf(
+          "Mismatched try item start addr for code item at offset %x: %u vs %u.",
+          orig_offset,
+          orig_try->StartAddr(),
+          output_try->StartAddr());
+      return false;
+    }
+    if (orig_try->InsnCount() != output_try->InsnCount()) {
+      *error_msg = StringPrintf(
+          "Mismatched try item insn count for code item at offset %x: %u vs %u.",
+          orig_offset,
+          orig_try->InsnCount(),
+          output_try->InsnCount());
+      return false;
+    }
+    if (!VerifyHandler(orig_try->GetHandlers(),
+                       output_try->GetHandlers(),
+                       orig_offset,
+                       error_msg)) {
+      return false;
+    }
+  }
+  return true;
+}
+
+bool VerifyHandlers(dex_ir::CatchHandlerVector* orig,
+                    dex_ir::CatchHandlerVector* output,
+                    uint32_t orig_offset,
+                    std::string* error_msg) {
+  if (orig == nullptr || output == nullptr) {
+    if (orig != output) {
+      *error_msg = "Found unexpected empty catch handlers.";
+      return false;
+    }
+    return true;
+  }
+  if (orig->size() != output->size()) {
+    *error_msg = StringPrintf(
+        "Mismatched catch handlers size for code item at offset %x: %zu vs %zu.",
+        orig_offset,
+        orig->size(),
+        output->size());
+    return false;
+  }
+  for (size_t i = 0; i < orig->size(); ++i) {
+    if (!VerifyHandler((*orig)[i].get(), (*output)[i].get(), orig_offset, error_msg)) {
+      return false;
+    }
+  }
+  return true;
+}
+
+bool VerifyHandler(const dex_ir::CatchHandler* orig,
+                   const dex_ir::CatchHandler* output,
+                   uint32_t orig_offset,
+                   std::string* error_msg) {
+  dex_ir::TypeAddrPairVector* orig_handlers = orig->GetHandlers();
+  dex_ir::TypeAddrPairVector* output_handlers = output->GetHandlers();
+  if (orig_handlers->size() != output_handlers->size()) {
+    *error_msg = StringPrintf(
+        "Mismatched number of catch handlers for code item at offset %x: %zu vs %zu.",
+        orig_offset,
+        orig_handlers->size(),
+        output_handlers->size());
+    return false;
+  }
+  for (size_t i = 0; i < orig_handlers->size(); ++i) {
+    const dex_ir::TypeAddrPair* orig_handler = (*orig_handlers)[i].get();
+    const dex_ir::TypeAddrPair* output_handler = (*output_handlers)[i].get();
+    if (orig_handler->GetTypeId() == nullptr || output_handler->GetTypeId() == nullptr) {
+      if (orig_handler->GetTypeId() != output_handler->GetTypeId()) {
+        *error_msg = StringPrintf(
+            "Found unexpected catch all catch handler for code item at offset %x.",
+            orig_offset);
+        return false;
+      }
+    } else if (orig_handler->GetTypeId()->GetIndex() != output_handler->GetTypeId()->GetIndex()) {
+      *error_msg = StringPrintf(
+          "Mismatched catch handler type for code item at offset %x: %u vs %u.",
+          orig_offset,
+          orig_handler->GetTypeId()->GetIndex(),
+          output_handler->GetTypeId()->GetIndex());
+      return false;
+    }
+    if (orig_handler->GetAddress() != output_handler->GetAddress()) {
+      *error_msg = StringPrintf(
+          "Mismatched catch handler address for code item at offset %x: %u vs %u.",
+          orig_offset,
+          orig_handler->GetAddress(),
+          output_handler->GetAddress());
+      return false;
+    }
+  }
+  return true;
+}
+
+}  // namespace art
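
A note on the pattern above: every verifier walks the original and output IR in lockstep and, on the first mismatch, fills *error_msg with a formatted description and returns false, so the failure surfaces at the caller. A minimal self-contained sketch of that shape, with a hypothetical Item type and plain snprintf standing in for ART's StringPrintf:

    #include <cstdio>
    #include <string>
    #include <vector>

    // Hypothetical stand-in for a dex IR node; the real types live in dex_ir.h.
    struct Item {
      unsigned addr;
    };

    // Mirrors VerifyTries/VerifyHandlers: compare pairwise, report the first mismatch.
    bool VerifyItems(const std::vector<Item>& orig,
                     const std::vector<Item>& output,
                     std::string* error_msg) {
      if (orig.size() != output.size()) {
        char buf[64];
        std::snprintf(buf, sizeof(buf), "Mismatched sizes: %zu vs %zu.",
                      orig.size(), output.size());
        *error_msg = buf;
        return false;
      }
      for (size_t i = 0; i < orig.size(); ++i) {
        if (orig[i].addr != output[i].addr) {
          char buf[64];
          std::snprintf(buf, sizeof(buf), "Mismatched addr at %zu: %u vs %u.",
                        i, orig[i].addr, output[i].addr);
          *error_msg = buf;
          return false;
        }
      }
      return true;
    }

    int main() {
      std::vector<Item> orig = {{1}, {2}};
      std::vector<Item> output = {{1}, {3}};
      std::string msg;
      if (!VerifyItems(orig, output, &msg)) std::printf("%s\n", msg.c_str());
      return 0;
    }
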
diff --git a/dexlayout/dex_verify.h b/dexlayout/dex_verify.h
new file mode 100644
index 0000000..58c95d6
--- /dev/null
+++ b/dexlayout/dex_verify.h
@@ -0,0 +1,125 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Header file of dex ir verifier.
+ *
+ * Compares two dex files at the IR level, allowing differences in layout, but not in data.
+ */
+
+#ifndef ART_DEXLAYOUT_DEX_VERIFY_H_
+#define ART_DEXLAYOUT_DEX_VERIFY_H_
+
+#include "dex_ir.h"
+
+namespace art {
+// Check that the output dex file contains the same data as the original.
+// Compares the dex IR of both dex files. Allows the dex files to have different layouts.
+bool VerifyOutputDexFile(dex_ir::Header* orig_header,
+                         dex_ir::Header* output_header,
+                         std::string* error_msg);
+
+template<class T> bool VerifyIds(std::vector<std::unique_ptr<T>>& orig,
+                                 std::vector<std::unique_ptr<T>>& output,
+                                 const char* section_name,
+                                 std::string* error_msg);
+bool VerifyId(dex_ir::StringId* orig, dex_ir::StringId* output, std::string* error_msg);
+bool VerifyId(dex_ir::TypeId* orig, dex_ir::TypeId* output, std::string* error_msg);
+bool VerifyId(dex_ir::ProtoId* orig, dex_ir::ProtoId* output, std::string* error_msg);
+bool VerifyId(dex_ir::FieldId* orig, dex_ir::FieldId* output, std::string* error_msg);
+bool VerifyId(dex_ir::MethodId* orig, dex_ir::MethodId* output, std::string* error_msg);
+
+bool VerifyClassDefs(std::vector<std::unique_ptr<dex_ir::ClassDef>>& orig,
+                     std::vector<std::unique_ptr<dex_ir::ClassDef>>& output,
+                     std::string* error_msg);
+bool VerifyClassDef(dex_ir::ClassDef* orig, dex_ir::ClassDef* output, std::string* error_msg);
+
+bool VerifyTypeList(const dex_ir::TypeList* orig, const dex_ir::TypeList* output);
+
+bool VerifyAnnotationsDirectory(dex_ir::AnnotationsDirectoryItem* orig,
+                                dex_ir::AnnotationsDirectoryItem* output,
+                                std::string* error_msg);
+bool VerifyFieldAnnotations(dex_ir::FieldAnnotationVector* orig,
+                            dex_ir::FieldAnnotationVector* output,
+                            uint32_t orig_offset,
+                            std::string* error_msg);
+bool VerifyMethodAnnotations(dex_ir::MethodAnnotationVector* orig,
+                             dex_ir::MethodAnnotationVector* output,
+                             uint32_t orig_offset,
+                             std::string* error_msg);
+bool VerifyParameterAnnotations(dex_ir::ParameterAnnotationVector* orig,
+                                dex_ir::ParameterAnnotationVector* output,
+                                uint32_t orig_offset,
+                                std::string* error_msg);
+bool VerifyAnnotationSetRefList(dex_ir::AnnotationSetRefList* orig,
+                                dex_ir::AnnotationSetRefList* output,
+                                std::string* error_msg);
+bool VerifyAnnotationSet(dex_ir::AnnotationSetItem* orig,
+                         dex_ir::AnnotationSetItem* output,
+                         std::string* error_msg);
+bool VerifyAnnotation(dex_ir::AnnotationItem* orig,
+                      dex_ir::AnnotationItem* output,
+                      std::string* error_msg);
+bool VerifyEncodedAnnotation(dex_ir::EncodedAnnotation* orig,
+                             dex_ir::EncodedAnnotation* output,
+                             uint32_t orig_offset,
+                             std::string* error_msg);
+bool VerifyAnnotationElement(dex_ir::AnnotationElement* orig,
+                             dex_ir::AnnotationElement* output,
+                             uint32_t orig_offset,
+                             std::string* error_msg);
+bool VerifyEncodedValue(dex_ir::EncodedValue* orig,
+                        dex_ir::EncodedValue* output,
+                        uint32_t orig_offset,
+                        std::string* error_msg);
+bool VerifyEncodedArray(dex_ir::EncodedArrayItem* orig,
+                        dex_ir::EncodedArrayItem* output,
+                        std::string* error_msg);
+
+bool VerifyClassData(dex_ir::ClassData* orig, dex_ir::ClassData* output, std::string* error_msg);
+bool VerifyFields(dex_ir::FieldItemVector* orig,
+                  dex_ir::FieldItemVector* output,
+                  uint32_t orig_offset,
+                  std::string* error_msg);
+bool VerifyMethods(dex_ir::MethodItemVector* orig,
+                   dex_ir::MethodItemVector* output,
+                   uint32_t orig_offset,
+                   std::string* error_msg);
+bool VerifyCode(dex_ir::CodeItem* orig, dex_ir::CodeItem* output, std::string* error_msg);
+bool VerifyDebugInfo(dex_ir::DebugInfoItem* orig,
+                     dex_ir::DebugInfoItem* output,
+                     std::string* error_msg);
+bool VerifyPositionInfo(dex_ir::PositionInfoVector& orig,
+                        dex_ir::PositionInfoVector& output,
+                        uint32_t orig_offset,
+                        std::string* error_msg);
+bool VerifyLocalInfo(dex_ir::LocalInfoVector& orig,
+                     dex_ir::LocalInfoVector& output,
+                     uint32_t orig_offset,
+                     std::string* error_msg);
+bool VerifyTries(dex_ir::TryItemVector* orig,
+                 dex_ir::TryItemVector* output,
+                 uint32_t orig_offset,
+                 std::string* error_msg);
+bool VerifyHandlers(dex_ir::CatchHandlerVector* orig,
+                    dex_ir::CatchHandlerVector* output,
+                    uint32_t orig_offset,
+                    std::string* error_msg);
+bool VerifyHandler(const dex_ir::CatchHandler* orig,
+                   const dex_ir::CatchHandler* output,
+                   uint32_t orig_offset,
+                   std::string* error_msg);
+}  // namespace art
+
+#endif  // ART_DEXLAYOUT_DEX_VERIFY_H_
diff --git a/dexlayout/dex_visualize.cc b/dexlayout/dex_visualize.cc
index 75d47e4..8997146 100644
--- a/dexlayout/dex_visualize.cc
+++ b/dexlayout/dex_visualize.cc
@@ -35,6 +35,12 @@
 
 namespace art {
 
+std::string MultidexName(const std::string& prefix,
+                         size_t dex_file_index,
+                         const std::string& suffix) {
+  return prefix + ((dex_file_index > 0) ? std::to_string(dex_file_index + 1) : "") + suffix;
+}
+
 struct FileSection {
  public:
   std::string name_;
@@ -43,8 +49,22 @@
   std::function<uint32_t(const dex_ir::Collections&)> offset_fn_;
 };
 
+static uint32_t HeaderOffset(const dex_ir::Collections& collections ATTRIBUTE_UNUSED) {
+  return 0;
+}
+
+static uint32_t HeaderSize(const dex_ir::Collections& collections ATTRIBUTE_UNUSED) {
+  // Size is in elements, so there is only one header.
+  return 1;
+}
+
 static const std::vector<FileSection> kFileSections = {
   {
+    "Header",
+    DexFile::kDexTypeHeaderItem,
+    &HeaderSize,
+    &HeaderOffset,
+  }, {
     "StringId",
     DexFile::kDexTypeStringIdItem,
     &dex_ir::Collections::StringIdsSize,
@@ -127,58 +147,71 @@
   }
 };
 
+static constexpr bool kSortAscending = false;
+static constexpr bool kSortDescending = true;
+
+static std::vector<const FileSection*> GetSortedSections(
+    const dex_ir::Collections& collections,
+    bool sort_descending) {
+  std::vector<const FileSection*> sorted_sections;
+  // Collect pointers to all file sections.
+  for (const FileSection& s : kFileSections) {
+    sorted_sections.push_back(&s);
+  }
+  // Sort by offset.
+  std::sort(sorted_sections.begin(),
+            sorted_sections.end(),
+            [&](const FileSection* a, const FileSection* b) {
+              if (sort_descending) {
+                return a->offset_fn_(collections) > b->offset_fn_(collections);
+              } else {
+                return a->offset_fn_(collections) < b->offset_fn_(collections);
+              }
+            });
+  return sorted_sections;
+}
+
 class Dumper {
  public:
   // Colors are based on the type of the section in MapList.
-  Dumper(const dex_ir::Collections& collections, size_t dex_file_index) {
-    // Build the table that will map from offset to color
-    table_.emplace_back(DexFile::kDexTypeHeaderItem, 0u);
-    for (const FileSection& s : kFileSections) {
-      table_.emplace_back(s.type_, s.offset_fn_(collections));
-    }
-    // Sort into descending order by offset.
-    std::sort(table_.begin(),
-              table_.end(),
-              [](const SectionColor& a, const SectionColor& b) { return a.offset_ > b.offset_; });
+  explicit Dumper(const dex_ir::Collections& collections)
+      : collections_(collections), out_file_(nullptr),
+        sorted_sections_(GetSortedSections(collections, kSortDescending)) { }
+
+  bool OpenAndPrintHeader(size_t dex_index) {
     // Open the file and emit the gnuplot prologue.
-    std::string dex_file_name("classes");
-    std::string out_file_base_name("layout");
-    if (dex_file_index > 0) {
-      out_file_base_name += std::to_string(dex_file_index + 1);
-      dex_file_name += std::to_string(dex_file_index + 1);
+    out_file_ = fopen(MultidexName("layout", dex_index, ".gnuplot").c_str(), "w");
+    if (out_file_ == nullptr) {
+      return false;
     }
-    dex_file_name += ".dex";
-    std::string out_file_name(out_file_base_name + ".gnuplot");
-    std::string png_file_name(out_file_base_name + ".png");
-    out_file_ = fopen(out_file_name.c_str(), "w");
     fprintf(out_file_, "set terminal png size 1920,1080\n");
-    fprintf(out_file_, "set output \"%s\"\n", png_file_name.c_str());
-    fprintf(out_file_, "set title \"%s\"\n", dex_file_name.c_str());
+    fprintf(out_file_, "set output \"%s\"\n", MultidexName("layout", dex_index, ".png").c_str());
+    fprintf(out_file_, "set title \"%s\"\n", MultidexName("classes", dex_index, ".dex").c_str());
     fprintf(out_file_, "set xlabel \"Page offset into dex\"\n");
     fprintf(out_file_, "set ylabel \"ClassDef index\"\n");
     fprintf(out_file_, "set xtics rotate out (");
-    fprintf(out_file_, "\"Header\" %d, ", 0);
     bool printed_one = false;
     for (const FileSection& s : kFileSections) {
-      if (s.size_fn_(collections) > 0) {
+      if (s.size_fn_(collections_) > 0) {
         if (printed_one) {
           fprintf(out_file_, ", ");
         }
-        fprintf(out_file_, "\"%s\" %d", s.name_.c_str(), s.offset_fn_(collections) / kPageSize);
+        fprintf(out_file_, "\"%s\" %d", s.name_.c_str(), s.offset_fn_(collections_) / kPageSize);
         printed_one = true;
       }
     }
     fprintf(out_file_, ")\n");
     fprintf(out_file_,
             "plot \"-\" using 1:2:3:4:5 with vector nohead linewidth 1 lc variable notitle\n");
+    return true;
   }
 
   int GetColor(uint32_t offset) const {
     // The dread linear search to find the right section for the reference.
     uint16_t section = 0;
-    for (uint16_t i = 0; i < table_.size(); ++i) {
-      if (table_[i].offset_ < offset) {
-        section = table_[i].type_;
+    for (const FileSection* file_section : sorted_sections_) {
+      if (file_section->offset_fn_(collections_) < offset) {
+        section = file_section->type_;
         break;
       }
     }
@@ -308,13 +341,6 @@
   }
 
  private:
-  struct SectionColor {
-   public:
-    SectionColor(uint16_t type, uint32_t offset) : type_(type), offset_(offset) { }
-    uint16_t type_;
-    uint32_t offset_;
-  };
-
   using ColorMapType = std::map<uint16_t, int>;
   const ColorMapType kColorMap = {
     { DexFile::kDexTypeHeaderItem, 1 },
@@ -336,8 +362,9 @@
     { DexFile::kDexTypeAnnotationsDirectoryItem, 16 }
   };
 
-  std::vector<SectionColor> table_;
+  const dex_ir::Collections& collections_;
   FILE* out_file_;
+  std::vector<const FileSection*> sorted_sections_;
 
   DISALLOW_COPY_AND_ASSIGN(Dumper);
 };
@@ -350,7 +377,11 @@
                         const DexFile* dex_file,
                         size_t dex_file_index,
                         ProfileCompilationInfo* profile_info) {
-  std::unique_ptr<Dumper> dumper(new Dumper(header->GetCollections(), dex_file_index));
+  std::unique_ptr<Dumper> dumper(new Dumper(header->GetCollections()));
+  if (!dumper->OpenAndPrintHeader(dex_file_index)) {
+    fprintf(stderr, "Could not open output file.\n");
+    return;
+  }
 
   const uint32_t class_defs_size = header->GetCollections().ClassDefsSize();
   for (uint32_t class_index = 0; class_index < class_defs_size; class_index++) {
@@ -401,4 +432,22 @@
   }  // for
 }
 
+/*
+ * Dumps the offset and size of sections within the file.
+ */
+void ShowDexSectionStatistics(dex_ir::Header* header, size_t dex_file_index) {
+  // Compute the (multidex) class file name.
+  fprintf(stdout, "%s\n", MultidexName("classes", dex_file_index, ".dex").c_str());
+  fprintf(stdout, "section    offset     items\n");
+  const dex_ir::Collections& collections = header->GetCollections();
+  std::vector<const FileSection*> sorted_sections(GetSortedSections(collections, kSortAscending));
+  for (const FileSection* file_section : sorted_sections) {
+    fprintf(stdout, "%-10s 0x%08x 0x%08x\n",
+      file_section->name_.c_str(),
+      file_section->offset_fn_(collections),
+      file_section->size_fn_(collections));
+  }
+  fprintf(stdout, "\n");
+}
+
 }  // namespace art
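
GetColor above resolves a reference offset to its containing section by scanning the sections sorted in descending offset order and taking the first one that starts strictly below the reference. The same lookup in isolation, as a sketch with plain structs rather than the FileSection accessors:

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    struct Section {
      uint16_t type;
      uint32_t offset;
    };

    // Sort descending by offset, then take the first section starting strictly
    // below the reference offset: first match wins, exactly as in GetColor.
    uint16_t SectionForOffset(std::vector<Section> sections, uint32_t ref) {
      std::sort(sections.begin(), sections.end(),
                [](const Section& a, const Section& b) { return a.offset > b.offset; });
      for (const Section& s : sections) {
        if (s.offset < ref) {
          return s.type;
        }
      }
      return 0;  // Nothing below the reference; falls back to the first section.
    }

    int main() {
      std::vector<Section> sections = {{1, 0x0}, {2, 0x70}, {3, 0x200}};
      return SectionForOffset(sections, 0x100) == 2 ? 0 : 1;  // 0x100 lies in section 2.
    }
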
diff --git a/dexlayout/dex_visualize.h b/dexlayout/dex_visualize.h
index 09f8306..a1aa2cd 100644
--- a/dexlayout/dex_visualize.h
+++ b/dexlayout/dex_visualize.h
@@ -38,6 +38,8 @@
                         size_t dex_file_index,
                         ProfileCompilationInfo* profile_info);
 
+void ShowDexSectionStatistics(dex_ir::Header* header, size_t dex_file_index);
+
 }  // namespace art
 
 #endif  // ART_DEXLAYOUT_DEX_VISUALIZE_H_
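
For reference, the MultidexName helper added to dex_visualize.cc implements the standard multidex naming convention: index 0 keeps the bare name, and index N for N > 0 appends N + 1. A standalone illustration of the resulting names:

    #include <cstdio>
    #include <string>

    // Same logic as the MultidexName helper in dex_visualize.cc.
    std::string MultidexName(const std::string& prefix, size_t index, const std::string& suffix) {
      return prefix + ((index > 0) ? std::to_string(index + 1) : "") + suffix;
    }

    int main() {
      std::printf("%s\n", MultidexName("classes", 0, ".dex").c_str());  // classes.dex
      std::printf("%s\n", MultidexName("classes", 1, ".dex").c_str());  // classes2.dex
      std::printf("%s\n", MultidexName("classes", 2, ".dex").c_str());  // classes3.dex
      return 0;
    }
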
diff --git a/dexlayout/dexlayout.cc b/dexlayout/dexlayout.cc
index 1add6bf..615bcf9 100644
--- a/dexlayout/dexlayout.cc
+++ b/dexlayout/dexlayout.cc
@@ -34,7 +34,9 @@
 
 #include "dex_ir_builder.h"
 #include "dex_file-inl.h"
+#include "dex_file_verifier.h"
 #include "dex_instruction-inl.h"
+#include "dex_verify.h"
 #include "dex_visualize.h"
 #include "dex_writer.h"
 #include "jit/profile_compilation_info.h"
@@ -46,6 +48,8 @@
 
 using android::base::StringPrintf;
 
+static constexpr uint32_t kDexCodeItemAlignment = 4;
+
 /*
  * Flags for use with createAccessFlagStr().
  */
@@ -1365,10 +1369,11 @@
   }
 
   // Interfaces.
-  const dex_ir::TypeIdVector* interfaces = class_def->Interfaces();
+  const dex_ir::TypeList* interfaces = class_def->Interfaces();
   if (interfaces != nullptr) {
-    for (uint32_t i = 0; i < interfaces->size(); i++) {
-      DumpInterface((*interfaces)[i], i);
+    const dex_ir::TypeIdVector* interfaces_vector = interfaces->GetTypeList();
+    for (uint32_t i = 0; i < interfaces_vector->size(); i++) {
+      DumpInterface((*interfaces_vector)[i], i);
     }  // for
   }
 
@@ -1489,7 +1494,7 @@
   }
 }
 
-std::vector<dex_ir::ClassDef*> DexLayout::LayoutClassDefsAndClassData(const DexFile* dex_file) {
+std::vector<dex_ir::ClassData*> DexLayout::LayoutClassDefsAndClassData(const DexFile* dex_file) {
   std::vector<dex_ir::ClassDef*> new_class_def_order;
   for (std::unique_ptr<dex_ir::ClassDef>& class_def : header_->GetCollections().ClassDefs()) {
     dex::TypeIndex type_idx(class_def->ClassType()->GetIndex());
@@ -1505,46 +1510,101 @@
   }
   uint32_t class_defs_offset = header_->GetCollections().ClassDefsOffset();
   uint32_t class_data_offset = header_->GetCollections().ClassDatasOffset();
+  std::unordered_set<dex_ir::ClassData*> visited_class_data;
+  std::vector<dex_ir::ClassData*> new_class_data_order;
   for (uint32_t i = 0; i < new_class_def_order.size(); ++i) {
     dex_ir::ClassDef* class_def = new_class_def_order[i];
     class_def->SetIndex(i);
     class_def->SetOffset(class_defs_offset);
     class_defs_offset += dex_ir::ClassDef::ItemSize();
-    if (class_def->GetClassData() != nullptr) {
-      class_def->GetClassData()->SetOffset(class_data_offset);
-      class_data_offset += class_def->GetClassData()->GetSize();
+    dex_ir::ClassData* class_data = class_def->GetClassData();
+    if (class_data != nullptr && visited_class_data.find(class_data) == visited_class_data.end()) {
+      class_data->SetOffset(class_data_offset);
+      class_data_offset += class_data->GetSize();
+      visited_class_data.insert(class_data);
+      new_class_data_order.push_back(class_data);
     }
   }
-  return new_class_def_order;
+  return new_class_data_order;
 }
 
-int32_t DexLayout::LayoutCodeItems(std::vector<dex_ir::ClassDef*> new_class_def_order) {
-  int32_t diff = 0;
-  uint32_t offset = header_->GetCollections().CodeItemsOffset();
-  for (dex_ir::ClassDef* class_def : new_class_def_order) {
-    dex_ir::ClassData* class_data = class_def->GetClassData();
-    if (class_data != nullptr) {
-      class_data->SetOffset(class_data->GetOffset() + diff);
-      for (auto& method : *class_data->DirectMethods()) {
-        dex_ir::CodeItem* code_item = method->GetCodeItem();
-        if (code_item != nullptr) {
-          diff += UnsignedLeb128Size(offset) - UnsignedLeb128Size(code_item->GetOffset());
-          code_item->SetOffset(offset);
-          offset += RoundUp(code_item->GetSize(), 4);
-        }
+// Orders code items according to the specified class data ordering.
+// NOTE: If the section following the code items is not 4-byte aligned, the last code item is
+// left in place to preserve alignment. Layout needs an overhaul to handle moving other sections.
+int32_t DexLayout::LayoutCodeItems(std::vector<dex_ir::ClassData*> new_class_data_order) {
+  // Do not move code items if class data section precedes code item section.
+  // ULEB encoding is variable length, causing problems determining the offset of the code items.
+  // TODO: We should swap the order of these sections in the future to avoid this issue.
+  uint32_t class_data_offset = header_->GetCollections().ClassDatasOffset();
+  uint32_t code_item_offset = header_->GetCollections().CodeItemsOffset();
+  if (class_data_offset < code_item_offset) {
+    return 0;
+  }
+
+  // Find the last code item so we can leave it in place if the next section is not 4-byte aligned.
+  std::unordered_set<dex_ir::CodeItem*> visited_code_items;
+  bool is_code_item_aligned = IsNextSectionCodeItemAligned(code_item_offset);
+  if (!is_code_item_aligned) {
+    dex_ir::CodeItem* last_code_item = nullptr;
+    for (auto& code_item_pair : header_->GetCollections().CodeItems()) {
+      std::unique_ptr<dex_ir::CodeItem>& code_item = code_item_pair.second;
+      if (last_code_item == nullptr || last_code_item->GetOffset() < code_item->GetOffset()) {
+        last_code_item = code_item.get();
       }
-      for (auto& method : *class_data->VirtualMethods()) {
-        dex_ir::CodeItem* code_item = method->GetCodeItem();
-        if (code_item != nullptr) {
-          diff += UnsignedLeb128Size(offset) - UnsignedLeb128Size(code_item->GetOffset());
-          code_item->SetOffset(offset);
-          offset += RoundUp(code_item->GetSize(), 4);
-        }
+    }
+    // Preserve the last code item by marking it already visited.
+    visited_code_items.insert(last_code_item);
+  }
+
+  int32_t diff = 0;
+  for (dex_ir::ClassData* class_data : new_class_data_order) {
+    class_data->SetOffset(class_data->GetOffset() + diff);
+    for (auto& method : *class_data->DirectMethods()) {
+      dex_ir::CodeItem* code_item = method->GetCodeItem();
+      if (code_item != nullptr && visited_code_items.find(code_item) == visited_code_items.end()) {
+        visited_code_items.insert(code_item);
+        diff += UnsignedLeb128Size(code_item_offset) - UnsignedLeb128Size(code_item->GetOffset());
+        code_item->SetOffset(code_item_offset);
+        code_item_offset += RoundUp(code_item->GetSize(), kDexCodeItemAlignment);
+      }
+    }
+    for (auto& method : *class_data->VirtualMethods()) {
+      dex_ir::CodeItem* code_item = method->GetCodeItem();
+      if (code_item != nullptr && visited_code_items.find(code_item) == visited_code_items.end()) {
+        visited_code_items.insert(code_item);
+        diff += UnsignedLeb128Size(code_item_offset) - UnsignedLeb128Size(code_item->GetOffset());
+        code_item->SetOffset(code_item_offset);
+        code_item_offset += RoundUp(code_item->GetSize(), kDexCodeItemAlignment);
       }
     }
   }
+  // Adjust diff to be 4-byte aligned.
+  return RoundUp(diff, kDexCodeItemAlignment);
+}
 
-  return diff;
+bool DexLayout::IsNextSectionCodeItemAligned(uint32_t offset) {
+  dex_ir::Collections& collections = header_->GetCollections();
+  std::set<uint32_t> section_offsets;
+  section_offsets.insert(collections.MapListOffset());
+  section_offsets.insert(collections.TypeListsOffset());
+  section_offsets.insert(collections.AnnotationSetRefListsOffset());
+  section_offsets.insert(collections.AnnotationSetItemsOffset());
+  section_offsets.insert(collections.ClassDatasOffset());
+  section_offsets.insert(collections.CodeItemsOffset());
+  section_offsets.insert(collections.StringDatasOffset());
+  section_offsets.insert(collections.DebugInfoItemsOffset());
+  section_offsets.insert(collections.AnnotationItemsOffset());
+  section_offsets.insert(collections.EncodedArrayItemsOffset());
+  section_offsets.insert(collections.AnnotationsDirectoryItemsOffset());
+
+  auto found = section_offsets.find(offset);
+  if (found != section_offsets.end()) {
+    found++;
+    if (found != section_offsets.end()) {
+      return *found % kDexCodeItemAlignment == 0;
+    }
+  }
+  return false;
 }
 
 // Adjust offsets of every item in the specified section by diff bytes.
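
IsNextSectionCodeItemAligned above relies on std::set keeping its keys ordered: look up the code item section's own start offset, advance the iterator to the following section's start, and test that against 4-byte alignment. The trick in isolation, sketched over a bare offset set:

    #include <cstdint>
    #include <cstdio>
    #include <set>

    // True if the section starting right after `offset` begins on a 4-byte boundary.
    bool NextSectionAligned(const std::set<uint32_t>& section_offsets, uint32_t offset) {
      auto it = section_offsets.find(offset);
      if (it != section_offsets.end() && ++it != section_offsets.end()) {
        return *it % 4 == 0;
      }
      return false;
    }

    int main() {
      std::set<uint32_t> offsets = {0x100, 0x1f3, 0x400};
      std::printf("%d\n", NextSectionAligned(offsets, 0x100));  // 0: 0x1f3 is unaligned.
      std::printf("%d\n", NextSectionAligned(offsets, 0x1f3));  // 1: 0x400 is aligned.
      return 0;
    }
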
@@ -1626,17 +1686,16 @@
 }
 
 void DexLayout::LayoutOutputFile(const DexFile* dex_file) {
-  std::vector<dex_ir::ClassDef*> new_class_def_order = LayoutClassDefsAndClassData(dex_file);
-  int32_t diff = LayoutCodeItems(new_class_def_order);
-  // Adjust diff to be 4-byte aligned.
-  diff = RoundUp(diff, 4);
+  std::vector<dex_ir::ClassData*> new_class_data_order = LayoutClassDefsAndClassData(dex_file);
+  int32_t diff = LayoutCodeItems(new_class_data_order);
   // Move sections after ClassData by diff bytes.
   FixupSections(header_->GetCollections().ClassDatasOffset(), diff);
   // Update file size.
   header_->SetFileSize(header_->FileSize() + diff);
 }
 
-void DexLayout::OutputDexFile(const std::string& dex_file_location) {
+void DexLayout::OutputDexFile(const DexFile* dex_file) {
+  const std::string& dex_file_location = dex_file->GetLocation();
   std::string error_msg;
   std::unique_ptr<File> new_file;
   if (!options_.output_to_memmap_) {
@@ -1669,6 +1728,25 @@
   if (new_file != nullptr) {
     UNUSED(new_file->FlushCloseOrErase());
   }
+  // Verify the output dex file's structure for debug builds.
+  if (kIsDebugBuild) {
+    std::string location = "memory mapped file for " + dex_file_location;
+    std::unique_ptr<const DexFile> output_dex_file(DexFile::Open(mem_map_->Begin(),
+                                                                 mem_map_->Size(),
+                                                                 location,
+                                                                 header_->Checksum(),
+                                                                 /*oat_dex_file*/ nullptr,
+                                                                 /*verify*/ true,
+                                                                 /*verify_checksum*/ false,
+                                                                 &error_msg));
+    DCHECK(output_dex_file != nullptr) << "Failed to re-open output file: " << error_msg;
+  }
+  // Do IR-level comparison between input and output. This check ignores potential differences
+  // due to layout, so offsets are not checked. Instead, it checks the data contents of each item.
+  if (options_.verify_output_) {
+    std::unique_ptr<dex_ir::Header> orig_header(dex_ir::DexIrBuilder(*dex_file));
+    CHECK(VerifyOutputDexFile(orig_header.get(), header_, &error_msg)) << error_msg;
+  }
 }
 
 /*
@@ -1690,6 +1768,11 @@
     return;
   }
 
+  if (options_.show_section_statistics_) {
+    ShowDexSectionStatistics(header_, dex_file_index);
+    return;
+  }
+
   // Dump dex file.
   if (options_.dump_) {
     DumpDexFile();
@@ -1700,7 +1783,7 @@
     if (info_ != nullptr) {
       LayoutOutputFile(dex_file);
     }
-    OutputDexFile(dex_file->GetLocation());
+    OutputDexFile(dex_file);
   }
 }
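
The diff bookkeeping in LayoutCodeItems exists because a code item's offset is stored ULEB128-encoded inside its class_data_item, and ULEB128 is variable length: seven payload bits per byte. Moving a code item across an encoding boundary (0x80, 0x4000, 0x200000, ...) therefore grows or shrinks the class data that points at it. A standalone sketch of the size computation, mirroring the semantics of the UnsignedLeb128Size helper used above:

    #include <cstddef>
    #include <cstdint>
    #include <cstdio>

    // Number of bytes needed to encode `value` as ULEB128: one byte per 7 payload bits.
    size_t UnsignedLeb128Size(uint32_t value) {
      size_t size = 1;
      while (value >= 0x80) {
        value >>= 7;
        ++size;
      }
      return size;
    }

    int main() {
      std::printf("%zu\n", UnsignedLeb128Size(0x7f));    // 1 byte.
      std::printf("%zu\n", UnsignedLeb128Size(0x80));    // 2 bytes: boundary crossed.
      std::printf("%zu\n", UnsignedLeb128Size(0x4000));  // 3 bytes.
      return 0;
    }
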
 
diff --git a/dexlayout/dexlayout.h b/dexlayout/dexlayout.h
index ac1a4a6..f26b423 100644
--- a/dexlayout/dexlayout.h
+++ b/dexlayout/dexlayout.h
@@ -56,7 +56,9 @@
   bool show_annotations_ = false;
   bool show_file_headers_ = false;
   bool show_section_headers_ = false;
+  bool show_section_statistics_ = false;
   bool verbose_ = false;
+  bool verify_output_ = false;
   bool visualize_pattern_ = false;
   OutputFormat output_format_ = kOutputPlain;
   const char* output_dex_directory_ = nullptr;
@@ -105,15 +107,16 @@
   void DumpSField(uint32_t idx, uint32_t flags, int i, dex_ir::EncodedValue* init);
   void DumpDexFile();
 
-  std::vector<dex_ir::ClassDef*> LayoutClassDefsAndClassData(const DexFile* dex_file);
-  int32_t LayoutCodeItems(std::vector<dex_ir::ClassDef*> new_class_def_order);
+  std::vector<dex_ir::ClassData*> LayoutClassDefsAndClassData(const DexFile* dex_file);
+  int32_t LayoutCodeItems(std::vector<dex_ir::ClassData*> new_class_data_order);
+  bool IsNextSectionCodeItemAligned(uint32_t offset);
   template<class T> void FixupSection(std::map<uint32_t, std::unique_ptr<T>>& map, uint32_t diff);
   void FixupSections(uint32_t offset, uint32_t diff);
 
   // Creates a new layout for the dex file based on profile info.
   // Currently reorders ClassDefs, ClassDataItems, and CodeItems.
   void LayoutOutputFile(const DexFile* dex_file);
-  void OutputDexFile(const std::string& dex_file_location);
+  void OutputDexFile(const DexFile* dex_file);
 
   void DumpCFG(const DexFile* dex_file, int idx);
   void DumpCFG(const DexFile* dex_file, uint32_t dex_method_idx, const DexFile::CodeItem* code);
diff --git a/dexlayout/dexlayout_main.cc b/dexlayout/dexlayout_main.cc
index ad599ae..38faf96 100644
--- a/dexlayout/dexlayout_main.cc
+++ b/dexlayout/dexlayout_main.cc
@@ -44,7 +44,7 @@
 static void Usage(void) {
   fprintf(stderr, "Copyright (C) 2016 The Android Open Source Project\n\n");
   fprintf(stderr, "%s: [-a] [-c] [-d] [-e] [-f] [-h] [-i] [-l layout] [-o outfile] [-p profile]"
-                  " [-s] [-w directory] dexfile...\n\n", kProgramName);
+                  " [-s] [-t] [-v] [-w directory] dexfile...\n\n", kProgramName);
   fprintf(stderr, " -a : display annotations\n");
   fprintf(stderr, " -b : build dex_ir\n");
   fprintf(stderr, " -c : verify checksum and exit\n");
@@ -57,6 +57,8 @@
   fprintf(stderr, " -o : output file name (defaults to stdout)\n");
   fprintf(stderr, " -p : profile file name (defaults to no profile)\n");
   fprintf(stderr, " -s : visualize reference pattern\n");
+  fprintf(stderr, " -t : display file section sizes\n");
+  fprintf(stderr, " -v : verify output file is canonical to input (IR level comparison)\n");
   fprintf(stderr, " -w : output dex directory \n");
 }
 
@@ -75,7 +77,7 @@
 
   // Parse all arguments.
   while (1) {
-    const int ic = getopt(argc, argv, "abcdefghil:mo:p:sw:");
+    const int ic = getopt(argc, argv, "abcdefghil:mo:p:stvw:");
     if (ic < 0) {
       break;  // done
     }
@@ -127,6 +129,13 @@
         options.visualize_pattern_ = true;
         options.verbose_ = false;
         break;
+      case 't':  // display section statistics
+        options.show_section_statistics_ = true;
+        options.verbose_ = false;
+        break;
+      case 'v':  // verify output
+        options.verify_output_ = true;
+        break;
       case 'w':  // output dex files directory
         options.output_dex_directory_ = optarg;
         break;
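
In the getopt option string, a letter followed by ':' takes an argument via optarg while a bare letter is a boolean switch, which is how the new -t and -v slot in next to -w (which still expects a directory). A minimal self-contained sketch of the same parsing, using a hypothetical subset of the flags:

    #include <cstdio>
    #include <unistd.h>

    int main(int argc, char** argv) {
      bool show_stats = false;
      bool verify = false;
      const char* out_dir = nullptr;
      int ic;
      // "tvw:": -t and -v are bare flags; the colon makes -w consume an argument.
      while ((ic = getopt(argc, argv, "tvw:")) >= 0) {
        switch (ic) {
          case 't': show_stats = true; break;
          case 'v': verify = true; break;
          case 'w': out_dir = optarg; break;
          default: return 1;
        }
      }
      std::printf("stats=%d verify=%d dir=%s\n",
                  show_stats, verify, out_dir != nullptr ? out_dir : "(none)");
      return 0;
    }
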
diff --git a/dexlayout/dexlayout_test.cc b/dexlayout/dexlayout_test.cc
index 2d85e8f..bd6548e 100644
--- a/dexlayout/dexlayout_test.cc
+++ b/dexlayout/dexlayout_test.cc
@@ -41,7 +41,7 @@
     "AAAAdQEAAAAQAAABAAAAjAEAAA==";
 
 static const char kDexFileLayoutInputProfile[] =
-    "cHJvADAwMwABCwABAAAAAAD1KW3+Y2xhc3Nlcy5kZXgBAA==";
+    "cHJvADAwNAABCwABAAAAAAD1KW3+Y2xhc3Nlcy5kZXgBAA==";
 
 static const char kDexFileLayoutExpectedOutputDex[] =
     "ZGV4CjAzNQD1KW3+B8NAB0f2A/ZVIBJ0aHrGIqcpVTAUAgAAcAAAAHhWNBIAAAAAAAAAAIwBAAAH"
@@ -55,19 +55,170 @@
     "qAAAAAYAAAACAAAAwAAAAAEgAAACAAAAAAEAAAIgAAAHAAAAMAEAAAMgAAACAAAAaQEAAAAgAAAC"
     "AAAAdQEAAAAQAAABAAAAjAEAAA==";
 
-static void WriteFileBase64(const char* base64, const char* location) {
+// Dex file with catch handler unreferenced by try blocks.
+// Constructed by building a dex file with try/catch blocks and hex editing.
+static const char kUnreferencedCatchHandlerInputDex[] =
+    "ZGV4CjAzNQD+exd52Y0f9nY5x5GmInXq5nXrO6Kl2RV4AwAAcAAAAHhWNBIAAAAAAAAAANgCAAAS"
+    "AAAAcAAAAAgAAAC4AAAAAwAAANgAAAABAAAA/AAAAAQAAAAEAQAAAQAAACQBAAA0AgAARAEAANYB"
+    "AADeAQAA5gEAAO4BAAAAAgAADwIAACYCAAA9AgAAUQIAAGUCAAB5AgAAfwIAAIUCAACIAgAAjAIA"
+    "AKECAACnAgAArAIAAAQAAAAFAAAABgAAAAcAAAAIAAAACQAAAAwAAAAOAAAADAAAAAYAAAAAAAAA"
+    "DQAAAAYAAADIAQAADQAAAAYAAADQAQAABQABABAAAAAAAAAAAAAAAAAAAgAPAAAAAQABABEAAAAD"
+    "AAAAAAAAAAAAAAABAAAAAwAAAAAAAAADAAAAAAAAAMgCAAAAAAAAAQABAAEAAAC1AgAABAAAAHAQ"
+    "AwAAAA4AAwABAAIAAgC6AgAAIQAAAGIAAAAaAQoAbiACABAAYgAAABoBCwBuIAIAEAAOAA0AYgAA"
+    "ABoBAQBuIAIAEAAo8A0AYgAAABoBAgBuIAIAEAAo7gAAAAAAAAcAAQAHAAAABwABAAIBAg8BAhgA"
+    "AQAAAAQAAAABAAAABwAGPGluaXQ+AAZDYXRjaDEABkNhdGNoMgAQSGFuZGxlclRlc3QuamF2YQAN"
+    "TEhhbmRsZXJUZXN0OwAVTGphdmEvaW8vUHJpbnRTdHJlYW07ABVMamF2YS9sYW5nL0V4Y2VwdGlv"
+    "bjsAEkxqYXZhL2xhbmcvT2JqZWN0OwASTGphdmEvbGFuZy9TdHJpbmc7ABJMamF2YS9sYW5nL1N5"
+    "c3RlbTsABFRyeTEABFRyeTIAAVYAAlZMABNbTGphdmEvbGFuZy9TdHJpbmc7AARtYWluAANvdXQA"
+    "B3ByaW50bG4AAQAHDgAEAQAHDn17AncdHoseAAAAAgAAgYAExAIBCdwCAAANAAAAAAAAAAEAAAAA"
+    "AAAAAQAAABIAAABwAAAAAgAAAAgAAAC4AAAAAwAAAAMAAADYAAAABAAAAAEAAAD8AAAABQAAAAQA"
+    "AAAEAQAABgAAAAEAAAAkAQAAASAAAAIAAABEAQAAARAAAAIAAADIAQAAAiAAABIAAADWAQAAAyAA"
+    "AAIAAAC1AgAAACAAAAEAAADIAgAAABAAAAEAAADYAgAA";
+
+// Dex file with 0-size (catch all only) catch handler unreferenced by try blocks.
+// Constructed by building a dex file with try/catch blocks and hex editing.
+static const char kUnreferenced0SizeCatchHandlerInputDex[] =
+    "ZGV4CjAzNQCEbEEvMstSNpQpjPdfMEfUBS48cis2QRJoAwAAcAAAAHhWNBIAAAAAAAAAAMgCAAAR"
+    "AAAAcAAAAAcAAAC0AAAAAwAAANAAAAABAAAA9AAAAAQAAAD8AAAAAQAAABwBAAAsAgAAPAEAAOoB"
+    "AADyAQAABAIAABMCAAAqAgAAPgIAAFICAABmAgAAaQIAAG0CAACCAgAAhgIAAIoCAACQAgAAlQIA"
+    "AJ4CAACiAgAAAgAAAAMAAAAEAAAABQAAAAYAAAAHAAAACQAAAAcAAAAFAAAAAAAAAAgAAAAFAAAA"
+    "3AEAAAgAAAAFAAAA5AEAAAQAAQANAAAAAAAAAAAAAAAAAAIADAAAAAEAAQAOAAAAAgAAAAAAAAAA"
+    "AAAAAQAAAAIAAAAAAAAAAQAAAAAAAAC5AgAAAAAAAAEAAQABAAAApgIAAAQAAABwEAMAAAAOAAQA"
+    "AQACAAIAqwIAAC8AAABiAAAAGgEPAG4gAgAQAGIAAAAaAQoAbiACABAAYgAAABoBEABuIAIAEABi"
+    "AAAAGgELAG4gAgAQAA4ADQBiAQAAGgIKAG4gAgAhACcADQBiAQAAGgILAG4gAgAhACcAAAAAAAAA"
+    "BwABAA4AAAAHAAEAAgAdACYAAAABAAAAAwAAAAEAAAAGAAY8aW5pdD4AEEhhbmRsZXJUZXN0Lmph"
+    "dmEADUxIYW5kbGVyVGVzdDsAFUxqYXZhL2lvL1ByaW50U3RyZWFtOwASTGphdmEvbGFuZy9PYmpl"
+    "Y3Q7ABJMamF2YS9sYW5nL1N0cmluZzsAEkxqYXZhL2xhbmcvU3lzdGVtOwABVgACVkwAE1tMamF2"
+    "YS9sYW5nL1N0cmluZzsAAmYxAAJmMgAEbWFpbgADb3V0AAdwcmludGxuAAJ0MQACdDIAAQAHDgAE"
+    "AQAHDnl7eXkCeB2bAAAAAgAAgYAEvAIBCdQCAA0AAAAAAAAAAQAAAAAAAAABAAAAEQAAAHAAAAAC"
+    "AAAABwAAALQAAAADAAAAAwAAANAAAAAEAAAAAQAAAPQAAAAFAAAABAAAAPwAAAAGAAAAAQAAABwB"
+    "AAABIAAAAgAAADwBAAABEAAAAgAAANwBAAACIAAAEQAAAOoBAAADIAAAAgAAAKYCAAAAIAAAAQAA"
+    "ALkCAAAAEAAAAQAAAMgCAAA=";
+
+// Dex file with an unreferenced catch handler at end of code item.
+// Constructed by building a dex file with try/catch blocks and hex editing.
+static const char kUnreferencedEndingCatchHandlerInputDex[] =
+    "ZGV4CjAzNQCEflufI6xGTDDRmLpbfYi6ujPrDLIwvYcEBAAAcAAAAHhWNBIAAAAAAAAAAGQDAAAT"
+    "AAAAcAAAAAgAAAC8AAAAAwAAANwAAAABAAAAAAEAAAUAAAAIAQAAAQAAADABAAC0AgAAUAEAAE4C"
+    "AABWAgAAXgIAAGYCAAB4AgAAhwIAAJ4CAAC1AgAAyQIAAN0CAADxAgAA9wIAAP0CAAAAAwAABAMA"
+    "ABkDAAAcAwAAIgMAACcDAAAEAAAABQAAAAYAAAAHAAAACAAAAAkAAAAMAAAADgAAAAwAAAAGAAAA"
+    "AAAAAA0AAAAGAAAAQAIAAA0AAAAGAAAASAIAAAUAAQARAAAAAAAAAAAAAAAAAAAADwAAAAAAAgAQ"
+    "AAAAAQABABIAAAADAAAAAAAAAAAAAAABAAAAAwAAAAAAAAADAAAAAAAAAFADAAAAAAAAAQABAAEA"
+    "AAAwAwAABAAAAHAQBAAAAA4AAgAAAAIAAgA1AwAAIQAAAGIAAAAaAQoAbiADABAAYgAAABoBCwBu"
+    "IAMAEAAOAA0AYgAAABoBAQBuIAMAEAAo8A0AYgAAABoBAgBuIAMAEAAo7gAAAAAAAAcAAQAHAAAA"
+    "BwABAAIBAg8BAhgAAwABAAIAAgBCAwAAIQAAAGIAAAAaAQoAbiADABAAYgAAABoBCwBuIAMAEAAO"
+    "AA0AYgAAABoBAQBuIAMAEAAo8A0AYgAAABoBAgBuIAMAEAAo7gAAAAAAAAcAAQAHAAAABwABAAIB"
+    "Ag8BAhgAAQAAAAQAAAABAAAABwAGPGluaXQ+AAZDYXRjaDEABkNhdGNoMgAQSGFuZGxlclRlc3Qu"
+    "amF2YQANTEhhbmRsZXJUZXN0OwAVTGphdmEvaW8vUHJpbnRTdHJlYW07ABVMamF2YS9sYW5nL0V4"
+    "Y2VwdGlvbjsAEkxqYXZhL2xhbmcvT2JqZWN0OwASTGphdmEvbGFuZy9TdHJpbmc7ABJMamF2YS9s"
+    "YW5nL1N5c3RlbTsABFRyeTEABFRyeTIAAVYAAlZMABNbTGphdmEvbGFuZy9TdHJpbmc7AAFhAARt"
+    "YWluAANvdXQAB3ByaW50bG4AAQAHDgAEAAcOfHsCeB0eih4AEQEABw59ewJ3HR6LHgAAAAMAAIGA"
+    "BNACAQnoAgEJ1AMAAA0AAAAAAAAAAQAAAAAAAAABAAAAEwAAAHAAAAACAAAACAAAALwAAAADAAAA"
+    "AwAAANwAAAAEAAAAAQAAAAABAAAFAAAABQAAAAgBAAAGAAAAAQAAADABAAABIAAAAwAAAFABAAAB"
+    "EAAAAgAAAEACAAACIAAAEwAAAE4CAAADIAAAAwAAADADAAAAIAAAAQAAAFADAAAAEAAAAQAAAGQD"
+    "AAA=";
+
+// Dex file with multiple code items that have the same debug_info_off_. Constructed by a modified
+// dexlayout on XandY.
+static const char kDexFileDuplicateOffset[] =
+    "ZGV4CjAzNwAQfXfPCB8qCxo7MqdFhmHZQwCv8+udHD8MBAAAcAAAAHhWNBIAAAAAAAAAAFQDAAAT"
+    "AAAAcAAAAAgAAAC8AAAAAQAAANwAAAABAAAA6AAAAAUAAADwAAAAAwAAABgBAACUAgAAeAEAABQC"
+    "AAAeAgAAJgIAACsCAAAyAgAANwIAAFsCAAB7AgAAngIAALICAAC1AgAAvQIAAMUCAADIAgAA1QIA"
+    "AOkCAADvAgAA9QIAAPwCAAACAAAAAwAAAAQAAAAFAAAABgAAAAcAAAAIAAAACQAAAAkAAAAHAAAA"
+    "AAAAAAIAAQASAAAAAAAAAAEAAAABAAAAAQAAAAIAAAAAAAAAAgAAAAEAAAAGAAAAAQAAAAAAAAAA"
+    "AAAABgAAAAAAAAAKAAAAAAAAACsDAAAAAAAAAQAAAAAAAAAGAAAAAAAAAAsAAAD0AQAANQMAAAAA"
+    "AAACAAAAAAAAAAAAAAAAAAAACwAAAAQCAAA/AwAAAAAAAAIAAAAUAwAAGgMAAAEAAAAjAwAAAQAB"
+    "AAEAAAAFAAAABAAAAHAQBAAAAA4AAQABAAEAAAAFAAAABAAAAHAQBAAAAA4AAQAAAAEAAAAFAAAA"
+    "CAAAACIAAQBwEAEAAABpAAAADgABAAEAAQAAAAUAAAAEAAAAcBAAAAAADgB4AQAAAAAAAAAAAAAA"
+    "AAAAhAEAAAAAAAAAAAAAAAAAAAg8Y2xpbml0PgAGPGluaXQ+AANMWDsABUxZJFo7AANMWTsAIkxk"
+    "YWx2aWsvYW5ub3RhdGlvbi9FbmNsb3NpbmdDbGFzczsAHkxkYWx2aWsvYW5ub3RhdGlvbi9Jbm5l"
+    "ckNsYXNzOwAhTGRhbHZpay9hbm5vdGF0aW9uL01lbWJlckNsYXNzZXM7ABJMamF2YS9sYW5nL09i"
+    "amVjdDsAAVYABlguamF2YQAGWS5qYXZhAAFaAAthY2Nlc3NGbGFncwASZW1pdHRlcjogamFjay00"
+    "LjI1AARuYW1lAAR0aGlzAAV2YWx1ZQABegARAAcOABMABw4AEgAHDnYAEQAHDgACAwERGAICBAIN"
+    "BAgPFwwCBQERHAEYAQAAAQAAgIAEjAMAAAEAAYCABKQDAQACAAAIAoiABLwDAYCABNwDAAAADwAA"
+    "AAAAAAABAAAAAAAAAAEAAAATAAAAcAAAAAIAAAAIAAAAvAAAAAMAAAABAAAA3AAAAAQAAAABAAAA"
+    "6AAAAAUAAAAFAAAA8AAAAAYAAAADAAAAGAEAAAMQAAACAAAAeAEAAAEgAAAEAAAAjAEAAAYgAAAC"
+    "AAAA9AEAAAIgAAATAAAAFAIAAAMgAAAEAAAA/wIAAAQgAAADAAAAFAMAAAAgAAADAAAAKwMAAAAQ"
+    "AAABAAAAVAMAAA==";
+
+// Dex file with null value for annotations_off in the annotation_set_ref_list.
+// Constructed by building a dex file with annotations and hex editing.
+static const char kNullSetRefListElementInputDex[] =
+    "ZGV4CjAzNQB1iA+7ZwgkF+7E6ZesYFc2lRAR3qnRAanwAwAAcAAAAHhWNBIAAAAAAAAAACADAAAS"
+    "AAAAcAAAAAgAAAC4AAAAAwAAANgAAAABAAAA/AAAAAQAAAAEAQAAAgAAACQBAACMAgAAZAEAAOgB"
+    "AADwAQAAAAIAAAMCAAAQAgAAIAIAADQCAABIAgAAawIAAI0CAAC1AgAAyAIAANECAADUAgAA2QIA"
+    "ANwCAADjAgAA6QIAAAMAAAAEAAAABQAAAAYAAAAHAAAACAAAAAkAAAAMAAAAAgAAAAMAAAAAAAAA"
+    "DAAAAAcAAAAAAAAADQAAAAcAAADgAQAABgAGAAsAAAAAAAEAAAAAAAAAAgAOAAAAAQAAABAAAAAC"
+    "AAEAAAAAAAAAAAAAAAAAAgAAAAAAAAABAAAAsAEAAAgDAAAAAAAAAQAAAAEmAAACAAAA2AEAAAoA"
+    "AADIAQAAFgMAAAAAAAACAAAAAAAAAHwBAAABAAAA/AIAAAAAAAABAAAAAgMAAAEAAQABAAAA8AIA"
+    "AAQAAABwEAMAAAAOAAIAAgAAAAAA9QIAAAEAAAAOAAAAAAAAAAAAAAAAAAAAAQAAAAEAAABkAQAA"
+    "cAEAAAAAAAAAAAAAAAAAAAEAAAAEAAAAAgAAAAMAAwAGPGluaXQ+AA5Bbm5vQ2xhc3MuamF2YQAB"
+    "TAALTEFubm9DbGFzczsADkxNeUFubm90YXRpb247ABJMamF2YS9sYW5nL09iamVjdDsAEkxqYXZh"
+    "L2xhbmcvU3RyaW5nOwAhTGphdmEvbGFuZy9hbm5vdGF0aW9uL0Fubm90YXRpb247ACBMamF2YS9s"
+    "YW5nL2Fubm90YXRpb24vUmV0ZW50aW9uOwAmTGphdmEvbGFuZy9hbm5vdGF0aW9uL1JldGVudGlv"
+    "blBvbGljeTsAEU15QW5ub3RhdGlvbi5qYXZhAAdSVU5USU1FAAFWAANWTEwAAWEABWFOYW1lAARu"
+    "YW1lAAV2YWx1ZQABAAcOAAICAAAHDgABBQERGwABAQEQFw8AAAIAAICABIQDAQmcAwAAAAECgQgA"
+    "AAARAAAAAAAAAAEAAAAAAAAAAQAAABIAAABwAAAAAgAAAAgAAAC4AAAAAwAAAAMAAADYAAAABAAA"
+    "AAEAAAD8AAAABQAAAAQAAAAEAQAABgAAAAIAAAAkAQAAAhAAAAEAAABkAQAAAxAAAAMAAABwAQAA"
+    "ASAAAAIAAACEAQAABiAAAAIAAACwAQAAARAAAAIAAADYAQAAAiAAABIAAADoAQAAAyAAAAIAAADw"
+    "AgAABCAAAAIAAAD8AgAAACAAAAIAAAAIAwAAABAAAAEAAAAgAwAA";
+
+// Dex file with shared empty class data item for multiple class defs.
+// Constructed by building a dex file with multiple classes and hex editing.
+static const char kMultiClassDataInputDex[] =
+    "ZGV4CjAzNQALJgF9TtnLq748xVe/+wyxETrT9lTEiW6YAQAAcAAAAHhWNBIAAAAAAAAAADQBAAAI"
+    "AAAAcAAAAAQAAACQAAAAAAAAAAAAAAACAAAAoAAAAAAAAAAAAAAAAgAAALAAAACoAAAA8AAAAPAA"
+    "AAD4AAAAAAEAAAMBAAAIAQAADQEAACEBAAAkAQAAAgAAAAMAAAAEAAAABQAAAAEAAAAGAAAAAgAA"
+    "AAcAAAABAAAAAQYAAAMAAAAAAAAAAAAAAAAAAAAnAQAAAAAAAAIAAAABBgAAAwAAAAAAAAABAAAA"
+    "AAAAACcBAAAAAAAABkEuamF2YQAGQi5qYXZhAAFJAANMQTsAA0xCOwASTGphdmEvbGFuZy9PYmpl"
+    "Y3Q7AAFhAAFiAAAAAAABAAAAARkAAAAIAAAAAAAAAAEAAAAAAAAAAQAAAAgAAABwAAAAAgAAAAQA"
+    "AACQAAAABAAAAAIAAACgAAAABgAAAAIAAACwAAAAAiAAAAgAAADwAAAAACAAAAIAAAAnAQAAABAA"
+    "AAEAAAA0AQAA";
+
+// Dex file with code info followed by non 4-byte aligned section.
+// Constructed by building a dex file with code info followed by string data and hex editing.
+static const char kUnalignedCodeInfoInputDex[] =
+    "ZGV4CjAzNQDXJzXNb4iWn2SLhmLydW/8h1K9moERIw7UAQAAcAAAAHhWNBIAAAAAAAAAAEwBAAAG"
+    "AAAAcAAAAAMAAACIAAAAAQAAAJQAAAAAAAAAAAAAAAMAAACgAAAAAQAAALgAAAD8AAAA2AAAAAIB"
+    "AAAKAQAAEgEAABcBAAArAQAALgEAAAIAAAADAAAABAAAAAQAAAACAAAAAAAAAAAAAAAAAAAAAAAA"
+    "AAUAAAABAAAAAAAAAAAAAAABAAAAAQAAAAAAAAABAAAAAAAAADsBAAAAAAAAAQABAAEAAAAxAQAA"
+    "BAAAAHAQAgAAAA4AAQABAAAAAAA2AQAAAQAAAA4ABjxpbml0PgAGQS5qYXZhAANMQTsAEkxqYXZh"
+    "L2xhbmcvT2JqZWN0OwABVgABYQABAAcOAAMABw4AAAABAQCBgATYAQEB8AEAAAALAAAAAAAAAAEA"
+    "AAAAAAAAAQAAAAYAAABwAAAAAgAAAAMAAACIAAAAAwAAAAEAAACUAAAABQAAAAMAAACgAAAABgAA"
+    "AAEAAAC4AAAAASAAAAIAAADYAAAAAiAAAAYAAAACAQAAAyAAAAIAAAAxAQAAACAAAAEAAAA7AQAA"
+    "ABAAAAEAAABMAQAA";
+
+// Dex file with class data section preceding code items.
+// Constructed by passing dex file through dexmerger tool and hex editing.
+static const char kClassDataBeforeCodeInputDex[] =
+    "ZGV4CjAzNQCZKmCu3XXn4zvxCh5VH0gZNNobEAcsc49EAgAAcAAAAHhWNBIAAAAAAAAAAAQBAAAJ"
+    "AAAAcAAAAAQAAACUAAAAAgAAAKQAAAAAAAAAAAAAAAUAAAC8AAAAAQAAAOQAAABAAQAABAEAAPgB"
+    "AAAAAgAACAIAAAsCAAAQAgAAJAIAACcCAAAqAgAALQIAAAIAAAADAAAABAAAAAUAAAACAAAAAAAA"
+    "AAAAAAAFAAAAAwAAAAAAAAABAAEAAAAAAAEAAAAGAAAAAQAAAAcAAAABAAAACAAAAAIAAQAAAAAA"
+    "AQAAAAEAAAACAAAAAAAAAAEAAAAAAAAAjAEAAAAAAAALAAAAAAAAAAEAAAAAAAAAAQAAAAkAAABw"
+    "AAAAAgAAAAQAAACUAAAAAwAAAAIAAACkAAAABQAAAAUAAAC8AAAABgAAAAEAAADkAAAAABAAAAEA"
+    "AAAEAQAAACAAAAEAAACMAQAAASAAAAQAAACkAQAAAiAAAAkAAAD4AQAAAyAAAAQAAAAwAgAAAAAB"
+    "AwCBgASkAwEBvAMBAdADAQHkAwAAAQABAAEAAAAwAgAABAAAAHAQBAAAAA4AAgABAAAAAAA1AgAA"
+    "AgAAABIQDwACAAEAAAAAADoCAAACAAAAEiAPAAIAAQAAAAAAPwIAAAIAAAASMA8ABjxpbml0PgAG"
+    "QS5qYXZhAAFJAANMQTsAEkxqYXZhL2xhbmcvT2JqZWN0OwABVgABYQABYgABYwABAAcOAAMABw4A"
+    "BgAHDgAJAAcOAA==";
+
+static void WriteBase64ToFile(const char* base64, File* file) {
   // Decode base64.
   CHECK(base64 != nullptr);
   size_t length;
   std::unique_ptr<uint8_t[]> bytes(DecodeBase64(base64, &length));
-  CHECK(bytes.get() != nullptr);
-
-  // Write to provided file.
-  std::unique_ptr<File> file(OS::CreateEmptyFile(location));
-  CHECK(file.get() != nullptr);
+  CHECK(bytes != nullptr);
   if (!file->WriteFully(bytes.get(), length)) {
     PLOG(FATAL) << "Failed to write base64 as file";
   }
+}
+
+static void WriteFileBase64(const char* base64, const char* location) {
+  // Write to provided file.
+  std::unique_ptr<File> file(OS::CreateEmptyFile(location));
+  CHECK(file != nullptr);
+  WriteBase64ToFile(base64, file.get());
   if (file->FlushCloseOrErase() != 0) {
     PLOG(FATAL) << "Could not flush and close test file.";
   }
@@ -171,7 +322,7 @@
     EXPECT_TRUE(OS::FileExists(dexlayout.c_str())) << dexlayout << " should be a valid file path";
 
     std::vector<std::string> dexlayout_exec_argv =
-    { dexlayout, "-w", tmp_dir, "-o", tmp_name, "-p", profile_file, dex_file };
+        { dexlayout, "-w", tmp_dir, "-o", tmp_name, "-p", profile_file, dex_file };
     if (!::art::Exec(dexlayout_exec_argv, error_msg)) {
       return false;
     }
@@ -188,6 +339,40 @@
     }
     return true;
   }
+
+  // Shared driver for the unreferenced catch handler tests below.
+  bool UnreferencedCatchHandlerExec(std::string* error_msg, const char* dex_file_content) {
+    ScratchFile tmp_file;
+    std::string tmp_name = tmp_file.GetFilename();
+    size_t tmp_last_slash = tmp_name.rfind("/");
+    std::string tmp_dir = tmp_name.substr(0, tmp_last_slash + 1);
+
+    // Write inputs and expected outputs.
+    std::string input_dex = tmp_dir + "classes.dex";
+    WriteFileBase64(dex_file_content, input_dex.c_str());
+    std::string output_dex = tmp_dir + "classes.dex.new";
+
+    std::string dexlayout = GetTestAndroidRoot() + "/bin/dexlayout";
+    EXPECT_TRUE(OS::FileExists(dexlayout.c_str())) << dexlayout << " should be a valid file path";
+
+    std::vector<std::string> dexlayout_exec_argv =
+        { dexlayout, "-w", tmp_dir, "-o", "/dev/null", input_dex };
+    if (!::art::Exec(dexlayout_exec_argv, error_msg)) {
+      return false;
+    }
+
+    // Diff input and output. They should be the same.
+    std::vector<std::string> diff_exec_argv = { "/usr/bin/diff", input_dex, output_dex };
+    if (!::art::Exec(diff_exec_argv, error_msg)) {
+      return false;
+    }
+
+    std::vector<std::string> rm_exec_argv = { "/bin/rm", input_dex, output_dex };
+    if (!::art::Exec(rm_exec_argv, error_msg)) {
+      return false;
+    }
+    return true;
+  }
 };
 
 
@@ -212,4 +397,119 @@
   ASSERT_TRUE(DexFileLayoutExec(&error_msg)) << error_msg;
 }
 
+TEST_F(DexLayoutTest, UnreferencedCatchHandler) {
+  // Disable test on target.
+  TEST_DISABLED_FOR_TARGET();
+  std::string error_msg;
+  ASSERT_TRUE(UnreferencedCatchHandlerExec(&error_msg,
+                                           kUnreferencedCatchHandlerInputDex)) << error_msg;
+}
+
+TEST_F(DexLayoutTest, Unreferenced0SizeCatchHandler) {
+  // Disable test on target.
+  TEST_DISABLED_FOR_TARGET();
+  std::string error_msg;
+  ASSERT_TRUE(UnreferencedCatchHandlerExec(&error_msg,
+                                           kUnreferenced0SizeCatchHandlerInputDex)) << error_msg;
+}
+
+TEST_F(DexLayoutTest, UnreferencedEndingCatchHandler) {
+  // Disable test on target.
+  TEST_DISABLED_FOR_TARGET();
+  std::string error_msg;
+  ASSERT_TRUE(UnreferencedCatchHandlerExec(&error_msg,
+                                           kUnreferencedEndingCatchHandlerInputDex)) << error_msg;
+}
+
+TEST_F(DexLayoutTest, DuplicateOffset) {
+  ScratchFile temp;
+  WriteBase64ToFile(kDexFileDuplicateOffset, temp.GetFile());
+  EXPECT_EQ(temp.GetFile()->Flush(), 0);
+  std::string dexlayout = GetTestAndroidRoot() + "/bin/dexlayout";
+  EXPECT_TRUE(OS::FileExists(dexlayout.c_str())) << dexlayout << " should be a valid file path";
+  std::vector<std::string> dexlayout_exec_argv = {
+      dexlayout,
+      "-a",
+      "-i",
+      "-o",
+      "/dev/null",
+      temp.GetFilename()};
+  std::string error_msg;
+  const bool result = ::art::Exec(dexlayout_exec_argv, &error_msg);
+  EXPECT_TRUE(result);
+  if (!result) {
+    LOG(ERROR) << "Error " << error_msg;
+  }
+}
+
+TEST_F(DexLayoutTest, NullSetRefListElement) {
+  ScratchFile temp;
+  WriteBase64ToFile(kNullSetRefListElementInputDex, temp.GetFile());
+  EXPECT_EQ(temp.GetFile()->Flush(), 0);
+  std::string dexlayout = GetTestAndroidRoot() + "/bin/dexlayout";
+  EXPECT_TRUE(OS::FileExists(dexlayout.c_str())) << dexlayout << " should be a valid file path";
+  std::vector<std::string> dexlayout_exec_argv =
+      { dexlayout, "-o", "/dev/null", temp.GetFilename() };
+  std::string error_msg;
+  const bool result = ::art::Exec(dexlayout_exec_argv, &error_msg);
+  EXPECT_TRUE(result);
+  if (!result) {
+    LOG(ERROR) << "Error " << error_msg;
+  }
+}
+
+TEST_F(DexLayoutTest, MultiClassData) {
+  ScratchFile temp;
+  WriteBase64ToFile(kMultiClassDataInputDex, temp.GetFile());
+  ScratchFile temp2;
+  WriteBase64ToFile(kDexFileLayoutInputProfile, temp2.GetFile());
+  EXPECT_EQ(temp.GetFile()->Flush(), 0);
+  std::string dexlayout = GetTestAndroidRoot() + "/bin/dexlayout";
+  EXPECT_TRUE(OS::FileExists(dexlayout.c_str())) << dexlayout << " should be a valid file path";
+  std::vector<std::string> dexlayout_exec_argv =
+      { dexlayout, "-p", temp2.GetFilename(), "-o", "/dev/null", temp.GetFilename() };
+  std::string error_msg;
+  const bool result = ::art::Exec(dexlayout_exec_argv, &error_msg);
+  EXPECT_TRUE(result);
+  if (!result) {
+    LOG(ERROR) << "Error " << error_msg;
+  }
+}
+
+TEST_F(DexLayoutTest, UnalignedCodeInfo) {
+  ScratchFile temp;
+  WriteBase64ToFile(kUnalignedCodeInfoInputDex, temp.GetFile());
+  ScratchFile temp2;
+  WriteBase64ToFile(kDexFileLayoutInputProfile, temp2.GetFile());
+  EXPECT_EQ(temp.GetFile()->Flush(), 0);
+  std::string dexlayout = GetTestAndroidRoot() + "/bin/dexlayout";
+  EXPECT_TRUE(OS::FileExists(dexlayout.c_str())) << dexlayout << " should be a valid file path";
+  std::vector<std::string> dexlayout_exec_argv =
+      { dexlayout, "-p", temp2.GetFilename(), "-o", "/dev/null", temp.GetFilename() };
+  std::string error_msg;
+  const bool result = ::art::Exec(dexlayout_exec_argv, &error_msg);
+  EXPECT_TRUE(result);
+  if (!result) {
+    LOG(ERROR) << "Error " << error_msg;
+  }
+}
+
+TEST_F(DexLayoutTest, ClassDataBeforeCode) {
+  ScratchFile temp;
+  WriteBase64ToFile(kClassDataBeforeCodeInputDex, temp.GetFile());
+  ScratchFile temp2;
+  WriteBase64ToFile(kDexFileLayoutInputProfile, temp2.GetFile());
+  EXPECT_EQ(temp.GetFile()->Flush(), 0);
+  std::string dexlayout = GetTestAndroidRoot() + "/bin/dexlayout";
+  EXPECT_TRUE(OS::FileExists(dexlayout.c_str())) << dexlayout << " should be a valid file path";
+  std::vector<std::string> dexlayout_exec_argv =
+      { dexlayout, "-p", temp2.GetFilename(), "-o", "/dev/null", temp.GetFilename() };
+  std::string error_msg;
+  const bool result = ::art::Exec(dexlayout_exec_argv, &error_msg);
+  EXPECT_TRUE(result);
+  if (!result) {
+    LOG(ERROR) << "Error " << error_msg;
+  }
+}
+
 }  // namespace art
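
All of the new fixtures above embed hand-edited dex files as base64 so the test can materialize them into scratch files at runtime; WriteBase64ToFile delegates the decoding to ART's DecodeBase64 helper. A rough self-contained sketch of such a decoder, for illustration only (not the ART implementation):

    #include <cstdint>
    #include <cstdio>
    #include <cstring>
    #include <string>
    #include <vector>

    // Minimal streaming base64 decoder: 6 bits per input character,
    // emit a byte whenever 8 or more bits have accumulated.
    std::vector<uint8_t> DecodeBase64(const std::string& in) {
      static const char* kAlphabet =
          "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
      std::vector<uint8_t> out;
      uint32_t buffer = 0;
      int bits = 0;
      for (char c : in) {
        if (c == '=' || c == '\n') continue;  // Padding and line breaks carry no data.
        const char* p = std::strchr(kAlphabet, c);
        if (p == nullptr) continue;  // Skip anything that is not base64.
        buffer = (buffer << 6) | static_cast<uint32_t>(p - kAlphabet);
        bits += 6;
        if (bits >= 8) {
          bits -= 8;
          out.push_back(static_cast<uint8_t>((buffer >> bits) & 0xff));
        }
      }
      return out;
    }

    int main() {
      std::vector<uint8_t> bytes = DecodeBase64("ZGV4CjAzNQ==");  // "dex\n035"
      std::fwrite(bytes.data(), 1, bytes.size(), stdout);  // Prints the dex magic.
      return 0;
    }
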
diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc
index becb827..e767023 100644
--- a/oatdump/oatdump.cc
+++ b/oatdump/oatdump.cc
@@ -2210,13 +2210,13 @@
           ScopedIndentation indent2(&state->vios_);
           auto* resolved_fields = dex_cache->GetResolvedFields();
           for (size_t i = 0, length = dex_cache->NumResolvedFields(); i < length; ++i) {
-            auto* elem = mirror::DexCache::GetElementPtrSize(
-                resolved_fields, i, image_pointer_size);
+            auto* elem = mirror::DexCache::GetNativePairPtrSize(
+                resolved_fields, i, image_pointer_size).object;
             size_t run = 0;
             for (size_t j = i + 1;
-                 j != length && elem == mirror::DexCache::GetElementPtrSize(resolved_fields,
-                                                                            j,
-                                                                            image_pointer_size);
+                 j != length &&
+                 elem == mirror::DexCache::GetNativePairPtrSize(
+                     resolved_fields, j, image_pointer_size).object;
                  ++j) {
               ++run;
             }
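
The oatdump hunk above preserves its run-length compression of repeated dex cache entries while switching to the GetNativePairPtrSize accessor: it counts how many of the following slots resolve to the same element before printing one summary line. The counting pattern in isolation, as a generic sketch:

    #include <cstdio>
    #include <vector>

    int main() {
      std::vector<int> elems = {7, 7, 7, 3, 3, 9};
      for (size_t i = 0; i < elems.size(); ++i) {
        size_t run = 0;
        for (size_t j = i + 1; j != elems.size() && elems[j] == elems[i]; ++j) {
          ++run;  // Count how many following slots repeat the same element.
        }
        std::printf("value %d x%zu\n", elems[i], run + 1);
        i += run;  // Skip past the counted duplicates.
      }
      return 0;
    }
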
diff --git a/patchoat/patchoat.cc b/patchoat/patchoat.cc
index 491e739..dfaae7d 100644
--- a/patchoat/patchoat.cc
+++ b/patchoat/patchoat.cc
@@ -54,48 +54,6 @@
 
 namespace art {
 
-static bool LocationToFilename(const std::string& location, InstructionSet isa,
-                               std::string* filename) {
-  bool has_system = false;
-  bool has_cache = false;
-  // image_location = /system/framework/boot.art
-  // system_image_filename = /system/framework/<image_isa>/boot.art
-  std::string system_filename(GetSystemImageFilename(location.c_str(), isa));
-  if (OS::FileExists(system_filename.c_str())) {
-    has_system = true;
-  }
-
-  bool have_android_data = false;
-  bool dalvik_cache_exists = false;
-  bool is_global_cache = false;
-  std::string dalvik_cache;
-  GetDalvikCache(GetInstructionSetString(isa), false, &dalvik_cache,
-                 &have_android_data, &dalvik_cache_exists, &is_global_cache);
-
-  std::string cache_filename;
-  if (have_android_data && dalvik_cache_exists) {
-    // Always set output location even if it does not exist,
-    // so that the caller knows where to create the image.
-    //
-    // image_location = /system/framework/boot.art
-    // *image_filename = /data/dalvik-cache/<image_isa>/boot.art
-    std::string error_msg;
-    if (GetDalvikCacheFilename(location.c_str(), dalvik_cache.c_str(),
-                               &cache_filename, &error_msg)) {
-      has_cache = true;
-    }
-  }
-  if (has_system) {
-    *filename = system_filename;
-    return true;
-  } else if (has_cache) {
-    *filename = cache_filename;
-    return true;
-  } else {
-    return false;
-  }
-}
-
 static const OatHeader* GetOatHeader(const ElfFile* elf_file) {
   uint64_t off = 0;
   if (!elf_file->GetSectionOffsetAndSize(".rodata", &off, nullptr)) {
@@ -106,28 +64,10 @@
   return oat_header;
 }
 
-// This function takes an elf file and reads the current patch delta value
-// encoded in its oat header value
-static bool ReadOatPatchDelta(const ElfFile* elf_file, off_t* delta, std::string* error_msg) {
-  const OatHeader* oat_header = GetOatHeader(elf_file);
-  if (oat_header == nullptr) {
-    *error_msg = "Unable to get oat header from elf file.";
-    return false;
-  }
-  if (!oat_header->IsValid()) {
-    *error_msg = "Elf file has an invalid oat header";
-    return false;
-  }
-  *delta = oat_header->GetImagePatchDelta();
-  return true;
-}
-
-static File* CreateOrOpen(const char* name, bool* created) {
+static File* CreateOrOpen(const char* name) {
   if (OS::FileExists(name)) {
-    *created = false;
     return OS::OpenFileReadWrite(name);
   } else {
-    *created = true;
     std::unique_ptr<File> f(OS::CreateEmptyFile(name));
     if (f.get() != nullptr) {
       if (fchmod(f->Fd(), 0644) != 0) {
@@ -206,12 +146,11 @@
   Thread::Current()->TransitionFromRunnableToSuspended(kNative);
   ScopedObjectAccess soa(Thread::Current());
 
-  t.NewTiming("Image and oat Patching setup");
+  t.NewTiming("Image Patching setup");
   std::vector<gc::space::ImageSpace*> spaces = Runtime::Current()->GetHeap()->GetBootImageSpaces();
   std::map<gc::space::ImageSpace*, std::unique_ptr<File>> space_to_file_map;
   std::map<gc::space::ImageSpace*, std::unique_ptr<MemMap>> space_to_memmap_map;
   std::map<gc::space::ImageSpace*, PatchOat> space_to_patchoat_map;
-  std::map<gc::space::ImageSpace*, bool> space_to_skip_patching_map;
 
   for (size_t i = 0; i < spaces.size(); ++i) {
     gc::space::ImageSpace* space = spaces[i];
@@ -255,8 +194,7 @@
     space_to_memmap_map.emplace(space, std::move(image));
   }
 
-  // Do a first pass over the image spaces. Symlink PIC oat and vdex files, and
-  // prepare PatchOat instances for the rest.
+  // Symlink PIC oat and vdex files and patch the image spaces in memory.
   for (size_t i = 0; i < spaces.size(); ++i) {
     gc::space::ImageSpace* space = spaces[i];
     std::string input_image_filename = space->GetImageFilename();
@@ -277,14 +215,17 @@
       return false;
     }
 
-    bool skip_patching_oat = false;
     MaybePic is_oat_pic = IsOatPic(elf.get());
     if (is_oat_pic >= ERROR_FIRST) {
       // Error logged by IsOatPic
       return false;
-    } else if (is_oat_pic == PIC) {
-      // Do not need to do ELF-file patching. Create a symlink and skip the ELF patching.
+    } else if (is_oat_pic == NOT_PIC) {
+      LOG(ERROR) << "patchoat cannot be used on non-PIC oat file: " << input_oat_file->GetPath();
+      return false;
+    } else {
+      CHECK(is_oat_pic == PIC);
 
+      // Create a symlink.
       std::string converted_image_filename = space->GetImageLocation();
       std::replace(converted_image_filename.begin() + 1, converted_image_filename.end(), '/', '@');
       std::string output_image_filename = output_directory +
@@ -296,23 +237,16 @@
           ImageHeader::GetOatLocationFromImageLocation(output_image_filename);
 
       if (!ReplaceOatFileWithSymlink(input_oat_file->GetPath(),
-                                     output_oat_filename,
-                                     false,
-                                     true) ||
+                                     output_oat_filename) ||
           !SymlinkFile(input_vdex_filename, output_vdex_filename)) {
         // Errors already logged by above call.
         return false;
       }
-      // Don't patch the OAT, since we just symlinked it. Image still needs patching.
-      skip_patching_oat = true;
-    } else {
-      CHECK(is_oat_pic == NOT_PIC);
     }
 
     PatchOat& p = space_to_patchoat_map.emplace(space,
                                                 PatchOat(
                                                     isa,
-                                                    elf.release(),
                                                     space_to_memmap_map.find(space)->second.get(),
                                                     space->GetLiveBitmap(),
                                                     space->GetMemMap(),
@@ -320,36 +254,24 @@
                                                     &space_to_memmap_map,
                                                     timings)).first->second;
 
-    t.NewTiming("Patching files");
-    if (!skip_patching_oat && !p.PatchElf()) {
-      LOG(ERROR) << "Failed to patch oat file " << input_oat_file->GetPath();
-      return false;
-    }
+    t.NewTiming("Patching image");
     if (!p.PatchImage(i == 0)) {
       LOG(ERROR) << "Failed to patch image file " << input_image_filename;
       return false;
     }
-
-    space_to_skip_patching_map.emplace(space, skip_patching_oat);
   }
 
-  // Do a second pass over the image spaces. Patch image files, non-PIC oat files
-  // and symlink their corresponding vdex files.
+  // Write the patched image spaces.
   for (size_t i = 0; i < spaces.size(); ++i) {
     gc::space::ImageSpace* space = spaces[i];
-    std::string input_image_filename = space->GetImageFilename();
-    std::string input_vdex_filename =
-        ImageHeader::GetVdexLocationFromImageLocation(input_image_filename);
 
-    t.NewTiming("Writing files");
+    t.NewTiming("Writing image");
     std::string converted_image_filename = space->GetImageLocation();
     std::replace(converted_image_filename.begin() + 1, converted_image_filename.end(), '/', '@');
     std::string output_image_filename = output_directory +
         (android::base::StartsWith(converted_image_filename, "/") ? "" : "/") +
         converted_image_filename;
-    bool new_oat_out;
-    std::unique_ptr<File>
-        output_image_file(CreateOrOpen(output_image_filename.c_str(), &new_oat_out));
+    std::unique_ptr<File> output_image_file(CreateOrOpen(output_image_filename.c_str()));
     if (output_image_file.get() == nullptr) {
       LOG(ERROR) << "Failed to open output image file at " << output_image_filename;
       return false;
@@ -362,48 +284,10 @@
     if (!success) {
       return false;
     }
-
-    bool skip_patching_oat = space_to_skip_patching_map.find(space)->second;
-    if (!skip_patching_oat) {
-      std::string output_vdex_filename =
-          ImageHeader::GetVdexLocationFromImageLocation(output_image_filename);
-      std::string output_oat_filename =
-          ImageHeader::GetOatLocationFromImageLocation(output_image_filename);
-
-      std::unique_ptr<File>
-          output_oat_file(CreateOrOpen(output_oat_filename.c_str(), &new_oat_out));
-      if (output_oat_file.get() == nullptr) {
-        LOG(ERROR) << "Failed to open output oat file at " << output_oat_filename;
-        return false;
-      }
-      success = p.WriteElf(output_oat_file.get());
-      success = FinishFile(output_oat_file.get(), success);
-      if (success) {
-        success = SymlinkFile(input_vdex_filename, output_vdex_filename);
-      }
-      if (!success) {
-        return false;
-      }
-    }
   }
   return true;
 }
 
-bool PatchOat::WriteElf(File* out) {
-  TimingLogger::ScopedTiming t("Writing Elf File", timings_);
-
-  CHECK(oat_file_.get() != nullptr);
-  CHECK(out != nullptr);
-  size_t expect = oat_file_->Size();
-  if (out->WriteFully(reinterpret_cast<char*>(oat_file_->Begin()), expect) &&
-      out->SetLength(expect) == 0) {
-    return true;
-  } else {
-    LOG(ERROR) << "Writing to oat file " << out->GetPath() << " failed.";
-    return false;
-  }
-}
-
 bool PatchOat::WriteImage(File* out) {
   TimingLogger::ScopedTiming t("Writing image File", timings_);
   std::string error_msg;
@@ -466,22 +350,7 @@
 }
 
 bool PatchOat::ReplaceOatFileWithSymlink(const std::string& input_oat_filename,
-                                         const std::string& output_oat_filename,
-                                         bool output_oat_opened_from_fd,
-                                         bool new_oat_out) {
-  // Need a file when we are PIC, since we symlink over it. Refusing to symlink into FD.
-  if (output_oat_opened_from_fd) {
-    // TODO: installd uses --output-oat-fd. Should we change class linking logic for PIC?
-    LOG(ERROR) << "No output oat filename specified, needs filename for when we are PIC";
-    return false;
-  }
-
-  // Image was PIC. Create symlink where the oat is supposed to go.
-  if (!new_oat_out) {
-    LOG(ERROR) << "Oat file " << output_oat_filename << " already exists, refusing to overwrite";
-    return false;
-  }
-
+                                         const std::string& output_oat_filename) {
   // Delete the original file, since we won't need it.
   unlink(output_oat_filename.c_str());
 
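With non-PIC oat files now rejected outright, the file-level effect of
ReplaceOatFileWithSymlink reduces to two POSIX calls. A minimal sketch of that
behavior (helper name hypothetical; the in-tree version also logs failures):

    #include <string>
    #include <unistd.h>

    // Drop any stale output file, then point the output path at the input oat.
    static bool ReplaceWithSymlink(const std::string& input_oat,
                                   const std::string& output_oat) {
      unlink(output_oat.c_str());  // Best effort; the path may not exist yet.
      return symlink(input_oat.c_str(), output_oat.c_str()) == 0;
    }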
@@ -665,17 +534,18 @@
         mirror::DexCache::SetElementPtrSize(copy_methods, j, copy, pointer_size);
       }
     }
-    ArtField** orig_fields = orig_dex_cache->GetResolvedFields();
-    ArtField** relocated_fields = RelocatedAddressOfPointer(orig_fields);
+    mirror::FieldDexCacheType* orig_fields = orig_dex_cache->GetResolvedFields();
+    mirror::FieldDexCacheType* relocated_fields = RelocatedAddressOfPointer(orig_fields);
     copy_dex_cache->SetField64<false>(
         mirror::DexCache::ResolvedFieldsOffset(),
         static_cast<int64_t>(reinterpret_cast<uintptr_t>(relocated_fields)));
     if (orig_fields != nullptr) {
-      ArtField** copy_fields = RelocatedCopyOf(orig_fields);
+      mirror::FieldDexCacheType* copy_fields = RelocatedCopyOf(orig_fields);
       for (size_t j = 0, num = orig_dex_cache->NumResolvedFields(); j != num; ++j) {
-        ArtField* orig = mirror::DexCache::GetElementPtrSize(orig_fields, j, pointer_size);
-        ArtField* copy = RelocatedAddressOfPointer(orig);
-        mirror::DexCache::SetElementPtrSize(copy_fields, j, copy, pointer_size);
+        mirror::FieldDexCachePair orig =
+            mirror::DexCache::GetNativePairPtrSize(orig_fields, j, pointer_size);
+        mirror::FieldDexCachePair copy(RelocatedAddressOfPointer(orig.object), orig.index);
+        mirror::DexCache::SetNativePairPtrSize(copy_fields, j, copy, pointer_size);
       }
     }
     mirror::MethodTypeDexCacheType* orig_method_types = orig_dex_cache->GetResolvedMethodTypes();
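The resolved-fields loop above relies on the new index-tagged slot layout. A
sketch of what one slot conceptually holds (a simplification of
mirror::NativeDexCachePair; see mirror/dex_cache.h for the real template)
shows why only the object pointer is relocated while the index is carried
across unchanged:

    // Sketch of one resolved-field cache slot as patched above.
    template <typename T>
    struct NativeDexCachePairSketch {
      T* object;     // Resolved entry; the only part patchoat must relocate.
      size_t index;  // Dex index validating that the slot matches the lookup.
    };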
@@ -807,133 +677,6 @@
       object->GetDataPtrSize(pointer_size)), pointer_size);
 }
 
-bool PatchOat::Patch(File* input_oat, off_t delta, File* output_oat, TimingLogger* timings,
-                     bool output_oat_opened_from_fd, bool new_oat_out) {
-  CHECK(input_oat != nullptr);
-  CHECK(output_oat != nullptr);
-  CHECK_GE(input_oat->Fd(), 0);
-  CHECK_GE(output_oat->Fd(), 0);
-  TimingLogger::ScopedTiming t("Setup Oat File Patching", timings);
-
-  std::string error_msg;
-  std::unique_ptr<ElfFile> elf(ElfFile::Open(input_oat,
-                                             PROT_READ | PROT_WRITE, MAP_PRIVATE, &error_msg));
-  if (elf.get() == nullptr) {
-    LOG(ERROR) << "unable to open oat file " << input_oat->GetPath() << " : " << error_msg;
-    return false;
-  }
-
-  MaybePic is_oat_pic = IsOatPic(elf.get());
-  if (is_oat_pic >= ERROR_FIRST) {
-    // Error logged by IsOatPic
-    return false;
-  } else if (is_oat_pic == PIC) {
-    // Do not need to do ELF-file patching. Create a symlink and skip the rest.
-    // Any errors will be logged by the function call.
-    return ReplaceOatFileWithSymlink(input_oat->GetPath(),
-                                     output_oat->GetPath(),
-                                     output_oat_opened_from_fd,
-                                     new_oat_out);
-  } else {
-    CHECK(is_oat_pic == NOT_PIC);
-  }
-
-  PatchOat p(elf.release(), delta, timings);
-  t.NewTiming("Patch Oat file");
-  if (!p.PatchElf()) {
-    return false;
-  }
-
-  t.NewTiming("Writing oat file");
-  if (!p.WriteElf(output_oat)) {
-    return false;
-  }
-  return true;
-}
-
-template <typename ElfFileImpl>
-bool PatchOat::PatchOatHeader(ElfFileImpl* oat_file) {
-  auto rodata_sec = oat_file->FindSectionByName(".rodata");
-  if (rodata_sec == nullptr) {
-    return false;
-  }
-  OatHeader* oat_header = reinterpret_cast<OatHeader*>(oat_file->Begin() + rodata_sec->sh_offset);
-  if (!oat_header->IsValid()) {
-    LOG(ERROR) << "Elf file " << oat_file->GetFilePath() << " has an invalid oat header";
-    return false;
-  }
-  oat_header->RelocateOat(delta_);
-  return true;
-}
-
-bool PatchOat::PatchElf() {
-  if (oat_file_->Is64Bit()) {
-    return PatchElf<ElfFileImpl64>(oat_file_->GetImpl64());
-  } else {
-    return PatchElf<ElfFileImpl32>(oat_file_->GetImpl32());
-  }
-}
-
-template <typename ElfFileImpl>
-bool PatchOat::PatchElf(ElfFileImpl* oat_file) {
-  TimingLogger::ScopedTiming t("Fixup Elf Text Section", timings_);
-
-  // Fix up absolute references to locations within the boot image.
-  if (!oat_file->ApplyOatPatchesTo(".text", delta_)) {
-    return false;
-  }
-
-  // Update the OatHeader fields referencing the boot image.
-  if (!PatchOatHeader<ElfFileImpl>(oat_file)) {
-    return false;
-  }
-
-  bool need_boot_oat_fixup = true;
-  for (unsigned int i = 0; i < oat_file->GetProgramHeaderNum(); ++i) {
-    auto hdr = oat_file->GetProgramHeader(i);
-    if (hdr->p_type == PT_LOAD && hdr->p_vaddr == 0u) {
-      need_boot_oat_fixup = false;
-      break;
-    }
-  }
-  if (!need_boot_oat_fixup) {
-    // This is an app oat file that can be loaded at an arbitrary address in memory.
-    // Boot image references were patched above and there's nothing else to do.
-    return true;
-  }
-
-  // This is a boot oat file that's loaded at a particular address and we need
-  // to patch all absolute addresses, starting with ELF program headers.
-
-  t.NewTiming("Fixup Elf Headers");
-  // Fixup Phdr's
-  oat_file->FixupProgramHeaders(delta_);
-
-  t.NewTiming("Fixup Section Headers");
-  // Fixup Shdr's
-  oat_file->FixupSectionHeaders(delta_);
-
-  t.NewTiming("Fixup Dynamics");
-  oat_file->FixupDynamic(delta_);
-
-  t.NewTiming("Fixup Elf Symbols");
-  // Fixup dynsym
-  if (!oat_file->FixupSymbols(delta_, true)) {
-    return false;
-  }
-  // Fixup symtab
-  if (!oat_file->FixupSymbols(delta_, false)) {
-    return false;
-  }
-
-  t.NewTiming("Fixup Debug Sections");
-  if (!oat_file->FixupDebugSections(delta_)) {
-    return false;
-  }
-
-  return true;
-}
-
 static int orig_argc;
 static char** orig_argv;
 
@@ -968,32 +711,10 @@
   UsageError("Usage: patchoat [options]...");
   UsageError("");
   UsageError("  --instruction-set=<isa>: Specifies the instruction set the patched code is");
-  UsageError("      compiled for. Required if you use --input-oat-location");
-  UsageError("");
-  UsageError("  --input-oat-file=<file.oat>: Specifies the exact filename of the oat file to be");
-  UsageError("      patched.");
-  UsageError("");
-  UsageError("  --input-oat-fd=<file-descriptor>: Specifies the file-descriptor of the oat file");
-  UsageError("      to be patched.");
-  UsageError("");
-  UsageError("  --input-vdex-fd=<file-descriptor>: Specifies the file-descriptor of the vdex file");
-  UsageError("      associated with the oat file.");
-  UsageError("");
-  UsageError("  --input-oat-location=<file.oat>: Specifies the 'location' to read the patched");
-  UsageError("      oat file from. If used one must also supply the --instruction-set");
+  UsageError("      compiled for (required).");
   UsageError("");
   UsageError("  --input-image-location=<file.art>: Specifies the 'location' of the image file to");
-  UsageError("      be patched. If --instruction-set is not given it will use the instruction set");
-  UsageError("      extracted from the --input-oat-file.");
-  UsageError("");
-  UsageError("  --output-oat-file=<file.oat>: Specifies the exact file to write the patched oat");
-  UsageError("      file to.");
-  UsageError("");
-  UsageError("  --output-oat-fd=<file-descriptor>: Specifies the file-descriptor to write the");
-  UsageError("      patched oat file to.");
-  UsageError("");
-  UsageError("  --output-vdex-fd=<file-descriptor>: Specifies the file-descriptor to copy the");
-  UsageError("      the vdex file associated with the patch oat file to.");
+  UsageError("      be patched.");
   UsageError("");
   UsageError("  --output-image-file=<file.art>: Specifies the exact file to write the patched");
   UsageError("      image file to.");
@@ -1001,15 +722,6 @@
   UsageError("  --base-offset-delta=<delta>: Specify the amount to change the old base-offset by.");
   UsageError("      This value may be negative.");
   UsageError("");
-  UsageError("  --patched-image-location=<file.art>: Relocate the oat file to be the same as the");
-  UsageError("      image at the given location. If used one must also specify the");
-  UsageError("      --instruction-set flag. It will search for this image in the same way that");
-  UsageError("      is done when loading one.");
-  UsageError("");
-  UsageError("  --lock-output: Obtain a flock on output oat file before starting.");
-  UsageError("");
-  UsageError("  --no-lock-output: Do not attempt to obtain a flock on output oat file.");
-  UsageError("");
   UsageError("  --dump-timings: dump out patch timing information");
   UsageError("");
   UsageError("  --no-dump-timings: do not dump out patch timing information");
@@ -1018,34 +730,6 @@
   exit(EXIT_FAILURE);
 }
 
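With the oat-file plumbing gone, the surviving options compose a single
image-relocation mode. A representative invocation using only the flags still
documented above (paths and delta illustrative; the delta is expected to be
page-aligned):

    patchoat --instruction-set=arm \
             --input-image-location=/system/framework/boot.art \
             --output-image-file=/data/dalvik-cache/arm/system@framework@boot.art \
             --base-offset-delta=32768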
-static bool ReadBaseDelta(const char* name, off_t* delta, std::string* error_msg) {
-  CHECK(name != nullptr);
-  CHECK(delta != nullptr);
-  std::unique_ptr<File> file;
-  if (OS::FileExists(name)) {
-    file.reset(OS::OpenFileForReading(name));
-    if (file.get() == nullptr) {
-      *error_msg = "Failed to open file %s for reading";
-      return false;
-    }
-  } else {
-    *error_msg = "File %s does not exist";
-    return false;
-  }
-  CHECK(file.get() != nullptr);
-  ImageHeader hdr;
-  if (sizeof(hdr) != file->Read(reinterpret_cast<char*>(&hdr), sizeof(hdr), 0)) {
-    *error_msg = "Failed to read file %s";
-    return false;
-  }
-  if (!hdr.IsValid()) {
-    *error_msg = "%s does not contain a valid image header.";
-    return false;
-  }
-  *delta = hdr.GetPatchDelta();
-  return true;
-}
-
 static int patchoat_image(TimingLogger& timings,
                           InstructionSet isa,
                           const std::string& input_image_location,
@@ -1084,293 +768,6 @@
   return ret ? EXIT_SUCCESS : EXIT_FAILURE;
 }
 
-static int patchoat_oat(TimingLogger& timings,
-                        InstructionSet isa,
-                        const std::string& patched_image_location,
-                        off_t base_delta,
-                        bool base_delta_set,
-                        int input_oat_fd,
-                        int input_vdex_fd,
-                        const std::string& input_oat_location,
-                        std::string input_oat_filename,
-                        bool have_input_oat,
-                        int output_oat_fd,
-                        int output_vdex_fd,
-                        std::string output_oat_filename,
-                        bool have_output_oat,
-                        bool lock_output,
-                        bool debug) {
-  {
-    // Only 1 of these may be set.
-    uint32_t cnt = 0;
-    cnt += (base_delta_set) ? 1 : 0;
-    cnt += (!patched_image_location.empty()) ? 1 : 0;
-    if (cnt > 1) {
-      Usage("Only one of --base-offset-delta or --patched-image-location may be used.");
-    } else if (cnt == 0) {
-      Usage("Must specify --base-offset-delta or --patched-image-location.");
-    }
-  }
-
-  if (!have_input_oat || !have_output_oat) {
-    Usage("Both input and output oat must be supplied to patch an app odex.");
-  }
-
-  if (!input_oat_location.empty()) {
-    if (!LocationToFilename(input_oat_location, isa, &input_oat_filename)) {
-      Usage("Unable to find filename for input oat location %s", input_oat_location.c_str());
-    }
-    if (debug) {
-      LOG(INFO) << "Using input-oat-file " << input_oat_filename;
-    }
-  }
-
-  if ((input_oat_fd == -1) != (input_vdex_fd == -1)) {
-    Usage("Either both input oat and vdex have to be passed as file descriptors or none of them");
-  } else if ((output_oat_fd == -1) != (output_vdex_fd == -1)) {
-    Usage("Either both output oat and vdex have to be passed as file descriptors or none of them");
-  }
-
-  bool match_delta = false;
-  if (!patched_image_location.empty()) {
-    std::string system_filename;
-    bool has_system = false;
-    std::string cache_filename;
-    bool has_cache = false;
-    bool has_android_data_unused = false;
-    bool is_global_cache = false;
-    if (!gc::space::ImageSpace::FindImageFilename(patched_image_location.c_str(), isa,
-                                                  &system_filename, &has_system, &cache_filename,
-                                                  &has_android_data_unused, &has_cache,
-                                                  &is_global_cache)) {
-      Usage("Unable to determine image file for location %s", patched_image_location.c_str());
-    }
-    std::string patched_image_filename;
-    if (has_cache) {
-      patched_image_filename = cache_filename;
-    } else if (has_system) {
-      LOG(WARNING) << "Only image file found was in /system for image location "
-          << patched_image_location;
-      patched_image_filename = system_filename;
-    } else {
-      Usage("Unable to determine image file for location %s", patched_image_location.c_str());
-    }
-    if (debug) {
-      LOG(INFO) << "Using patched-image-file " << patched_image_filename;
-    }
-
-    base_delta_set = true;
-    match_delta = true;
-    std::string error_msg;
-    if (!ReadBaseDelta(patched_image_filename.c_str(), &base_delta, &error_msg)) {
-      Usage(error_msg.c_str(), patched_image_filename.c_str());
-    }
-  }
-
-  if (!IsAligned<kPageSize>(base_delta)) {
-    Usage("Base offset/delta must be alligned to a pagesize (0x%08x) boundary.", kPageSize);
-  }
-
-  // We can symlink VDEX only if we have both input and output specified as filenames.
-  // Store that piece of information before we possibly create bogus filenames for
-  // files passed as file descriptors.
-  bool symlink_vdex = !input_oat_filename.empty() && !output_oat_filename.empty();
-
-  // Infer names of VDEX files.
-  std::string input_vdex_filename;
-  std::string output_vdex_filename;
-  if (!input_oat_filename.empty()) {
-    input_vdex_filename = ReplaceFileExtension(input_oat_filename, "vdex");
-  }
-  if (!output_oat_filename.empty()) {
-    output_vdex_filename = ReplaceFileExtension(output_oat_filename, "vdex");
-  }
-
-  // Do we need to cleanup output files if we fail?
-  bool new_oat_out = false;
-  bool new_vdex_out = false;
-
-  std::unique_ptr<File> input_oat;
-  std::unique_ptr<File> output_oat;
-
-  if (input_oat_fd != -1) {
-    if (input_oat_filename.empty()) {
-      input_oat_filename = "input-oat-file";
-    }
-    input_oat.reset(new File(input_oat_fd, input_oat_filename, false));
-    if (input_oat_fd == output_oat_fd) {
-      input_oat.get()->DisableAutoClose();
-    }
-    if (input_oat == nullptr) {
-      // Unlikely, but ensure exhaustive logging in non-0 exit code case
-      LOG(ERROR) << "Failed to open input oat file by its FD" << input_oat_fd;
-      return EXIT_FAILURE;
-    }
-  } else {
-    CHECK(!input_oat_filename.empty());
-    input_oat.reset(OS::OpenFileForReading(input_oat_filename.c_str()));
-    if (input_oat == nullptr) {
-      int err = errno;
-      LOG(ERROR) << "Failed to open input oat file " << input_oat_filename
-          << ": " << strerror(err) << "(" << err << ")";
-      return EXIT_FAILURE;
-    }
-  }
-
-  std::string error_msg;
-  std::unique_ptr<ElfFile> elf(ElfFile::Open(input_oat.get(), PROT_READ, MAP_PRIVATE, &error_msg));
-  if (elf.get() == nullptr) {
-    LOG(ERROR) << "unable to open oat file " << input_oat->GetPath() << " : " << error_msg;
-    return EXIT_FAILURE;
-  }
-  if (!elf->HasSection(".text.oat_patches")) {
-    LOG(ERROR) << "missing oat patch section in input oat file " << input_oat->GetPath();
-    return EXIT_FAILURE;
-  }
-
-  if (output_oat_fd != -1) {
-    if (output_oat_filename.empty()) {
-      output_oat_filename = "output-oat-file";
-    }
-    output_oat.reset(new File(output_oat_fd, output_oat_filename, true));
-    if (output_oat == nullptr) {
-      // Unlikely, but ensure exhaustive logging in non-0 exit code case
-      LOG(ERROR) << "Failed to open output oat file by its FD" << output_oat_fd;
-    }
-  } else {
-    CHECK(!output_oat_filename.empty());
-    output_oat.reset(CreateOrOpen(output_oat_filename.c_str(), &new_oat_out));
-    if (output_oat == nullptr) {
-      int err = errno;
-      LOG(ERROR) << "Failed to open output oat file " << output_oat_filename
-          << ": " << strerror(err) << "(" << err << ")";
-    }
-  }
-
-  // Open VDEX files if we are not symlinking them.
-  std::unique_ptr<File> input_vdex;
-  std::unique_ptr<File> output_vdex;
-  if (symlink_vdex) {
-    new_vdex_out = !OS::FileExists(output_vdex_filename.c_str());
-  } else {
-    if (input_vdex_fd != -1) {
-      input_vdex.reset(new File(input_vdex_fd, input_vdex_filename, true));
-      if (input_vdex == nullptr) {
-        // Unlikely, but ensure exhaustive logging in non-0 exit code case
-        LOG(ERROR) << "Failed to open input vdex file by its FD" << input_vdex_fd;
-      }
-    } else {
-      input_vdex.reset(OS::OpenFileForReading(input_vdex_filename.c_str()));
-      if (input_vdex == nullptr) {
-        PLOG(ERROR) << "Failed to open input vdex file " << input_vdex_filename;
-        return EXIT_FAILURE;
-      }
-    }
-    if (output_vdex_fd != -1) {
-      output_vdex.reset(new File(output_vdex_fd, output_vdex_filename, true));
-      if (output_vdex == nullptr) {
-        // Unlikely, but ensure exhaustive logging in non-0 exit code case
-        LOG(ERROR) << "Failed to open output vdex file by its FD" << output_vdex_fd;
-      }
-    } else {
-      output_vdex.reset(CreateOrOpen(output_vdex_filename.c_str(), &new_vdex_out));
-      if (output_vdex == nullptr) {
-        PLOG(ERROR) << "Failed to open output vdex file " << output_vdex_filename;
-        return EXIT_FAILURE;
-      }
-    }
-  }
-
-  // TODO: get rid of this.
-  auto cleanup = [&output_oat_filename, &output_vdex_filename, &new_oat_out, &new_vdex_out]
-                 (bool success) {
-    if (!success) {
-      if (new_oat_out) {
-        CHECK(!output_oat_filename.empty());
-        unlink(output_oat_filename.c_str());
-      }
-      if (new_vdex_out) {
-        CHECK(!output_vdex_filename.empty());
-        unlink(output_vdex_filename.c_str());
-      }
-    }
-
-    if (kIsDebugBuild) {
-      LOG(INFO) << "Cleaning up.. success? " << success;
-    }
-  };
-
-  if (output_oat.get() == nullptr) {
-    cleanup(false);
-    return EXIT_FAILURE;
-  }
-
-  if (match_delta) {
-    // Figure out what the current delta is so we can match it to the desired delta.
-    off_t current_delta = 0;
-    if (!ReadOatPatchDelta(elf.get(), &current_delta, &error_msg)) {
-      LOG(ERROR) << "Unable to get current delta: " << error_msg;
-      cleanup(false);
-      return EXIT_FAILURE;
-    }
-    // Before this line base_delta is the desired final delta. We need it to be the actual amount to
-    // change everything by. We subtract the current delta from it to make it this.
-    base_delta -= current_delta;
-    if (!IsAligned<kPageSize>(base_delta)) {
-      LOG(ERROR) << "Given image file was relocated by an illegal delta";
-      cleanup(false);
-      return false;
-    }
-  }
-
-  if (debug) {
-    LOG(INFO) << "moving offset by " << base_delta
-        << " (0x" << std::hex << base_delta << ") bytes or "
-        << std::dec << (base_delta/kPageSize) << " pages.";
-  }
-
-  ScopedFlock output_oat_lock;
-  if (lock_output) {
-    if (!output_oat_lock.Init(output_oat.get(), &error_msg)) {
-      LOG(ERROR) << "Unable to lock output oat " << output_oat->GetPath() << ": " << error_msg;
-      cleanup(false);
-      return EXIT_FAILURE;
-    }
-  }
-
-  TimingLogger::ScopedTiming pt("patch oat", &timings);
-  bool ret = PatchOat::Patch(input_oat.get(), base_delta, output_oat.get(), &timings,
-                             output_oat_fd >= 0,  // was it opened from FD?
-                             new_oat_out);
-  ret = FinishFile(output_oat.get(), ret);
-
-  if (ret) {
-    if (symlink_vdex) {
-      ret = SymlinkFile(input_vdex_filename, output_vdex_filename);
-    } else {
-      ret = unix_file::CopyFile(*input_vdex.get(), output_vdex.get());
-    }
-  }
-
-  if (kIsDebugBuild) {
-    LOG(INFO) << "Exiting with return ... " << ret;
-  }
-  cleanup(ret);
-  return ret ? EXIT_SUCCESS : EXIT_FAILURE;
-}
-
-static int ParseFd(const StringPiece& option, const char* cmdline_arg) {
-  int fd;
-  const char* fd_str = option.substr(strlen(cmdline_arg)).data();
-  if (!ParseInt(fd_str, &fd)) {
-    Usage("Failed to parse %d argument '%s' as an integer", cmdline_arg, fd_str);
-  }
-  if (fd < 0) {
-    Usage("%s pass a negative value %d", cmdline_arg, fd);
-  }
-  return fd;
-}
-
 static int patchoat(int argc, char **argv) {
   InitLogging(argv, Runtime::Aborter);
   MemMap::Init();
@@ -1392,23 +789,11 @@
   // cmd line args
   bool isa_set = false;
   InstructionSet isa = kNone;
-  std::string input_oat_filename;
-  std::string input_oat_location;
-  int input_oat_fd = -1;
-  int input_vdex_fd = -1;
-  bool have_input_oat = false;
   std::string input_image_location;
-  std::string output_oat_filename;
-  int output_oat_fd = -1;
-  int output_vdex_fd = -1;
-  bool have_output_oat = false;
   std::string output_image_filename;
   off_t base_delta = 0;
   bool base_delta_set = false;
-  std::string patched_image_filename;
-  std::string patched_image_location;
   bool dump_timings = kIsDebugBuild;
-  bool lock_output = true;
 
   for (int i = 0; i < argc; ++i) {
     const StringPiece option(argv[i]);
@@ -1423,42 +808,8 @@
       if (isa == kNone) {
         Usage("Unknown or invalid instruction set %s", isa_str);
       }
-    } else if (option.starts_with("--input-oat-location=")) {
-      if (have_input_oat) {
-        Usage("Only one of --input-oat-file, --input-oat-location and --input-oat-fd may be used.");
-      }
-      have_input_oat = true;
-      input_oat_location = option.substr(strlen("--input-oat-location=")).data();
-    } else if (option.starts_with("--input-oat-file=")) {
-      if (have_input_oat) {
-        Usage("Only one of --input-oat-file, --input-oat-location and --input-oat-fd may be used.");
-      }
-      have_input_oat = true;
-      input_oat_filename = option.substr(strlen("--input-oat-file=")).data();
-    } else if (option.starts_with("--input-oat-fd=")) {
-      if (have_input_oat) {
-        Usage("Only one of --input-oat-file, --input-oat-location and --input-oat-fd may be used.");
-      }
-      have_input_oat = true;
-      input_oat_fd = ParseFd(option, "--input-oat-fd=");
-    } else if (option.starts_with("--input-vdex-fd=")) {
-      input_vdex_fd = ParseFd(option, "--input-vdex-fd=");
     } else if (option.starts_with("--input-image-location=")) {
       input_image_location = option.substr(strlen("--input-image-location=")).data();
-    } else if (option.starts_with("--output-oat-file=")) {
-      if (have_output_oat) {
-        Usage("Only one of --output-oat-file, and --output-oat-fd may be used.");
-      }
-      have_output_oat = true;
-      output_oat_filename = option.substr(strlen("--output-oat-file=")).data();
-    } else if (option.starts_with("--output-oat-fd=")) {
-      if (have_output_oat) {
-        Usage("Only one of --output-oat-file, --output-oat-fd may be used.");
-      }
-      have_output_oat = true;
-      output_oat_fd = ParseFd(option, "--output-oat-fd=");
-    } else if (option.starts_with("--output-vdex-fd=")) {
-      output_vdex_fd = ParseFd(option, "--output-vdex-fd=");
     } else if (option.starts_with("--output-image-file=")) {
       output_image_filename = option.substr(strlen("--output-image-file=")).data();
     } else if (option.starts_with("--base-offset-delta=")) {
@@ -1467,12 +818,6 @@
       if (!ParseInt(base_delta_str, &base_delta)) {
         Usage("Failed to parse --base-offset-delta argument '%s' as an off_t", base_delta_str);
       }
-    } else if (option.starts_with("--patched-image-location=")) {
-      patched_image_location = option.substr(strlen("--patched-image-location=")).data();
-    } else if (option == "--lock-output") {
-      lock_output = true;
-    } else if (option == "--no-lock-output") {
-      lock_output = false;
     } else if (option == "--dump-timings") {
       dump_timings = true;
     } else if (option == "--no-dump-timings") {
@@ -1487,33 +832,13 @@
     Usage("Instruction set must be set.");
   }
 
-  int ret;
-  if (!input_image_location.empty()) {
-    ret = patchoat_image(timings,
-                         isa,
-                         input_image_location,
-                         output_image_filename,
-                         base_delta,
-                         base_delta_set,
-                         debug);
-  } else {
-    ret = patchoat_oat(timings,
-                       isa,
-                       patched_image_location,
-                       base_delta,
-                       base_delta_set,
-                       input_oat_fd,
-                       input_vdex_fd,
-                       input_oat_location,
-                       input_oat_filename,
-                       have_input_oat,
-                       output_oat_fd,
-                       output_vdex_fd,
-                       output_oat_filename,
-                       have_output_oat,
-                       lock_output,
-                       debug);
-  }
+  int ret = patchoat_image(timings,
+                           isa,
+                           input_image_location,
+                           output_image_filename,
+                           base_delta,
+                           base_delta_set,
+                           debug);
 
   timings.EndTiming();
   if (dump_timings) {
diff --git a/patchoat/patchoat.h b/patchoat/patchoat.h
index a519631..e15a6bc 100644
--- a/patchoat/patchoat.h
+++ b/patchoat/patchoat.h
@@ -44,17 +44,7 @@
 
 class PatchOat {
  public:
-  // Patch only the oat file
-  static bool Patch(File* oat_in, off_t delta, File* oat_out, TimingLogger* timings,
-                    bool output_oat_opened_from_fd,  // Was this using --oatput-oat-fd ?
-                    bool new_oat_out);               // Output oat was a new file created by us?
-
-  // Patch only the image (art file)
-  static bool Patch(const std::string& art_location, off_t delta, File* art_out, InstructionSet isa,
-                    TimingLogger* timings);
-
-  // Patch both the image and the oat file
-  static bool Patch(const std::string& art_location,
+  static bool Patch(const std::string& image_location,
                     off_t delta,
                     const std::string& output_directory,
                     InstructionSet isa,
@@ -64,18 +54,11 @@
   PatchOat(PatchOat&&) = default;
 
  private:
-  // Takes ownership only of the ElfFile. All other pointers are only borrowed.
-  PatchOat(ElfFile* oat_file, off_t delta, TimingLogger* timings)
-      : oat_file_(oat_file), image_(nullptr), bitmap_(nullptr), heap_(nullptr), delta_(delta),
-        isa_(kNone), space_map_(nullptr), timings_(timings) {}
-  PatchOat(InstructionSet isa, MemMap* image, gc::accounting::ContinuousSpaceBitmap* bitmap,
-           MemMap* heap, off_t delta, TimingLogger* timings)
-      : image_(image), bitmap_(bitmap), heap_(heap),
-        delta_(delta), isa_(isa), space_map_(nullptr), timings_(timings) {}
-  PatchOat(InstructionSet isa, ElfFile* oat_file, MemMap* image,
+  // All pointers are only borrowed.
+  PatchOat(InstructionSet isa, MemMap* image,
            gc::accounting::ContinuousSpaceBitmap* bitmap, MemMap* heap, off_t delta,
            std::map<gc::space::ImageSpace*, std::unique_ptr<MemMap>>* map, TimingLogger* timings)
-      : oat_file_(oat_file), image_(image), bitmap_(bitmap), heap_(heap),
+      : image_(image), bitmap_(bitmap), heap_(heap),
         delta_(delta), isa_(isa), space_map_(map), timings_(timings) {}
 
   // Was the .art image at image_path made with --compile-pic ?
@@ -94,9 +77,7 @@
   // Attempt to replace the file with a symlink
   // Returns false if it fails
   static bool ReplaceOatFileWithSymlink(const std::string& input_oat_filename,
-                                        const std::string& output_oat_filename,
-                                        bool output_oat_opened_from_fd,
-                                        bool new_oat_out);  // Output oat was newly created?
+                                        const std::string& output_oat_filename);
 
   static void BitmapCallback(mirror::Object* obj, void* arg)
       REQUIRES_SHARED(Locks::mutator_lock_) {
@@ -108,13 +89,6 @@
   void FixupMethod(ArtMethod* object, ArtMethod* copy)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  // Patches oat in place, modifying the oat_file given to the constructor.
-  bool PatchElf();
-  template <typename ElfFileImpl>
-  bool PatchElf(ElfFileImpl* oat_file);
-  template <typename ElfFileImpl>
-  bool PatchOatHeader(ElfFileImpl* oat_file);
-
   bool PatchImage(bool primary_image) REQUIRES_SHARED(Locks::mutator_lock_);
   void PatchArtFields(const ImageHeader* image_header) REQUIRES_SHARED(Locks::mutator_lock_);
   void PatchArtMethods(const ImageHeader* image_header) REQUIRES_SHARED(Locks::mutator_lock_);
@@ -128,7 +102,6 @@
   void PatchDexFileArrays(mirror::ObjectArray<mirror::Object>* img_roots)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  bool WriteElf(File* out);
   bool WriteImage(File* out);
 
   template <typename T>
@@ -175,19 +148,6 @@
     return reinterpret_cast<T*>(ret);
   }
 
-  template <typename T>
-  T RelocatedAddressOfIntPointer(T obj) const {
-    if (obj == 0) {
-      return obj;
-    }
-    T ret = obj + delta_;
-    // Trim off high bits in case negative relocation with 64 bit patchoat.
-    if (Is32BitISA()) {
-      ret = static_cast<T>(static_cast<uint32_t>(ret));
-    }
-    return ret;
-  }
-
   bool Is32BitISA() const {
     return InstructionSetPointerSize(isa_) == PointerSize::k32;
   }
@@ -213,8 +173,6 @@
     mirror::Object* const copy_;
   };
 
-  // The elf file we are patching.
-  std::unique_ptr<ElfFile> oat_file_;
   // A mmap of the image we are patching. This is modified.
   const MemMap* const image_;
   // The bitmap over the image within the heap we are patching. This is not modified.
diff --git a/profman/profile_assistant_test.cc b/profman/profile_assistant_test.cc
index d395c17..52f3b52 100644
--- a/profman/profile_assistant_test.cc
+++ b/profman/profile_assistant_test.cc
@@ -16,11 +16,14 @@
 
 #include <gtest/gtest.h>
 
+#include "art_method-inl.h"
 #include "base/unix_file/fd_file.h"
 #include "common_runtime_test.h"
 #include "exec_utils.h"
-#include "profile_assistant.h"
 #include "jit/profile_compilation_info.h"
+#include "mirror/class-inl.h"
+#include "profile_assistant.h"
+#include "scoped_thread_state_change-inl.h"
 #include "utils.h"
 
 namespace art {
@@ -95,10 +98,12 @@
     return ExecAndReturnCode(argv_str, &error);
   }
 
-  bool CreateProfile(std::string class_file_contents, const std::string& filename) {
+  bool CreateProfile(std::string profile_file_contents,
+                     const std::string& filename,
+                     const std::string& dex_location) {
     ScratchFile class_names_file;
     File* file = class_names_file.GetFile();
-    EXPECT_TRUE(file->WriteFully(class_file_contents.c_str(), class_file_contents.length()));
+    EXPECT_TRUE(file->WriteFully(profile_file_contents.c_str(), profile_file_contents.length()));
     EXPECT_EQ(0, file->Flush());
     EXPECT_TRUE(file->ResetOffset());
     std::string profman_cmd = GetProfmanCmd();
@@ -106,8 +111,8 @@
     argv_str.push_back(profman_cmd);
     argv_str.push_back("--create-profile-from=" + class_names_file.GetFilename());
     argv_str.push_back("--reference-profile-file=" + filename);
-    argv_str.push_back("--apk=" + GetLibCoreDexFileNames()[0]);
-    argv_str.push_back("--dex-location=classes.dex");
+    argv_str.push_back("--apk=" + dex_location);
+    argv_str.push_back("--dex-location=" + dex_location);
     std::string error;
     EXPECT_EQ(ExecAndReturnCode(argv_str, &error), 0);
     return true;
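Outside the test fixture, the same four flags drive profile creation from a
text file. A representative command line mirroring the helper above (paths
illustrative):

    profman --create-profile-from=/tmp/methods_and_classes.txt \
            --reference-profile-file=/tmp/primary.prof \
            --apk=/data/app/example/base.apk \
            --dex-location=/data/app/example/base.apk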
@@ -121,7 +126,7 @@
     argv_str.push_back("--dump-classes");
     argv_str.push_back("--profile-file=" + filename);
     argv_str.push_back("--apk=" + GetLibCoreDexFileNames()[0]);
-    argv_str.push_back("--dex-location=classes.dex");
+    argv_str.push_back("--dex-location=" + GetLibCoreDexFileNames()[0]);
     argv_str.push_back("--dump-output-to-fd=" + std::to_string(GetFd(class_names_file)));
     std::string error;
     EXPECT_EQ(ExecAndReturnCode(argv_str, &error), 0);
@@ -137,11 +142,74 @@
 
   bool CreateAndDump(const std::string& input_file_contents, std::string* output_file_contents) {
     ScratchFile profile_file;
-    EXPECT_TRUE(CreateProfile(input_file_contents, profile_file.GetFilename()));
+    EXPECT_TRUE(CreateProfile(input_file_contents,
+                              profile_file.GetFilename(),
+                              GetLibCoreDexFileNames()[0]));
     profile_file.GetFile()->ResetOffset();
     EXPECT_TRUE(DumpClasses(profile_file.GetFilename(), output_file_contents));
     return true;
   }
+
+  mirror::Class* GetClass(jobject class_loader, const std::string& clazz) {
+    ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+    Thread* self = Thread::Current();
+    ScopedObjectAccess soa(self);
+    StackHandleScope<1> hs(self);
+    Handle<mirror::ClassLoader> h_loader(
+        hs.NewHandle(self->DecodeJObject(class_loader)->AsClassLoader()));
+    return class_linker->FindClass(self, clazz.c_str(), h_loader);
+  }
+
+  ArtMethod* GetVirtualMethod(jobject class_loader,
+                              const std::string& clazz,
+                              const std::string& name) {
+    mirror::Class* klass = GetClass(class_loader, clazz);
+    ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+    const auto pointer_size = class_linker->GetImagePointerSize();
+    ArtMethod* method = nullptr;
+    Thread* self = Thread::Current();
+    ScopedObjectAccess soa(self);
+    for (auto& m : klass->GetVirtualMethods(pointer_size)) {
+      if (name == m.GetName()) {
+        EXPECT_TRUE(method == nullptr);
+        method = &m;
+      }
+    }
+    return method;
+  }
+
+  // Verify that given method has the expected inline caches and nothing else.
+  void AssertInlineCaches(ArtMethod* method,
+                          const std::set<mirror::Class*>& expected_classes,
+                          const ProfileCompilationInfo& info,
+                          bool is_megamorphic,
+                          bool is_missing_types)
+      REQUIRES_SHARED(Locks::mutator_lock_) {
+    ProfileCompilationInfo::OfflineProfileMethodInfo pmi;
+    ASSERT_TRUE(info.GetMethod(method->GetDexFile()->GetLocation(),
+                               method->GetDexFile()->GetLocationChecksum(),
+                               method->GetDexMethodIndex(),
+                               &pmi));
+    ASSERT_EQ(pmi.inline_caches.size(), 1u);
+    ProfileCompilationInfo::DexPcData dex_pc_data = pmi.inline_caches.begin()->second;
+
+    ASSERT_EQ(dex_pc_data.is_megamorphic, is_megamorphic);
+    ASSERT_EQ(dex_pc_data.is_missing_types, is_missing_types);
+    ASSERT_EQ(expected_classes.size(), dex_pc_data.classes.size());
+    size_t found = 0;
+    for (mirror::Class* it : expected_classes) {
+      for (const auto& class_ref : dex_pc_data.classes) {
+        ProfileCompilationInfo::DexReference dex_ref =
+            pmi.dex_references[class_ref.dex_profile_index];
+        if (dex_ref.MatchesDex(&(it->GetDexFile())) &&
+            class_ref.type_index == it->GetDexTypeIndex()) {
+          found++;
+        }
+      }
+    }
+
+    ASSERT_EQ(expected_classes.size(), found);
+  }
 };
 
 TEST_F(ProfileAssistantTest, AdviseCompilationEmptyReferences) {
@@ -358,25 +426,28 @@
 TEST_F(ProfileAssistantTest, TestProfileCreationAllMatch) {
   // Class names put here need to be in sorted order.
   std::vector<std::string> class_names = {
-    "java.lang.Comparable",
-    "java.lang.Math",
-    "java.lang.Object"
+    "Ljava/lang/Comparable;",
+    "Ljava/lang/Math;",
+    "Ljava/lang/Object;"
   };
   std::string input_file_contents;
+  std::string expected_contents;
   for (std::string& class_name : class_names) {
     input_file_contents += class_name + std::string("\n");
+    expected_contents += DescriptorToDot(class_name.c_str()) +
+        std::string("\n");
   }
   std::string output_file_contents;
   ASSERT_TRUE(CreateAndDump(input_file_contents, &output_file_contents));
-  ASSERT_EQ(output_file_contents, input_file_contents);
+  ASSERT_EQ(output_file_contents, expected_contents);
 }
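The input/output asymmetry asserted here is deliberate: profile input now uses
JVM type descriptors, while --dump-classes prints dot-separated names, so the
tests round-trip through DescriptorToDot. For example (illustrative values):

    // Round trip assumed by the assertion above:
    //   input line:   "Ljava/lang/Math;"  (descriptor)
    //   dump output:  "java.lang.Math"    (DescriptorToDot)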
 
 TEST_F(ProfileAssistantTest, TestProfileCreationOneNotMatched) {
   // Class names put here need to be in sorted order.
   std::vector<std::string> class_names = {
-    "doesnt.match.this.one",
-    "java.lang.Comparable",
-    "java.lang.Object"
+    "Ldoesnt/match/this/one;",
+    "Ljava/lang/Comparable;",
+    "Ljava/lang/Object;"
   };
   std::string input_file_contents;
   for (std::string& class_name : class_names) {
@@ -385,16 +456,17 @@
   std::string output_file_contents;
   ASSERT_TRUE(CreateAndDump(input_file_contents, &output_file_contents));
   std::string expected_contents =
-      class_names[1] + std::string("\n") + class_names[2] + std::string("\n");
+      DescriptorToDot(class_names[1].c_str()) + std::string("\n") +
+      DescriptorToDot(class_names[2].c_str()) + std::string("\n");
   ASSERT_EQ(output_file_contents, expected_contents);
 }
 
 TEST_F(ProfileAssistantTest, TestProfileCreationNoneMatched) {
   // Class names put here need to be in sorted order.
   std::vector<std::string> class_names = {
-    "doesnt.match.this.one",
-    "doesnt.match.this.one.either",
-    "nor.this.one"
+    "Ldoesnt/match/this/one;",
+    "Ldoesnt/match/this/one/either;",
+    "Lnor/this/one;"
   };
   std::string input_file_contents;
   for (std::string& class_name : class_names) {
@@ -406,4 +478,115 @@
   ASSERT_EQ(output_file_contents, expected_contents);
 }
 
+TEST_F(ProfileAssistantTest, TestProfileCreateInlineCache) {
+  // Create the profile content.
+  std::vector<std::string> methods = {
+    "LTestInline;->inlineMonomorphic(LSuper;)I+LSubA;",
+    "LTestInline;->inlinePolymorphic(LSuper;)I+LSubA;,LSubB;,LSubC;",
+    "LTestInline;->inlineMegamorphic(LSuper;)I+LSubA;,LSubB;,LSubC;,LSubD;,LSubE;",
+    "LTestInline;->inlineMissingTypes(LSuper;)I+missing_types",
+    "LTestInline;->noInlineCache(LSuper;)I"
+  };
+  std::string input_file_contents;
+  for (std::string& m : methods) {
+    input_file_contents += m + std::string("\n");
+  }
+
+  // Create the profile and save it to disk.
+  ScratchFile profile_file;
+  ASSERT_TRUE(CreateProfile(input_file_contents,
+                            profile_file.GetFilename(),
+                            GetTestDexFileName("ProfileTestMultiDex")));
+
+  // Load the profile from disk.
+  ProfileCompilationInfo info;
+  profile_file.GetFile()->ResetOffset();
+  ASSERT_TRUE(info.Load(GetFd(profile_file)));
+
+  // Load the dex files and verify that the profile contains the expected methods info.
+  ScopedObjectAccess soa(Thread::Current());
+  jobject class_loader = LoadDex("ProfileTestMultiDex");
+  ASSERT_NE(class_loader, nullptr);
+
+  mirror::Class* sub_a = GetClass(class_loader, "LSubA;");
+  mirror::Class* sub_b = GetClass(class_loader, "LSubB;");
+  mirror::Class* sub_c = GetClass(class_loader, "LSubC;");
+
+  ASSERT_TRUE(sub_a != nullptr);
+  ASSERT_TRUE(sub_b != nullptr);
+  ASSERT_TRUE(sub_c != nullptr);
+
+  {
+    // Verify that method inlineMonomorphic has the expected inline caches and nothing else.
+    ArtMethod* inline_monomorphic = GetVirtualMethod(class_loader,
+                                                     "LTestInline;",
+                                                     "inlineMonomorphic");
+    ASSERT_TRUE(inline_monomorphic != nullptr);
+    std::set<mirror::Class*> expected_monomorphic;
+    expected_monomorphic.insert(sub_a);
+    AssertInlineCaches(inline_monomorphic,
+                       expected_monomorphic,
+                       info,
+                       /*megamorphic*/false,
+                       /*missing_types*/false);
+  }
+
+  {
+    // Verify that method inlinePolymorphic has the expected inline caches and nothing else.
+    ArtMethod* inline_polymorphic = GetVirtualMethod(class_loader,
+                                                     "LTestInline;",
+                                                     "inlinePolymorphic");
+    ASSERT_TRUE(inline_polymorphic != nullptr);
+    std::set<mirror::Class*> expected_polymorphic;
+    expected_polymorphic.insert(sub_a);
+    expected_polymorphic.insert(sub_b);
+    expected_polymorphic.insert(sub_c);
+    AssertInlineCaches(inline_polymorphic,
+                       expected_polymorphic,
+                       info,
+                       /*megamorphic*/false,
+                       /*missing_types*/false);
+  }
+
+  {
+    // Verify that method inlineMegamorphic has the expected inline caches and nothing else.
+    ArtMethod* inline_megamorphic = GetVirtualMethod(class_loader,
+                                                     "LTestInline;",
+                                                     "inlineMegamorphic");
+    ASSERT_TRUE(inline_megamorphic != nullptr);
+    std::set<mirror::Class*> expected_megamorphic;
+    AssertInlineCaches(inline_megamorphic,
+                       expected_megamorphic,
+                       info,
+                       /*megamorphic*/true,
+                       /*missing_types*/false);
+  }
+
+  {
+    // Verify that method inlineMissingTypes has the expected inline caches and nothing else.
+    ArtMethod* inline_missing_types = GetVirtualMethod(class_loader,
+                                                       "LTestInline;",
+                                                       "inlineMissingTypes");
+    ASSERT_TRUE(inline_missing_types != nullptr);
+    std::set<mirror::Class*> expected_missing_types;
+    AssertInlineCaches(inline_missing_types,
+                       expected_missing_types,
+                       info,
+                       /*megamorphic*/false,
+                       /*missing_types*/true);
+  }
+
+  {
+    // Verify that method noInlineCache has no inline caches in the profile.
+    ArtMethod* no_inline_cache = GetVirtualMethod(class_loader, "LTestInline;", "noInlineCache");
+    ASSERT_TRUE(no_inline_cache != nullptr);
+    ProfileCompilationInfo::OfflineProfileMethodInfo pmi_no_inline_cache;
+    ASSERT_TRUE(info.GetMethod(no_inline_cache->GetDexFile()->GetLocation(),
+                               no_inline_cache->GetDexFile()->GetLocationChecksum(),
+                               no_inline_cache->GetDexMethodIndex(),
+                               &pmi_no_inline_cache));
+    ASSERT_TRUE(pmi_no_inline_cache.inline_caches.empty());
+  }
+}
+
 }  // namespace art
diff --git a/profman/profman.cc b/profman/profman.cc
index a42e4f1..f7316cc 100644
--- a/profman/profman.cc
+++ b/profman/profman.cc
@@ -36,6 +36,7 @@
 #include "base/stringpiece.h"
 #include "base/time_utils.h"
 #include "base/unix_file/fd_file.h"
+#include "bytecode_utils.h"
 #include "dex_file.h"
 #include "jit/profile_compilation_info.h"
 #include "runtime.h"
@@ -136,6 +137,15 @@
 static constexpr uint16_t kDefaultTestProfileMethodRatio = 5;
 static constexpr uint16_t kDefaultTestProfileClassRatio = 5;
 
+// Separators used when parsing the human-friendly representation of profiles.
+static const std::string kMethodSep = "->";
+static const std::string kMissingTypesMarker = "missing_types";
+static constexpr char kProfileParsingInlineCacheSep = '+';
+static constexpr char kProfileParsingTypeSep = ',';
+static constexpr char kProfileParsingFirstCharInSignature = '(';
+
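Together these constants define the grammar of one profile line. In the
example

    LTestInline;->inlinePolymorphic(LSuper;)I+LSubA;,LSubB;,LSubC;

kMethodSep ("->") splits the class descriptor from the method, '(' marks the
start of the signature, '+' introduces the inline-cache section (or the
literal "missing_types"), and ',' separates the cached type descriptors.
ProcessLine below implements exactly this split.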
+// TODO(calin): This class has grown too much from its initial design. Split the functionality
+// into smaller, more contained pieces.
 class ProfMan FINAL {
  public:
   ProfMan() :
@@ -522,6 +532,187 @@
     return output.release();
   }
 
+  // Find class klass_descriptor in the given dex_files and store its reference
+  // in the out parameter class_ref.
+  // Return true if the definition of the class was found in any of the dex_files.
+  bool FindClass(const std::vector<std::unique_ptr<const DexFile>>& dex_files,
+                 const std::string& klass_descriptor,
+                 /*out*/ProfileMethodInfo::ProfileClassReference* class_ref) {
+    for (const std::unique_ptr<const DexFile>& dex_file_ptr : dex_files) {
+      const DexFile* dex_file = dex_file_ptr.get();
+      const DexFile::TypeId* type_id = dex_file->FindTypeId(klass_descriptor.c_str());
+      if (type_id == nullptr) {
+        continue;
+      }
+      dex::TypeIndex type_index = dex_file->GetIndexForTypeId(*type_id);
+      if (dex_file->FindClassDef(type_index) == nullptr) {
+        // Class is only referenced in the current dex file but not defined in it.
+        continue;
+      }
+      class_ref->dex_file = dex_file;
+      class_ref->type_index = type_index;
+      return true;
+    }
+    return false;
+  }
+
+  // Find the method specified by method_spec in the class class_ref. The method
+  // must have a single INVOKE_VIRTUAL in its bytecode.
+  // Upon success it returns true and stores the method index and the invoke dex pc
+  // in the output parameters.
+  // The expected method spec is "inlinePolymorphic(LSuper;)I"; any inline cache
+  // suffix has already been stripped off by the caller (ProcessLine).
+  //
+  // TODO(calin): support INVOKE_INTERFACE and the range variants.
+  bool FindMethodWithSingleInvoke(const ProfileMethodInfo::ProfileClassReference& class_ref,
+                                  const std::string& method_spec,
+                                  /*out*/uint16_t* method_index,
+                                  /*out*/uint32_t* dex_pc) {
+    std::vector<std::string> name_and_signature;
+    Split(method_spec, kProfileParsingFirstCharInSignature, &name_and_signature);
+    if (name_and_signature.size() != 2) {
+      LOG(ERROR) << "Invalid method name and signature " << method_spec;
+      return false;
+    }
+    const std::string& name = name_and_signature[0];
+    const std::string& signature = kProfileParsingFirstCharInSignature + name_and_signature[1];
+    const DexFile* dex_file = class_ref.dex_file;
+
+    const DexFile::StringId* name_id = dex_file->FindStringId(name.c_str());
+    if (name_id == nullptr) {
+      LOG(ERROR) << "Could not find name: "  << name;
+      return false;
+    }
+    dex::TypeIndex return_type_idx;
+    std::vector<dex::TypeIndex> param_type_idxs;
+    if (!dex_file->CreateTypeList(signature, &return_type_idx, &param_type_idxs)) {
+      LOG(ERROR) << "Could not create type list" << signature;
+      return false;
+    }
+    const DexFile::ProtoId* proto_id = dex_file->FindProtoId(return_type_idx, param_type_idxs);
+    if (proto_id == nullptr) {
+      LOG(ERROR) << "Could not find proto_id: " << name;
+      return false;
+    }
+    const DexFile::MethodId* method_id = dex_file->FindMethodId(
+        dex_file->GetTypeId(class_ref.type_index), *name_id, *proto_id);
+    if (method_id == nullptr) {
+      LOG(ERROR) << "Could not find method_id: " << name;
+      return false;
+    }
+
+    *method_index = dex_file->GetIndexForMethodId(*method_id);
+
+    uint32_t offset = dex_file->FindCodeItemOffset(
+        *dex_file->FindClassDef(class_ref.type_index),
+        *method_index);
+    const DexFile::CodeItem* code_item = dex_file->GetCodeItem(offset);
+
+    bool found_invoke = false;
+    for (CodeItemIterator it(*code_item); !it.Done(); it.Advance()) {
+      if (it.CurrentInstruction().Opcode() == Instruction::INVOKE_VIRTUAL) {
+        if (found_invoke) {
+          LOG(ERROR) << "Multiple INVOKE_VIRTUAL instructions found: " << name;
+          return false;
+        }
+        found_invoke = true;
+        *dex_pc = it.CurrentDexPc();
+      }
+    }
+    if (!found_invoke) {
+      LOG(ERROR) << "Could not find any INVOKE_VIRTUAL: " << name;
+    }
+    return found_invoke;
+  }
+
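A sketch of how the helper above is meant to be driven (variable setup
hypothetical; ProcessLine below does this for real):

    // Resolve "inlineMonomorphic(LSuper;)I" on the class behind class_ref and
    // capture the dex pc of its single INVOKE_VIRTUAL call site.
    uint16_t method_index = 0;
    uint32_t dex_pc = 0;
    if (FindMethodWithSingleInvoke(class_ref, "inlineMonomorphic(LSuper;)I",
                                   &method_index, &dex_pc)) {
      // method_index and dex_pc now identify the inline-cache site.
    }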
+  // Process a line defining a class or a method and its inline caches.
+  // Upon success, return true and add the class or method info to the profile.
+  // The possible line formats are:
+  // "LJustTheClass;".
+  // "LTestInline;->inlinePolymorphic(LSuper;)I+LSubA;,LSubB;,LSubC;".
+  // "LTestInline;->inlineMissingTypes(LSuper;)I+missing_types".
+  // "LTestInline;->inlineNoInlineCaches(LSuper;)I".
+  // The method and classes are searched only in the given dex files.
+  bool ProcessLine(const std::vector<std::unique_ptr<const DexFile>>& dex_files,
+                   const std::string& line,
+                   /*out*/ProfileCompilationInfo* profile) {
+    std::string klass;
+    std::string method_str;
+    size_t method_sep_index = line.find(kMethodSep);
+    if (method_sep_index == std::string::npos) {
+      klass = line;
+    } else {
+      klass = line.substr(0, method_sep_index);
+      method_str = line.substr(method_sep_index + kMethodSep.size());
+    }
+
+    ProfileMethodInfo::ProfileClassReference class_ref;
+    if (!FindClass(dex_files, klass, &class_ref)) {
+      LOG(WARNING) << "Could not find class: " << klass;
+      return false;
+    }
+
+    if (method_str.empty()) {
+      // No method to add. Just add the class.
+      std::set<DexCacheResolvedClasses> resolved_class_set;
+      const DexFile* dex_file = class_ref.dex_file;
+      const auto& dex_resolved_classes = resolved_class_set.emplace(
+            dex_file->GetLocation(),
+            dex_file->GetBaseLocation(),
+            dex_file->GetLocationChecksum());
+      dex_resolved_classes.first->AddClass(class_ref.type_index);
+      profile->AddMethodsAndClasses(std::vector<ProfileMethodInfo>(), resolved_class_set);
+      return true;
+    }
+
+    // Process the method.
+    std::string method_spec;
+    std::vector<std::string> inline_cache_elems;
+
+    std::vector<std::string> method_elems;
+    bool is_missing_types = false;
+    Split(method_str, kProfileParsingInlineCacheSep, &method_elems);
+    if (method_elems.size() == 2) {
+      method_spec = method_elems[0];
+      is_missing_types = method_elems[1] == kMissingTypesMarker;
+      if (!is_missing_types) {
+        Split(method_elems[1], kProfileParsingTypeSep, &inline_cache_elems);
+      }
+    } else if (method_elems.size() == 1) {
+      method_spec = method_elems[0];
+    } else {
+      LOG(ERROR) << "Invalid method line: " << line;
+      return false;
+    }
+
+    uint16_t method_index;
+    uint32_t dex_pc;
+    if (!FindMethodWithSingleInvoke(class_ref, method_spec, &method_index, &dex_pc)) {
+      return false;
+    }
+    std::vector<ProfileMethodInfo::ProfileClassReference> classes(inline_cache_elems.size());
+    size_t class_it = 0;
+    for (const std::string& ic_class : inline_cache_elems) {
+      if (!FindClass(dex_files, ic_class, &(classes[class_it++]))) {
+        LOG(ERROR) << "Could not find class: " << ic_class;
+        return false;
+      }
+    }
+    std::vector<ProfileMethodInfo::ProfileInlineCache> inline_caches;
+    inline_caches.emplace_back(dex_pc, is_missing_types, classes);
+    std::vector<ProfileMethodInfo> pmi;
+    pmi.emplace_back(class_ref.dex_file, method_index, inline_caches);
+
+    profile->AddMethodsAndClasses(pmi, std::set<DexCacheResolvedClasses>());
+    return true;
+  }
+
+  // Creates a profile from a human-friendly textual representation.
+  // The expected input format is:
+  //   # Classes
+  //   Ljava/lang/Comparable;
+  //   Ljava/lang/Math;
+  //   # Methods with inline caches
+  //   LTestInline;->inlinePolymorphic(LSuper;)I+LSubA;,LSubB;,LSubC;
+  //   LTestInline;->noInlineCache(LSuper;)I
   int CreateProfile() {
     // Validate parameters for this command.
     if (apk_files_.empty() && apks_fd_.empty()) {
@@ -550,51 +741,22 @@
         return -1;
       }
     }
-    // Read the user-specified list of classes (dot notation rather than descriptors).
+    // Read the user-specified list of classes and methods.
     std::unique_ptr<std::unordered_set<std::string>>
-        user_class_list(ReadCommentedInputFromFile<std::unordered_set<std::string>>(
+        user_lines(ReadCommentedInputFromFile<std::unordered_set<std::string>>(
             create_profile_from_file_.c_str(), nullptr));  // No post-processing.
-    std::unordered_set<std::string> matched_user_classes;
-    // Open the dex files to look up class names.
+
+    // Open the dex files to look up classes and methods.
     std::vector<std::unique_ptr<const DexFile>> dex_files;
     OpenApkFilesFromLocations(&dex_files);
-    // Iterate over the dex files looking for class names in the input stream.
-    std::set<DexCacheResolvedClasses> resolved_class_set;
-    for (auto& dex_file : dex_files) {
-      // Compute the set of classes to be added for this dex file first.  This
-      // avoids creating an entry in the profile information for dex files that
-      // contribute no classes.
-      std::unordered_set<dex::TypeIndex> classes_to_be_added;
-      for (const auto& klass : *user_class_list) {
-        std::string descriptor = DotToDescriptor(klass.c_str());
-        const DexFile::TypeId* type_id = dex_file->FindTypeId(descriptor.c_str());
-        if (type_id == nullptr) {
-          continue;
-        }
-        classes_to_be_added.insert(dex_file->GetIndexForTypeId(*type_id));
-        matched_user_classes.insert(klass);
-      }
-      if (classes_to_be_added.empty()) {
-        continue;
-      }
-      // Insert the DexCacheResolved Classes into the set expected for
-      // AddMethodsAndClasses.
-      std::set<DexCacheResolvedClasses>::iterator dex_resolved_classes =
-          resolved_class_set.emplace(dex_file->GetLocation(),
-                                     dex_file->GetBaseLocation(),
-                                     dex_file->GetLocationChecksum()).first;
-      dex_resolved_classes->AddClasses(classes_to_be_added.begin(), classes_to_be_added.end());
-    }
-    // Warn the user if we didn't find matches for every class.
-    for (const auto& klass : *user_class_list) {
-      if (matched_user_classes.find(klass) == matched_user_classes.end()) {
-        LOG(WARNING) << "requested class '" << klass << "' was not matched in any dex file";
-      }
-    }
-    // Generate the profile data structure.
+
+    // Process the lines one by one and add the successful ones to the profile.
     ProfileCompilationInfo info;
-    std::vector<ProfileMethodInfo> methods;  // No methods for now.
-    info.AddMethodsAndClasses(methods, resolved_class_set);
+
+    for (const auto& line : *user_lines) {
+      ProcessLine(dex_files, line, &info);
+    }
+
     // Write the profile file.
     CHECK(info.Save(fd));
     if (close(fd) < 0) {
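
The CreateProfile() loop above feeds each commented-input line to ProcessLine(), which recognizes the class and method syntax documented in the comment earlier. A minimal sketch of how a method line with an inline cache splits into its parts (a hypothetical helper, not the actual profman parser; the descriptor syntax is taken from that comment):

    #include <algorithm>
    #include <string>
    #include <vector>

    // Splits e.g. "LTestInline;->inlinePolymorphic(LSuper;)I+LSubA;,LSubB;"
    // into class descriptor, method signature and inline-cache class list.
    bool SplitMethodLine(const std::string& line,
                         std::string* klass,
                         std::string* method,
                         std::vector<std::string>* ic_classes) {
      size_t arrow = line.find("->");
      if (arrow == std::string::npos) {
        return false;  // Plain class line, no method part.
      }
      *klass = line.substr(0, arrow);
      size_t plus = line.find('+', arrow);
      *method = line.substr(arrow + 2,
                            plus == std::string::npos ? std::string::npos
                                                      : plus - (arrow + 2));
      for (size_t start = (plus == std::string::npos) ? line.size() : plus + 1;
           start < line.size();) {
        size_t comma = std::min(line.find(',', start), line.size());
        ic_classes->push_back(line.substr(start, comma - start));
        start = comma + 1;
      }
      return true;
    }
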
diff --git a/runtime/Android.bp b/runtime/Android.bp
index d136aa1..d075c58 100644
--- a/runtime/Android.bp
+++ b/runtime/Android.bp
@@ -99,6 +99,7 @@
         "intern_table.cc",
         "interpreter/interpreter.cc",
         "interpreter/interpreter_common.cc",
+        "interpreter/interpreter_intrinsics.cc",
         "interpreter/interpreter_switch_impl.cc",
         "interpreter/unstarted_runtime.cc",
         "java_vm_ext.cc",
@@ -156,6 +157,7 @@
         "native/java_lang_Thread.cc",
         "native/java_lang_Throwable.cc",
         "native/java_lang_VMClassLoader.cc",
+        "native/java_lang_Void.cc",
         "native/java_lang_invoke_MethodHandleImpl.cc",
         "native/java_lang_ref_FinalizerReference.cc",
         "native/java_lang_ref_Reference.cc",
@@ -171,6 +173,7 @@
         "native/org_apache_harmony_dalvik_ddmc_DdmServer.cc",
         "native/org_apache_harmony_dalvik_ddmc_DdmVmInternal.cc",
         "native/sun_misc_Unsafe.cc",
+        "non_debuggable_classes.cc",
         "oat.cc",
         "oat_file.cc",
         "oat_file_assistant.cc",
diff --git a/runtime/arch/arm/fault_handler_arm.cc b/runtime/arch/arm/fault_handler_arm.cc
index daa2dff..923ff4f 100644
--- a/runtime/arch/arm/fault_handler_arm.cc
+++ b/runtime/arch/arm/fault_handler_arm.cc
@@ -47,24 +47,6 @@
   return instr_size;
 }
 
-void FaultManager::HandleNestedSignal(int sig ATTRIBUTE_UNUSED, siginfo_t* info ATTRIBUTE_UNUSED,
-                                      void* context) {
-  // Note that in this handler we set up the registers and return to
-  // longjmp directly rather than going through an assembly language stub.  The
-  // reason for this is that longjmp is (currently) in ARM mode and that would
-  // require switching modes in the stub - incurring an unwanted relocation.
-
-  struct ucontext *uc = reinterpret_cast<struct ucontext*>(context);
-  struct sigcontext *sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext);
-  Thread* self = Thread::Current();
-  CHECK(self != nullptr);  // This will cause a SIGABRT if self is null.
-
-  sc->arm_r0 = reinterpret_cast<uintptr_t>(*self->GetNestedSignalState());
-  sc->arm_r1 = 1;
-  sc->arm_pc = reinterpret_cast<uintptr_t>(longjmp);
-  VLOG(signals) << "longjmp address: " << reinterpret_cast<void*>(sc->arm_pc);
-}
-
 void FaultManager::GetMethodAndReturnPcAndSp(siginfo_t* siginfo ATTRIBUTE_UNUSED, void* context,
                                              ArtMethod** out_method,
                                              uintptr_t* out_return_pc, uintptr_t* out_sp) {
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index 8531091..72aa785 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -1487,6 +1487,7 @@
 .Lconflict_trampoline:
     // Call the runtime stub to populate the ImtConflictTable and jump to the
     // resolved method.
+    mov r0, r12  // Load interface method
     INVOKE_TRAMPOLINE_BODY artInvokeInterfaceTrampoline
 END art_quick_imt_conflict_trampoline
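
This `mov` materializes the interface method (stashed in r12 by the IMT conflict stub) in the first-argument register before the tail call, and the same pattern repeats for every architecture below. Conceptually the C++ side can then take the interface method as an explicit parameter; a hypothetical signature sketch (names and parameter list assumed, not the runtime's actual declaration):

    // Hypothetical sketch of the convention: the stub now passes the
    // conflicting interface method explicitly as the first argument.
    extern "C" void* artInvokeInterfaceTrampolineSketch(
        void* interface_method,  // Previously re-derived inside the stub.
        void* this_object,
        void* self) {
      // Populate the ImtConflictTable entry for interface_method, then
      // return the resolved implementation to jump to (elided).
      return nullptr;
    }
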
 
diff --git a/runtime/arch/arm64/fault_handler_arm64.cc b/runtime/arch/arm64/fault_handler_arm64.cc
index c02be87..193af58 100644
--- a/runtime/arch/arm64/fault_handler_arm64.cc
+++ b/runtime/arch/arm64/fault_handler_arm64.cc
@@ -39,21 +39,6 @@
 
 namespace art {
 
-void FaultManager::HandleNestedSignal(int sig ATTRIBUTE_UNUSED, siginfo_t* info ATTRIBUTE_UNUSED,
-                                      void* context) {
-  // To match the case used in ARM we return directly to the longjmp function
-  // rather than through a trivial assembly language stub.
-
-  struct ucontext *uc = reinterpret_cast<struct ucontext*>(context);
-  struct sigcontext *sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext);
-  Thread* self = Thread::Current();
-  CHECK(self != nullptr);       // This will cause a SIGABRT if self is null.
-
-  sc->regs[0] = reinterpret_cast<uintptr_t>(*self->GetNestedSignalState());
-  sc->regs[1] = 1;
-  sc->pc = reinterpret_cast<uintptr_t>(longjmp);
-}
-
 void FaultManager::GetMethodAndReturnPcAndSp(siginfo_t* siginfo ATTRIBUTE_UNUSED, void* context,
                                              ArtMethod** out_method,
                                              uintptr_t* out_return_pc, uintptr_t* out_sp) {
diff --git a/runtime/arch/arm64/instruction_set_features_arm64.cc b/runtime/arch/arm64/instruction_set_features_arm64.cc
index 01bd177..e5f6f11 100644
--- a/runtime/arch/arm64/instruction_set_features_arm64.cc
+++ b/runtime/arch/arm64/instruction_set_features_arm64.cc
@@ -53,6 +53,7 @@
     static const char* arm64_known_variants[] = {
         "cortex-a35",
         "exynos-m1",
+        "exynos-m2",
         "denver64",
         "kryo"
     };
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index bfbe481..5b5d2ef 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -1183,15 +1183,13 @@
     add    x4, x0, #MIRROR_OBJECT_LOCK_WORD_OFFSET  // exclusive load/store has no immediate anymore
 .Lretry_lock:
     ldr    w2, [xSELF, #THREAD_ID_OFFSET] // TODO: Can the thread ID really change during the loop?
-    ldxr   w1, [x4]
-    mov    x3, x1
-    and    w3, w3, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED  // zero the gc bits
+    ldaxr  w1, [x4]                   // acquire needed only in most common case
+    and    w3, w1, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED  // zero the gc bits
     cbnz   w3, .Lnot_unlocked         // already thin locked
     // unlocked case - x1: original lock word that's zero except for the read barrier bits.
     orr    x2, x1, x2                 // x2 holds thread id with count of 0 with preserved read barrier bits
     stxr   w3, w2, [x4]
     cbnz   w3, .Llock_stxr_fail       // store failed, retry
-    dmb    ishld                      // full (LoadLoad|LoadStore) memory barrier
     ret
 .Lnot_unlocked:  // x1: original lock word
     lsr    w3, w1, LOCK_WORD_STATE_SHIFT
@@ -1200,8 +1198,7 @@
     uxth   w2, w2                     // zero top 16 bits
     cbnz   w2, .Lslow_lock            // lock word and self thread id's match -> recursive lock
                                       // else contention, go to slow path
-    mov    x3, x1                     // copy the lock word to check count overflow.
-    and    w3, w3, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED  // zero the gc bits.
+    and    w3, w1, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED  // zero the gc bits.
     add    w2, w3, #LOCK_WORD_THIN_LOCK_COUNT_ONE  // increment count in lock word placing in w2 to check overflow
     lsr    w3, w2, #LOCK_WORD_GC_STATE_SHIFT     // if the first gc state bit is set, we overflowed.
     cbnz   w3, .Lslow_lock            // if we overflow the count go slow path
@@ -1246,23 +1243,19 @@
     lsr    w2, w1, LOCK_WORD_STATE_SHIFT
     cbnz   w2, .Lslow_unlock          // if either of the top two bits are set, go slow path
     ldr    w2, [xSELF, #THREAD_ID_OFFSET]
-    mov    x3, x1                     // copy lock word to check thread id equality
-    and    w3, w3, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED  // zero the gc bits
+    and    w3, w1, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED  // zero the gc bits
     eor    w3, w3, w2                 // lock_word.ThreadId() ^ self->ThreadId()
     uxth   w3, w3                     // zero top 16 bits
     cbnz   w3, .Lslow_unlock          // do lock word and self thread id's match?
-    mov    x3, x1                     // copy lock word to detect transition to unlocked
-    and    w3, w3, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED  // zero the gc bits
+    and    w3, w1, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED  // zero the gc bits
     cmp    w3, #LOCK_WORD_THIN_LOCK_COUNT_ONE
     bpl    .Lrecursive_thin_unlock
     // transition to unlocked
-    mov    x3, x1
-    and    w3, w3, #LOCK_WORD_GC_STATE_MASK_SHIFTED  // w3: zero except for the preserved read barrier bits
-    dmb    ish                        // full (LoadStore|StoreStore) memory barrier
+    and    w3, w1, #LOCK_WORD_GC_STATE_MASK_SHIFTED  // w3: zero except for the preserved read barrier bits
 #ifndef USE_READ_BARRIER
-    str    w3, [x4]
+    stlr   w3, [x4]
 #else
-    stxr   w2, w3, [x4]               // Need to use atomic instructions for read barrier
+    stlxr  w2, w3, [x4]               // Need to use atomic instructions for read barrier
     cbnz   w2, .Lunlock_stxr_fail     // store failed, retry
 #endif
     ret
@@ -1276,7 +1269,7 @@
 #endif
     ret
 .Lunlock_stxr_fail:
-    b      .Lretry_unlock               // retry
+    b      .Lretry_unlock             // retry
 .Lslow_unlock:
     SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case exception allocation triggers GC
     mov    x1, xSELF                  // pass Thread::Current
@@ -1973,6 +1966,7 @@
 .Lconflict_trampoline:
     // Call the runtime stub to populate the ImtConflictTable and jump to the
     // resolved method.
+    mov x0, xIP0  // Load interface method
     INVOKE_TRAMPOLINE_BODY artInvokeInterfaceTrampoline
 END art_quick_imt_conflict_trampoline
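
The `ldaxr`/`stlr` rewrite in the locking entrypoints above folds the standalone `dmb` barriers into acquire loads and release stores. A minimal C++ analogue of the same ordering change for a simplified thin lock (lock word reduced to just the owner thread id; GC bits and the recursion count are omitted):

    #include <atomic>
    #include <cstdint>

    // Acquire on the CAS that takes the lock, release on the store that
    // publishes the unlocked word -- no explicit fences needed.
    bool TryThinLock(std::atomic<uint32_t>& lock_word, uint32_t thread_id) {
      uint32_t expected = 0u;  // 0 means unlocked.
      return lock_word.compare_exchange_strong(expected, thread_id,
                                               std::memory_order_acquire,
                                               std::memory_order_relaxed);
    }

    void ThinUnlock(std::atomic<uint32_t>& lock_word) {
      lock_word.store(0u, std::memory_order_release);  // Like the stlr above.
    }
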
 
diff --git a/runtime/arch/mips/fault_handler_mips.cc b/runtime/arch/mips/fault_handler_mips.cc
index 1792f31..f9c19e8 100644
--- a/runtime/arch/mips/fault_handler_mips.cc
+++ b/runtime/arch/mips/fault_handler_mips.cc
@@ -35,10 +35,6 @@
 
 namespace art {
 
-void FaultManager::HandleNestedSignal(int sig ATTRIBUTE_UNUSED, siginfo_t* info ATTRIBUTE_UNUSED,
-                                      void* context ATTRIBUTE_UNUSED) {
-}
-
 void FaultManager::GetMethodAndReturnPcAndSp(siginfo_t* siginfo, void* context,
                                              ArtMethod** out_method,
                                              uintptr_t* out_return_pc, uintptr_t* out_sp) {
diff --git a/runtime/arch/mips/quick_entrypoints_mips.S b/runtime/arch/mips/quick_entrypoints_mips.S
index ec8ae85..5d61539 100644
--- a/runtime/arch/mips/quick_entrypoints_mips.S
+++ b/runtime/arch/mips/quick_entrypoints_mips.S
@@ -1763,6 +1763,7 @@
 
 .Lconflict_trampoline:
     # Call the runtime stub to populate the ImtConflictTable and jump to the resolved method.
+    move    $a0, $t7                                         # Load interface method.
     INVOKE_TRAMPOLINE_BODY artInvokeInterfaceTrampoline
 END art_quick_imt_conflict_trampoline
 
@@ -2048,11 +2049,12 @@
     lw    $t0, MIRROR_STRING_COUNT_OFFSET($a0)    # this.length()
 #endif
     slt   $t1, $a2, $zero # if fromIndex < 0
-#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
+#if defined(_MIPS_ARCH_MIPS32R6)
     seleqz $a2, $a2, $t1  #     fromIndex = 0;
 #else
     movn   $a2, $zero, $t1 #    fromIndex = 0;
 #endif
+
 #if (STRING_COMPRESSION_FEATURE)
     srl   $t0, $a3, 1     # $a3 holds count (with flag) and $t0 holds actual length
 #endif
diff --git a/runtime/arch/mips64/asm_support_mips64.S b/runtime/arch/mips64/asm_support_mips64.S
index 35f20fb..ef82bd2 100644
--- a/runtime/arch/mips64/asm_support_mips64.S
+++ b/runtime/arch/mips64/asm_support_mips64.S
@@ -70,14 +70,16 @@
 // Macros to poison (negate) the reference for heap poisoning.
 .macro POISON_HEAP_REF rRef
 #ifdef USE_HEAP_POISONING
-    subu \rRef, $zero, \rRef
+    dsubu \rRef, $zero, \rRef
+    dext  \rRef, \rRef, 0, 32
 #endif  // USE_HEAP_POISONING
 .endm
 
 // Macros to unpoison (negate) the reference for heap poisoning.
 .macro UNPOISON_HEAP_REF rRef
 #ifdef USE_HEAP_POISONING
-    subu \rRef, $zero, \rRef
+    dsubu \rRef, $zero, \rRef
+    dext  \rRef, \rRef, 0, 32
 #endif  // USE_HEAP_POISONING
 .endm
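
The `dsubu`/`dext` pair computes the arithmetic negation of the reference and then zero-extends the low 32 bits, so a poisoned reference never carries sign-extension garbage in the upper half of a 64-bit register. The same transform in C++ (a sketch):

    #include <cstdint>

    // Poisoning is 32-bit negation; the cast mirrors the dext zero-extension.
    inline uint64_t PoisonHeapRef(uint32_t ref) {
      return static_cast<uint32_t>(0u - ref);  // Upper 32 bits stay zero.
    }

    // Unpoisoning is the same negation applied again.
    inline uint32_t UnpoisonHeapRef(uint64_t poisoned) {
      return 0u - static_cast<uint32_t>(poisoned);
    }
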
 
diff --git a/runtime/arch/mips64/fault_handler_mips64.cc b/runtime/arch/mips64/fault_handler_mips64.cc
index 709cab5..d668d3a 100644
--- a/runtime/arch/mips64/fault_handler_mips64.cc
+++ b/runtime/arch/mips64/fault_handler_mips64.cc
@@ -35,10 +35,6 @@
 
 namespace art {
 
-void FaultManager::HandleNestedSignal(int sig ATTRIBUTE_UNUSED, siginfo_t* info ATTRIBUTE_UNUSED,
-                                      void* context ATTRIBUTE_UNUSED) {
-}
-
 void FaultManager::GetMethodAndReturnPcAndSp(siginfo_t* siginfo, void* context,
                                              ArtMethod** out_method,
                                              uintptr_t* out_return_pc, uintptr_t* out_sp) {
diff --git a/runtime/arch/mips64/quick_entrypoints_mips64.S b/runtime/arch/mips64/quick_entrypoints_mips64.S
index 28d7c77..3ee9c4a 100644
--- a/runtime/arch/mips64/quick_entrypoints_mips64.S
+++ b/runtime/arch/mips64/quick_entrypoints_mips64.S
@@ -1715,6 +1715,7 @@
 
 .Lconflict_trampoline:
     # Call the runtime stub to populate the ImtConflictTable and jump to the resolved method.
+    move   $a0, $t0                                          # Load interface method.
     INVOKE_TRAMPOLINE_BODY artInvokeInterfaceTrampoline
 END art_quick_imt_conflict_trampoline
 
diff --git a/runtime/arch/x86/fault_handler_x86.cc b/runtime/arch/x86/fault_handler_x86.cc
index a4d6bb4..f407ebf 100644
--- a/runtime/arch/x86/fault_handler_x86.cc
+++ b/runtime/arch/x86/fault_handler_x86.cc
@@ -75,12 +75,6 @@
 extern "C" void art_quick_throw_stack_overflow();
 extern "C" void art_quick_test_suspend();
 
-// Note this is different from the others (no underscore on 64 bit mac) due to
-// the way the symbol is defined in the .S file.
-// TODO: fix the symbols for 64 bit mac - there is a double underscore prefix for some
-// of them.
-extern "C" void art_nested_signal_return();
-
 // Get the size of an instruction in bytes.
 // Return 0 if the instruction is not handled.
 static uint32_t GetInstructionSize(const uint8_t* pc) {
@@ -247,21 +241,6 @@
   return pc - startpc;
 }
 
-void FaultManager::HandleNestedSignal(int, siginfo_t*, void* context) {
-  // For the Intel architectures we need to go to an assembly language
-  // stub.  This is because the 32 bit call to longjmp is much different
-  // from the 64 bit ABI call and pushing things onto the stack inside this
-  // handler was unwieldy and ugly.  The use of the stub means we can keep
-  // this code the same for both 32 and 64 bit.
-
-  Thread* self = Thread::Current();
-  CHECK(self != nullptr);  // This will cause a SIGABRT if self is null.
-
-  struct ucontext* uc = reinterpret_cast<struct ucontext*>(context);
-  uc->CTX_JMP_BUF = reinterpret_cast<uintptr_t>(*self->GetNestedSignalState());
-  uc->CTX_EIP = reinterpret_cast<uintptr_t>(art_nested_signal_return);
-}
-
 void FaultManager::GetMethodAndReturnPcAndSp(siginfo_t* siginfo, void* context,
                                              ArtMethod** out_method,
                                              uintptr_t* out_return_pc, uintptr_t* out_sp) {
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 8c907e0..5f38dc8 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -1806,6 +1806,7 @@
 .Lconflict_trampoline:
     // Call the runtime stub to populate the ImtConflictTable and jump to the
     // resolved method.
+    movl %edi, %eax  // Load interface method
     POP EDI
     INVOKE_TRAMPOLINE_BODY artInvokeInterfaceTrampoline
 END_FUNCTION art_quick_imt_conflict_trampoline
@@ -2136,19 +2137,6 @@
     ret
 END_FUNCTION art_quick_string_compareto
 
-// Return from a nested signal:
-// Entry:
-//  eax: address of jmp_buf in TLS
-
-DEFINE_FUNCTION art_nested_signal_return
-    SETUP_GOT_NOSAVE ebx            // sets %ebx for call into PLT
-    movl LITERAL(1), %ecx
-    PUSH ecx                        // second arg to longjmp (1)
-    PUSH eax                        // first arg to longjmp (jmp_buf)
-    call PLT_SYMBOL(longjmp)
-    UNREACHABLE
-END_FUNCTION art_nested_signal_return
-
 // Create a function `name` calling the ReadBarrier::Mark routine,
 // getting its argument and returning its result through register
 // `reg`, saving and restoring all caller-save registers.
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index f1be52e..e87b165 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -1662,6 +1662,7 @@
 .Lconflict_trampoline:
     // Call the runtime stub to populate the ImtConflictTable and jump to the
     // resolved method.
+    movq %r10, %rdi  // Load interface method
     INVOKE_TRAMPOLINE_BODY artInvokeInterfaceTrampoline
 #endif  // __APPLE__
 END_FUNCTION art_quick_imt_conflict_trampoline
@@ -2099,18 +2100,6 @@
     ret
 END_FUNCTION art_quick_instance_of
 
-
-// Return from a nested signal:
-// Entry:
-//  rdi: address of jmp_buf in TLS
-
-DEFINE_FUNCTION art_nested_signal_return
-                                    // first arg to longjmp is already in correct register
-    movq LITERAL(1), %rsi           // second arg to longjmp (1)
-    call PLT_SYMBOL(longjmp)
-    UNREACHABLE
-END_FUNCTION art_nested_signal_return
-
 // Create a function `name` calling the ReadBarrier::Mark routine,
 // getting its argument and returning its result through register
 // `reg`, saving and restoring all caller-save registers.
diff --git a/runtime/art_method-inl.h b/runtime/art_method-inl.h
index 473d9cf..b47f8f0 100644
--- a/runtime/art_method-inl.h
+++ b/runtime/art_method-inl.h
@@ -55,8 +55,10 @@
   if (kIsDebugBuild) {
     if (!IsRuntimeMethod()) {
       CHECK(result != nullptr) << this;
-      CHECK(result->IsIdxLoaded() || result->IsErroneous())
-          << result->GetStatus() << " " << result->PrettyClass();
+      if (kCheckDeclaringClassState) {
+        CHECK(result->IsIdxLoaded() || result->IsErroneous())
+            << result->GetStatus() << " " << result->PrettyClass();
+      }
     } else {
       CHECK(result == nullptr) << this;
     }
@@ -89,7 +91,7 @@
 
 template <ReadBarrierOption kReadBarrierOption>
 inline uint32_t ArtMethod::GetAccessFlags() {
-  if (kIsDebugBuild) {
+  if (kCheckDeclaringClassState) {
     Thread* self = Thread::Current();
     if (!Locks::mutator_lock_->IsSharedHeld(self)) {
       if (self->IsThreadSuspensionAllowable()) {
@@ -118,8 +120,10 @@
 }
 
 inline uint32_t ArtMethod::GetDexMethodIndex() {
-  DCHECK(IsRuntimeMethod() || GetDeclaringClass()->IsIdxLoaded() ||
-         GetDeclaringClass()->IsErroneous());
+  if (kCheckDeclaringClassState) {
+    CHECK(IsRuntimeMethod() || GetDeclaringClass()->IsIdxLoaded() ||
+          GetDeclaringClass()->IsErroneous());
+  }
   return GetDexMethodIndexUnchecked();
 }
 
@@ -343,7 +347,11 @@
 
 inline uint16_t ArtMethod::GetClassDefIndex() {
   DCHECK(!IsProxyMethod());
-  return GetDeclaringClass()->GetDexClassDefIndex();
+  if (LIKELY(!IsObsolete())) {
+    return GetDeclaringClass()->GetDexClassDefIndex();
+  } else {
+    return FindObsoleteDexClassDefIndex();
+  }
 }
 
 inline const DexFile::ClassDef& ArtMethod::GetClassDef() {
diff --git a/runtime/art_method.cc b/runtime/art_method.cc
index 9d74e7c..80a8773 100644
--- a/runtime/art_method.cc
+++ b/runtime/art_method.cc
@@ -104,6 +104,16 @@
   UNREACHABLE();
 }
 
+uint16_t ArtMethod::FindObsoleteDexClassDefIndex() {
+  DCHECK(!Runtime::Current()->IsAotCompiler()) << PrettyMethod();
+  DCHECK(IsObsolete());
+  const DexFile* dex_file = GetDexFile();
+  const dex::TypeIndex declaring_class_type = dex_file->GetMethodId(GetDexMethodIndex()).class_idx_;
+  const DexFile::ClassDef* class_def = dex_file->FindClassDef(declaring_class_type);
+  CHECK(class_def != nullptr);
+  return dex_file->GetIndexForClassDef(*class_def);
+}
+
 mirror::String* ArtMethod::GetNameAsString(Thread* self) {
   CHECK(!IsProxyMethod());
   StackHandleScope<1> hs(self);
diff --git a/runtime/art_method.h b/runtime/art_method.h
index 3d51fdd..2248c3b 100644
--- a/runtime/art_method.h
+++ b/runtime/art_method.h
@@ -53,6 +53,8 @@
 
 class ArtMethod FINAL {
  public:
+  static constexpr bool kCheckDeclaringClassState = kIsDebugBuild;
+
   ArtMethod() : access_flags_(0), dex_code_item_offset_(0), dex_method_index_(0),
       method_index_(0), hotness_count_(0) { }
 
@@ -223,13 +225,10 @@
   }
 
   bool IsObsolete() {
-    // TODO Should maybe make this IsIntrinsic check not needed
-    return !IsIntrinsic() && (GetAccessFlags() & kAccObsoleteMethod) != 0;
+    return (GetAccessFlags() & kAccObsoleteMethod) != 0;
   }
 
   void SetIsObsolete() {
-    // TODO We should really support redefining intrinsic if possible.
-    DCHECK(!IsIntrinsic());
     AddAccessFlags(kAccObsoleteMethod);
   }
 
@@ -701,6 +700,8 @@
   } ptr_sized_fields_;
 
  private:
+  uint16_t FindObsoleteDexClassDefIndex() REQUIRES_SHARED(Locks::mutator_lock_);
+
   bool IsAnnotatedWith(jclass klass, uint32_t visibility);
 
   static constexpr size_t PtrSizedFieldsOffset(PointerSize pointer_size) {
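
kCheckDeclaringClassState turns the declaring-class state checks into compile-time-gated CHECKs: in release builds the `if` is dead code and vanishes, while debug builds get a hard CHECK rather than a DCHECK. The general shape of the pattern, with stand-in names (a sketch):

    #include <cassert>

    // Stand-in for kIsDebugBuild; when false, the guarded check is removed
    // by dead-code elimination, so release builds pay nothing for it.
    static constexpr bool kCheckInvariants = true;

    inline int GetIndexChecked(int index, int size) {
      if (kCheckInvariants) {
        assert(index >= 0 && index < size);  // Stand-in for CHECK(...).
      }
      return index;
    }
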
diff --git a/runtime/asm_support.h b/runtime/asm_support.h
index c7a94a9..4a2e34f 100644
--- a/runtime/asm_support.h
+++ b/runtime/asm_support.h
@@ -246,7 +246,7 @@
 ADD_TEST_EQ(MIRROR_STRING_VALUE_OFFSET, art::mirror::String::ValueOffset().Int32Value())
 
 // String compression feature.
-#define STRING_COMPRESSION_FEATURE 0
+#define STRING_COMPRESSION_FEATURE 1
 ADD_TEST_EQ(STRING_COMPRESSION_FEATURE, art::mirror::kUseStringCompression);
 
 #if defined(__cplusplus)
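
Flipping STRING_COMPRESSION_FEATURE to 1 means the String count field now packs the compression flag next to the length, which is why the MIPS indexOf intrinsic above recovers the real length with `srl $t0, $a3, 1`. A sketch of that decoding (bit layout inferred from the shift; the flag's polarity is an assumption here):

    #include <cstdint>

    // count = (length << 1) | flag, matching the srl-by-1 in the intrinsic.
    inline uint32_t StringLength(uint32_t count_with_flag) {
      return count_with_flag >> 1;
    }

    inline bool HasCompressionFlag(uint32_t count_with_flag) {
      return (count_with_flag & 1u) != 0u;  // Which value means "compressed"
                                            // is assumed, not confirmed here.
    }
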
diff --git a/runtime/base/arena_allocator.cc b/runtime/base/arena_allocator.cc
index db43319..5aede38 100644
--- a/runtime/base/arena_allocator.cc
+++ b/runtime/base/arena_allocator.cc
@@ -15,6 +15,7 @@
  */
 
 #include <algorithm>
+#include <cstddef>
 #include <iomanip>
 #include <numeric>
 
@@ -27,7 +28,7 @@
 
 namespace art {
 
-static constexpr size_t kMemoryToolRedZoneBytes = 8;
+constexpr size_t kMemoryToolRedZoneBytes = 8;
 constexpr size_t Arena::kDefaultSize;
 
 template <bool kCount>
@@ -168,23 +169,75 @@
 Arena::Arena() : bytes_allocated_(0), next_(nullptr) {
 }
 
+class MallocArena FINAL : public Arena {
+ public:
+  explicit MallocArena(size_t size = Arena::kDefaultSize);
+  virtual ~MallocArena();
+ private:
+  static constexpr size_t RequiredOverallocation() {
+    return (alignof(std::max_align_t) < ArenaAllocator::kArenaAlignment)
+        ? ArenaAllocator::kArenaAlignment - alignof(std::max_align_t)
+        : 0u;
+  }
+
+  uint8_t* unaligned_memory_;
+};
+
 MallocArena::MallocArena(size_t size) {
-  memory_ = reinterpret_cast<uint8_t*>(calloc(1, size));
-  CHECK(memory_ != nullptr);  // Abort on OOM.
-  DCHECK_ALIGNED(memory_, ArenaAllocator::kAlignment);
+  // We need to guarantee kArenaAlignment aligned allocation for the new arena.
+  // TODO: Use std::aligned_alloc() when it becomes available with C++17.
+  constexpr size_t overallocation = RequiredOverallocation();
+  unaligned_memory_ = reinterpret_cast<uint8_t*>(calloc(1, size + overallocation));
+  CHECK(unaligned_memory_ != nullptr);  // Abort on OOM.
+  DCHECK_ALIGNED(unaligned_memory_, alignof(std::max_align_t));
+  if (overallocation == 0u) {
+    memory_ = unaligned_memory_;
+  } else {
+    memory_ = AlignUp(unaligned_memory_, ArenaAllocator::kArenaAlignment);
+    if (UNLIKELY(RUNNING_ON_MEMORY_TOOL > 0)) {
+      size_t head = memory_ - unaligned_memory_;
+      size_t tail = overallocation - head;
+      MEMORY_TOOL_MAKE_NOACCESS(unaligned_memory_, head);
+      MEMORY_TOOL_MAKE_NOACCESS(memory_ + size, tail);
+    }
+  }
+  DCHECK_ALIGNED(memory_, ArenaAllocator::kArenaAlignment);
   size_ = size;
 }
 
 MallocArena::~MallocArena() {
-  free(reinterpret_cast<void*>(memory_));
+  constexpr size_t overallocation = RequiredOverallocation();
+  if (overallocation != 0u && UNLIKELY(RUNNING_ON_MEMORY_TOOL > 0)) {
+    size_t head = memory_ - unaligned_memory_;
+    size_t tail = overallocation - head;
+    MEMORY_TOOL_MAKE_UNDEFINED(unaligned_memory_, head);
+    MEMORY_TOOL_MAKE_UNDEFINED(memory_ + size_, tail);
+  }
+  free(reinterpret_cast<void*>(unaligned_memory_));
 }
 
+class MemMapArena FINAL : public Arena {
+ public:
+  MemMapArena(size_t size, bool low_4gb, const char* name);
+  virtual ~MemMapArena();
+  void Release() OVERRIDE;
+
+ private:
+  std::unique_ptr<MemMap> map_;
+};
+
 MemMapArena::MemMapArena(size_t size, bool low_4gb, const char* name) {
+  // Round up to a full page as that's the smallest unit of allocation for mmap()
+  // and we want to be able to use all memory that we actually allocate.
+  size = RoundUp(size, kPageSize);
   std::string error_msg;
   map_.reset(MemMap::MapAnonymous(
       name, nullptr, size, PROT_READ | PROT_WRITE, low_4gb, false, &error_msg));
   CHECK(map_.get() != nullptr) << error_msg;
   memory_ = map_->Begin();
+  static_assert(ArenaAllocator::kArenaAlignment <= kPageSize,
+                "Arena should not need stronger alignment than kPageSize.");
+  DCHECK_ALIGNED(memory_, ArenaAllocator::kArenaAlignment);
   size_ = map_->Size();
 }
 
@@ -332,20 +385,7 @@
   ArenaAllocatorStats::RecordAlloc(rounded_bytes, kind);
   uint8_t* ret;
   if (UNLIKELY(rounded_bytes > static_cast<size_t>(end_ - ptr_))) {
-    ret = AllocFromNewArena(rounded_bytes);
-    uint8_t* noaccess_begin = ret + bytes;
-    uint8_t* noaccess_end;
-    if (ret == arena_head_->Begin()) {
-      DCHECK(ptr_ - rounded_bytes == ret);
-      noaccess_end = end_;
-    } else {
-      // We're still using the old arena but `ret` comes from a new one just after it.
-      DCHECK(arena_head_->next_ != nullptr);
-      DCHECK(ret == arena_head_->next_->Begin());
-      DCHECK_EQ(rounded_bytes, arena_head_->next_->GetBytesAllocated());
-      noaccess_end = arena_head_->next_->End();
-    }
-    MEMORY_TOOL_MAKE_NOACCESS(noaccess_begin, noaccess_end - noaccess_begin);
+    ret = AllocFromNewArenaWithMemoryTool(rounded_bytes);
   } else {
     ret = ptr_;
     ptr_ += rounded_bytes;
@@ -356,6 +396,30 @@
   return ret;
 }
 
+void* ArenaAllocator::AllocWithMemoryToolAlign16(size_t bytes, ArenaAllocKind kind) {
+  // We mark all memory for a newly retrieved arena as inaccessible and then
+  // mark only the actually allocated memory as defined. That leaves red zones
+  // and padding between allocations marked as inaccessible.
+  size_t rounded_bytes = bytes + kMemoryToolRedZoneBytes;
+  DCHECK_ALIGNED(rounded_bytes, 8);  // `bytes` is 16-byte aligned, red zone is 8-byte aligned.
+  uintptr_t padding =
+      RoundUp(reinterpret_cast<uintptr_t>(ptr_), 16) - reinterpret_cast<uintptr_t>(ptr_);
+  ArenaAllocatorStats::RecordAlloc(rounded_bytes, kind);
+  uint8_t* ret;
+  if (UNLIKELY(padding + rounded_bytes > static_cast<size_t>(end_ - ptr_))) {
+    static_assert(kArenaAlignment >= 16, "Expecting sufficient alignment for new Arena.");
+    ret = AllocFromNewArenaWithMemoryTool(rounded_bytes);
+  } else {
+    ptr_ += padding;  // Leave padding inaccessible.
+    ret = ptr_;
+    ptr_ += rounded_bytes;
+  }
+  MEMORY_TOOL_MAKE_DEFINED(ret, bytes);
+  // Check that the memory is already zeroed out.
+  DCHECK(std::all_of(ret, ret + bytes, [](uint8_t val) { return val == 0u; }));
+  return ret;
+}
+
 ArenaAllocator::~ArenaAllocator() {
   // Reclaim all the arenas by giving them back to the thread pool.
   UpdateBytesAllocated();
@@ -386,6 +450,24 @@
   return new_arena->Begin();
 }
 
+uint8_t* ArenaAllocator::AllocFromNewArenaWithMemoryTool(size_t bytes) {
+  uint8_t* ret = AllocFromNewArena(bytes);
+  uint8_t* noaccess_begin = ret + bytes;
+  uint8_t* noaccess_end;
+  if (ret == arena_head_->Begin()) {
+    DCHECK(ptr_ - bytes == ret);
+    noaccess_end = end_;
+  } else {
+    // We're still using the old arena but `ret` comes from a new one just after it.
+    DCHECK(arena_head_->next_ != nullptr);
+    DCHECK(ret == arena_head_->next_->Begin());
+    DCHECK_EQ(bytes, arena_head_->next_->GetBytesAllocated());
+    noaccess_end = arena_head_->next_->End();
+  }
+  MEMORY_TOOL_MAKE_NOACCESS(noaccess_begin, noaccess_end - noaccess_begin);
+  return ret;
+}
+
 bool ArenaAllocator::Contains(const void* ptr) const {
   if (ptr >= begin_ && ptr < end_) {
     return true;
@@ -398,7 +480,9 @@
   return false;
 }
 
-MemStats::MemStats(const char* name, const ArenaAllocatorStats* stats, const Arena* first_arena,
+MemStats::MemStats(const char* name,
+                   const ArenaAllocatorStats* stats,
+                   const Arena* first_arena,
                    ssize_t lost_bytes_adjustment)
     : name_(name),
       stats_(stats),
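
MallocArena now overallocates because calloc() only guarantees alignof(std::max_align_t); aligning up inside the oversized block yields a kArenaAlignment start while the unaligned pointer is kept for free(). The core technique in isolation (a sketch; AllocAligned and AlignedBlock are illustrative names):

    #include <cstddef>
    #include <cstdint>
    #include <cstdlib>

    struct AlignedBlock {
      uint8_t* unaligned;  // Pass this to free().
      uint8_t* aligned;    // Hand this out as the arena start.
    };

    // `alignment` must be a power of two. Real code would CHECK for OOM.
    AlignedBlock AllocAligned(size_t size, size_t alignment) {
      size_t overallocation = (alignof(std::max_align_t) < alignment)
          ? alignment - alignof(std::max_align_t)
          : 0u;
      uint8_t* raw = static_cast<uint8_t*>(calloc(1, size + overallocation));
      uintptr_t addr = reinterpret_cast<uintptr_t>(raw);
      uint8_t* aligned =
          raw + (((addr + alignment - 1) & ~(alignment - 1)) - addr);
      return {raw, aligned};
    }
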
diff --git a/runtime/base/arena_allocator.h b/runtime/base/arena_allocator.h
index 245ab3b..c39429c 100644
--- a/runtime/base/arena_allocator.h
+++ b/runtime/base/arena_allocator.h
@@ -34,7 +34,6 @@
 class ArenaAllocator;
 class ArenaStack;
 class ScopedArenaAllocator;
-class MemMap;
 class MemStats;
 
 template <typename T>
@@ -89,6 +88,7 @@
   kArenaAllocRegisterAllocator,
   kArenaAllocRegisterAllocatorValidate,
   kArenaAllocStackMapStream,
+  kArenaAllocVectorNode,
   kArenaAllocCodeGenerator,
   kArenaAllocAssembler,
   kArenaAllocParallelMoveResolver,
@@ -243,22 +243,6 @@
   DISALLOW_COPY_AND_ASSIGN(Arena);
 };
 
-class MallocArena FINAL : public Arena {
- public:
-  explicit MallocArena(size_t size = Arena::kDefaultSize);
-  virtual ~MallocArena();
-};
-
-class MemMapArena FINAL : public Arena {
- public:
-  MemMapArena(size_t size, bool low_4gb, const char* name);
-  virtual ~MemMapArena();
-  void Release() OVERRIDE;
-
- private:
-  std::unique_ptr<MemMap> map_;
-};
-
 class ArenaPool {
  public:
   explicit ArenaPool(bool use_malloc = true,
@@ -318,8 +302,31 @@
     return ret;
   }
 
+  // Returns zeroed memory.
+  void* AllocAlign16(size_t bytes, ArenaAllocKind kind = kArenaAllocMisc) ALWAYS_INLINE {
+    // It is an error to request 16-byte aligned allocation of unaligned size.
+    DCHECK_ALIGNED(bytes, 16);
+    if (UNLIKELY(IsRunningOnMemoryTool())) {
+      return AllocWithMemoryToolAlign16(bytes, kind);
+    }
+    uintptr_t padding =
+        RoundUp(reinterpret_cast<uintptr_t>(ptr_), 16) - reinterpret_cast<uintptr_t>(ptr_);
+    ArenaAllocatorStats::RecordAlloc(bytes, kind);
+    if (UNLIKELY(padding + bytes > static_cast<size_t>(end_ - ptr_))) {
+      static_assert(kArenaAlignment >= 16, "Expecting sufficient alignment for new Arena.");
+      return AllocFromNewArena(bytes);
+    }
+    ptr_ += padding;
+    uint8_t* ret = ptr_;
+    DCHECK_ALIGNED(ret, 16);
+    ptr_ += bytes;
+    return ret;
+  }
+
   // Realloc never frees the input pointer, it is the caller's job to do this if necessary.
-  void* Realloc(void* ptr, size_t ptr_size, size_t new_size,
+  void* Realloc(void* ptr,
+                size_t ptr_size,
+                size_t new_size,
                 ArenaAllocKind kind = kArenaAllocMisc) ALWAYS_INLINE {
     DCHECK_GE(new_size, ptr_size);
     DCHECK_EQ(ptr == nullptr, ptr_size == 0u);
@@ -370,12 +377,17 @@
 
   bool Contains(const void* ptr) const;
 
-  static constexpr size_t kAlignment = 8;
+  // The alignment guaranteed for individual allocations.
+  static constexpr size_t kAlignment = 8u;
+
+  // The alignment required for the whole Arena rather than individual allocations.
+  static constexpr size_t kArenaAlignment = 16u;
 
  private:
   void* AllocWithMemoryTool(size_t bytes, ArenaAllocKind kind);
+  void* AllocWithMemoryToolAlign16(size_t bytes, ArenaAllocKind kind);
   uint8_t* AllocFromNewArena(size_t bytes);
-
+  uint8_t* AllocFromNewArenaWithMemoryTool(size_t bytes);
 
   void UpdateBytesAllocated();
 
@@ -395,7 +407,9 @@
 
 class MemStats {
  public:
-  MemStats(const char* name, const ArenaAllocatorStats* stats, const Arena* first_arena,
+  MemStats(const char* name,
+           const ArenaAllocatorStats* stats,
+           const Arena* first_arena,
            ssize_t lost_bytes_adjustment = 0);
   void Dump(std::ostream& os) const;
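
The padding in AllocAlign16 is the distance from ptr_ up to the next 16-byte boundary (zero when already aligned). For a power-of-two alignment, the RoundUp form used above and the mask form are equivalent; a compile-checked sketch:

    #include <cstdint>

    constexpr uintptr_t PaddingTo16(uintptr_t addr) {
      // Round up to the next multiple of 16, then take the difference.
      return ((addr + 15u) & ~static_cast<uintptr_t>(15u)) - addr;
    }
    static_assert(PaddingTo16(32) == 0u, "already aligned: no padding");
    static_assert(PaddingTo16(40) == 8u, "8 bytes to the next boundary");
    static_assert(PaddingTo16(33) == 15u, "worst case is alignment - 1");
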
 
diff --git a/runtime/base/arena_containers.h b/runtime/base/arena_containers.h
index 2c8aa28..62b974e 100644
--- a/runtime/base/arena_containers.h
+++ b/runtime/base/arena_containers.h
@@ -21,6 +21,7 @@
 #include <queue>
 #include <set>
 #include <stack>
+#include <unordered_map>
 #include <utility>
 
 #include "arena_allocator.h"
@@ -85,6 +86,16 @@
                              Pred,
                              ArenaAllocatorAdapter<std::pair<Key, Value>>>;
 
+template <typename Key,
+          typename Value,
+          typename Hash = std::hash<Key>,
+          typename Pred = std::equal_to<Key>>
+using ArenaUnorderedMap = std::unordered_map<Key,
+                                             Value,
+                                             Hash,
+                                             Pred,
+                                             ArenaAllocatorAdapter<std::pair<const Key, Value>>>;
+
 // Implementation details below.
 
 template <bool kCount>
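
ArenaUnorderedMap is std::unordered_map with the arena adapter as its allocator; note the allocator's value_type has to be std::pair<const Key, Value> to match the node type. The same wiring with a standard polymorphic allocator, as a self-contained analogy (not the ART adapter):

    #include <memory_resource>
    #include <string>
    #include <unordered_map>

    int main() {
      // A fixed buffer plays the arena's role; map nodes are carved from it
      // and all released at once when the resource goes away.
      char buffer[4096];
      std::pmr::monotonic_buffer_resource arena(buffer, sizeof(buffer));
      std::pmr::unordered_map<int, std::pmr::string> map(&arena);
      map.emplace(1, "one");
      map.emplace(2, "two");
      return map.size() == 2u ? 0 : 1;
    }
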
diff --git a/runtime/base/bit_utils.h b/runtime/base/bit_utils.h
index f0811b0..4041f5e 100644
--- a/runtime/base/bit_utils.h
+++ b/runtime/base/bit_utils.h
@@ -152,6 +152,11 @@
   return (x & (n - 1)) == 0;
 }
 
+template<typename T>
+inline bool IsAlignedParam(T* x, int n) {
+  return IsAlignedParam(reinterpret_cast<const uintptr_t>(x), n);
+}
+
 #define CHECK_ALIGNED(value, alignment) \
   CHECK(::art::IsAligned<alignment>(value)) << reinterpret_cast<const void*>(value)
 
diff --git a/runtime/base/mutex.cc b/runtime/base/mutex.cc
index b93b293..b0394a5 100644
--- a/runtime/base/mutex.cc
+++ b/runtime/base/mutex.cc
@@ -73,6 +73,7 @@
 Mutex* Locks::jni_weak_globals_lock_ = nullptr;
 ReaderWriterMutex* Locks::dex_lock_ = nullptr;
 std::vector<BaseMutex*> Locks::expected_mutexes_on_weak_ref_access_;
+Atomic<const BaseMutex*> Locks::expected_mutexes_on_weak_ref_access_guard_;
 
 struct AllMutexData {
   // A guard for all_mutexes_ that's not a mutex (Mutexes must CAS to acquire and busy wait).
@@ -117,6 +118,26 @@
   const BaseMutex* const mutex_;
 };
 
+class Locks::ScopedExpectedMutexesOnWeakRefAccessLock FINAL {
+ public:
+  explicit ScopedExpectedMutexesOnWeakRefAccessLock(const BaseMutex* mutex) : mutex_(mutex) {
+    while (!Locks::expected_mutexes_on_weak_ref_access_guard_.CompareExchangeWeakAcquire(0,
+                                                                                         mutex)) {
+      NanoSleep(100);
+    }
+  }
+
+  ~ScopedExpectedMutexesOnWeakRefAccessLock() {
+    while (!Locks::expected_mutexes_on_weak_ref_access_guard_.CompareExchangeWeakRelease(mutex_,
+                                                                                         0)) {
+      NanoSleep(100);
+    }
+  }
+
+ private:
+  const BaseMutex* const mutex_;
+};
+
 // Scoped class that generates events at the beginning and end of lock contention.
 class ScopedContentionRecorder FINAL : public ValueObject {
  public:
@@ -1163,12 +1184,9 @@
     #undef UPDATE_CURRENT_LOCK_LEVEL
 
     // List of mutexes that we may hold when accessing a weak ref.
-    dex_lock_->SetShouldRespondToEmptyCheckpointRequest(true);
-    expected_mutexes_on_weak_ref_access_.push_back(dex_lock_);
-    classlinker_classes_lock_->SetShouldRespondToEmptyCheckpointRequest(true);
-    expected_mutexes_on_weak_ref_access_.push_back(classlinker_classes_lock_);
-    jni_libraries_lock_->SetShouldRespondToEmptyCheckpointRequest(true);
-    expected_mutexes_on_weak_ref_access_.push_back(jni_libraries_lock_);
+    AddToExpectedMutexesOnWeakRefAccess(dex_lock_, /*need_lock*/ false);
+    AddToExpectedMutexesOnWeakRefAccess(classlinker_classes_lock_, /*need_lock*/ false);
+    AddToExpectedMutexesOnWeakRefAccess(jni_libraries_lock_, /*need_lock*/ false);
 
     InitConditions();
   }
@@ -1188,4 +1206,38 @@
   return safe_to_call_abort_cb != nullptr && safe_to_call_abort_cb();
 }
 
+void Locks::AddToExpectedMutexesOnWeakRefAccess(BaseMutex* mutex, bool need_lock) {
+  if (need_lock) {
+    ScopedExpectedMutexesOnWeakRefAccessLock mu(mutex);
+    mutex->SetShouldRespondToEmptyCheckpointRequest(true);
+    expected_mutexes_on_weak_ref_access_.push_back(mutex);
+  } else {
+    mutex->SetShouldRespondToEmptyCheckpointRequest(true);
+    expected_mutexes_on_weak_ref_access_.push_back(mutex);
+  }
+}
+
+void Locks::RemoveFromExpectedMutexesOnWeakRefAccess(BaseMutex* mutex, bool need_lock) {
+  if (need_lock) {
+    ScopedExpectedMutexesOnWeakRefAccessLock mu(mutex);
+    mutex->SetShouldRespondToEmptyCheckpointRequest(false);
+    std::vector<BaseMutex*>& list = expected_mutexes_on_weak_ref_access_;
+    auto it = std::find(list.begin(), list.end(), mutex);
+    DCHECK(it != list.end());
+    list.erase(it);
+  } else {
+    mutex->SetShouldRespondToEmptyCheckpointRequest(false);
+    std::vector<BaseMutex*>& list = expected_mutexes_on_weak_ref_access_;
+    auto it = std::find(list.begin(), list.end(), mutex);
+    DCHECK(it != list.end());
+    list.erase(it);
+  }
+}
+
+bool Locks::IsExpectedOnWeakRefAccess(BaseMutex* mutex) {
+  ScopedExpectedMutexesOnWeakRefAccessLock mu(mutex);
+  std::vector<BaseMutex*>& list = expected_mutexes_on_weak_ref_access_;
+  return std::find(list.begin(), list.end(), mutex) != list.end();
+}
+
 }  // namespace art
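
ScopedExpectedMutexesOnWeakRefAccessLock is a tiny CAS spinlock over the mutex list: the constructor spins a weak compare-exchange-acquire from null to the caller's mutex and the destructor swings it back with release semantics. A standalone sketch of the same guard (the runtime version also CASes on release rather than plainly storing):

    #include <atomic>
    #include <chrono>
    #include <thread>

    class ScopedCasGuard {
     public:
      ScopedCasGuard(std::atomic<const void*>& guard, const void* owner)
          : guard_(guard), owner_(owner) {
        const void* expected = nullptr;
        while (!guard_.compare_exchange_weak(expected, owner_,
                                             std::memory_order_acquire)) {
          expected = nullptr;  // compare_exchange_weak rewrote `expected`.
          std::this_thread::sleep_for(std::chrono::nanoseconds(100));
        }
      }

      ~ScopedCasGuard() {
        guard_.store(nullptr, std::memory_order_release);
      }

     private:
      std::atomic<const void*>& guard_;
      const void* const owner_;
    };
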
diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h
index 9b6938f..038aeb3 100644
--- a/runtime/base/mutex.h
+++ b/runtime/base/mutex.h
@@ -516,12 +516,12 @@
 // construction and releases it upon destruction.
 class SCOPED_CAPABILITY ReaderMutexLock {
  public:
-  ReaderMutexLock(Thread* self, ReaderWriterMutex& mu) ACQUIRE(mu) :
+  ReaderMutexLock(Thread* self, ReaderWriterMutex& mu) ACQUIRE(mu) ALWAYS_INLINE :
       self_(self), mu_(mu) {
     mu_.SharedLock(self_);
   }
 
-  ~ReaderMutexLock() RELEASE() {
+  ~ReaderMutexLock() RELEASE() ALWAYS_INLINE {
     mu_.SharedUnlock(self_);
   }
 
@@ -583,6 +583,12 @@
   // Checks for whether it is safe to call Abort() without using locks.
   static bool IsSafeToCallAbortRacy() NO_THREAD_SAFETY_ANALYSIS;
 
+  // Add a mutex to expected_mutexes_on_weak_ref_access_.
+  static void AddToExpectedMutexesOnWeakRefAccess(BaseMutex* mutex, bool need_lock = true);
+  // Remove a mutex from expected_mutexes_on_weak_ref_access_.
+  static void RemoveFromExpectedMutexesOnWeakRefAccess(BaseMutex* mutex, bool need_lock = true);
+  // Check if the given mutex is in expected_mutexes_on_weak_ref_access_.
+  static bool IsExpectedOnWeakRefAccess(BaseMutex* mutex);
 
   // Guards allocation entrypoint instrumenting.
   static Mutex* instrument_entrypoints_lock_;
@@ -734,6 +740,8 @@
   // encounter an unexpected mutex on accessing weak refs,
   // Thread::CheckEmptyCheckpointFromWeakRefAccess will detect it.
   static std::vector<BaseMutex*> expected_mutexes_on_weak_ref_access_;
+  static Atomic<const BaseMutex*> expected_mutexes_on_weak_ref_access_guard_;
+  class ScopedExpectedMutexesOnWeakRefAccessLock;
 };
 
 class Roles {
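
ReaderMutexLock is the RAII wrapper that the new ALWAYS_INLINE hints keep from showing up as out-of-line frames; its usage pattern is the familiar shared-lock scope, shown here with the standard-library equivalent:

    #include <shared_mutex>

    std::shared_mutex table_lock;
    int table_size = 0;

    int ReadTableSize() {
      std::shared_lock<std::shared_mutex> lock(table_lock);  // SharedLock().
      return table_size;
    }  // SharedUnlock() as `lock` leaves scope.
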
diff --git a/runtime/base/scoped_arena_allocator.h b/runtime/base/scoped_arena_allocator.h
index 55044b3..1a0eb5e 100644
--- a/runtime/base/scoped_arena_allocator.h
+++ b/runtime/base/scoped_arena_allocator.h
@@ -39,8 +39,6 @@
   kFree,
 };
 
-static constexpr size_t kArenaAlignment = 8;
-
 // Holds a list of Arenas for use by ScopedArenaAllocator stack.
 // The memory is returned to the ArenaPool when the ArenaStack is destroyed.
 class ArenaStack : private DebugStackRefCounter, private ArenaAllocatorMemoryTool {
@@ -67,6 +65,9 @@
     return *(reinterpret_cast<ArenaFreeTag*>(ptr) - 1);
   }
 
+  // The alignment guaranteed for individual allocations.
+  static constexpr size_t kAlignment = 8u;
+
  private:
   struct Peak;
   struct Current;
@@ -89,8 +90,8 @@
     if (UNLIKELY(IsRunningOnMemoryTool())) {
       return AllocWithMemoryTool(bytes, kind);
     }
-    // Add kArenaAlignment for the free or used tag. Required to preserve alignment.
-    size_t rounded_bytes = RoundUp(bytes + (kIsDebugBuild ? kArenaAlignment : 0u), kArenaAlignment);
+    // Add kAlignment for the free or used tag. Required to preserve alignment.
+    size_t rounded_bytes = RoundUp(bytes + (kIsDebugBuild ? kAlignment : 0u), kAlignment);
     uint8_t* ptr = top_ptr_;
     if (UNLIKELY(static_cast<size_t>(top_end_ - ptr) < rounded_bytes)) {
       ptr = AllocateFromNextArena(rounded_bytes);
@@ -98,7 +99,7 @@
     CurrentStats()->RecordAlloc(bytes, kind);
     top_ptr_ = ptr + rounded_bytes;
     if (kIsDebugBuild) {
-      ptr += kArenaAlignment;
+      ptr += kAlignment;
       ArenaTagForAllocation(ptr) = ArenaFreeTag::kUsed;
     }
     return ptr;
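
The renamed kAlignment is also the size of the free/used tag slot that debug builds prepend to every allocation, so the rounding above both reserves the tag and keeps 8-byte alignment. The arithmetic in isolation (a sketch):

    #include <cstddef>

    constexpr size_t kAlignment = 8u;
    constexpr bool kIsDebugBuild = true;  // Stand-in for the real flag.

    constexpr size_t RoundedAllocSize(size_t bytes) {
      return (bytes + (kIsDebugBuild ? kAlignment : 0u) + kAlignment - 1u)
          & ~(kAlignment - 1u);
    }
    static_assert(RoundedAllocSize(1u) == 16u, "1 byte + 8-byte tag -> 16");
    static_assert(RoundedAllocSize(8u) == 16u, "8 bytes + tag stays at 16");
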
diff --git a/runtime/base/unix_file/fd_file.cc b/runtime/base/unix_file/fd_file.cc
index ff2dd1b..03fc959 100644
--- a/runtime/base/unix_file/fd_file.cc
+++ b/runtime/base/unix_file/fd_file.cc
@@ -73,7 +73,7 @@
   }
   if (auto_close_ && fd_ != -1) {
     if (Close() != 0) {
-      PLOG(WARNING) << "Failed to close file " << file_path_;
+      PLOG(WARNING) << "Failed to close file with fd=" << fd_ << " path=" << file_path_;
     }
   }
 }
diff --git a/compiler/optimizing/bytecode_utils.h b/runtime/bytecode_utils.h
similarity index 96%
rename from compiler/optimizing/bytecode_utils.h
rename to runtime/bytecode_utils.h
index 133afa4..fa87b1d 100644
--- a/compiler/optimizing/bytecode_utils.h
+++ b/runtime/bytecode_utils.h
@@ -14,8 +14,8 @@
  * limitations under the License.
  */
 
-#ifndef ART_COMPILER_OPTIMIZING_BYTECODE_UTILS_H_
-#define ART_COMPILER_OPTIMIZING_BYTECODE_UTILS_H_
+#ifndef ART_RUNTIME_BYTECODE_UTILS_H_
+#define ART_RUNTIME_BYTECODE_UTILS_H_
 
 #include "base/arena_object.h"
 #include "dex_file.h"
@@ -177,4 +177,4 @@
 
 }  // namespace art
 
-#endif  // ART_COMPILER_OPTIMIZING_BYTECODE_UTILS_H_
+#endif  // ART_RUNTIME_BYTECODE_UTILS_H_
diff --git a/runtime/class_linker-inl.h b/runtime/class_linker-inl.h
index bd510ca..9ddc6cf 100644
--- a/runtime/class_linker-inl.h
+++ b/runtime/class_linker-inl.h
@@ -161,9 +161,15 @@
   return resolved_method;
 }
 
-inline ArtField* ClassLinker::GetResolvedField(uint32_t field_idx,
-                                               ObjPtr<mirror::DexCache> dex_cache) {
-  return dex_cache->GetResolvedField(field_idx, image_pointer_size_);
+inline ArtField* ClassLinker::LookupResolvedField(uint32_t field_idx,
+                                                  ArtMethod* referrer,
+                                                  bool is_static) {
+  ObjPtr<mirror::DexCache> dex_cache = referrer->GetDexCache();
+  ArtField* field = dex_cache->GetResolvedField(field_idx, image_pointer_size_);
+  if (field == nullptr) {
+    field = LookupResolvedField(field_idx, dex_cache, referrer->GetClassLoader(), is_static);
+  }
+  return field;
 }
 
 inline ArtField* ClassLinker::ResolveField(uint32_t field_idx,
@@ -171,7 +177,8 @@
                                            bool is_static) {
   Thread::PoisonObjectPointersIfDebug();
   ObjPtr<mirror::Class> declaring_class = referrer->GetDeclaringClass();
-  ArtField* resolved_field = GetResolvedField(field_idx, referrer->GetDexCache());
+  ArtField* resolved_field =
+      referrer->GetDexCache()->GetResolvedField(field_idx, image_pointer_size_);
   if (UNLIKELY(resolved_field == nullptr)) {
     StackHandleScope<2> hs(Thread::Current());
     Handle<mirror::DexCache> dex_cache(hs.NewHandle(referrer->GetDexCache()));
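
LookupResolvedField follows the usual two-tier shape: consult the dex cache first and only fall back to the slower class-loader lookup on a miss. The generic pattern (a sketch; in the runtime it is the slow path that ends up populating the cache):

    #include <unordered_map>

    template <typename Key, typename Value, typename SlowLookup>
    Value* CachedLookup(std::unordered_map<Key, Value*>& cache,
                        const Key& key,
                        SlowLookup slow) {
      auto it = cache.find(key);
      if (it != cache.end() && it->second != nullptr) {
        return it->second;  // Fast path: cache hit.
      }
      Value* result = slow(key);  // Slow path; may legitimately return null.
      if (result != nullptr) {
        cache[key] = result;
      }
      return result;
    }
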
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index 46f1644..eb7d7bd 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -752,22 +752,6 @@
                FindSystemClass(self, "[Ljava/lang/StackTraceElement;"));
   mirror::StackTraceElement::SetClass(GetClassRoot(kJavaLangStackTraceElement));
 
-  // Ensure void type is resolved in the core's dex cache so java.lang.Void is correctly
-  // initialized.
-  {
-    const DexFile& dex_file = java_lang_Object->GetDexFile();
-    const DexFile::TypeId* void_type_id = dex_file.FindTypeId("V");
-    CHECK(void_type_id != nullptr);
-    dex::TypeIndex void_type_idx = dex_file.GetIndexForTypeId(*void_type_id);
-    // Now we resolve void type so the dex cache contains it. We use java.lang.Object class
-    // as referrer so the used dex cache is core's one.
-    ObjPtr<mirror::Class> resolved_type = ResolveType(dex_file,
-                                                      void_type_idx,
-                                                      java_lang_Object.Get());
-    CHECK_EQ(resolved_type, GetClassRoot(kPrimitiveVoid));
-    self->AssertNoPendingException();
-  }
-
   // Create conflict tables that depend on the class linker.
   runtime->FixupConflictTables();
 
@@ -922,7 +906,6 @@
       runtime->GetOatFileManager().RegisterImageOatFiles(spaces);
   DCHECK(!oat_files.empty());
   const OatHeader& default_oat_header = oat_files[0]->GetOatHeader();
-  CHECK_EQ(default_oat_header.GetImageFileLocationOatChecksum(), 0U);
   CHECK_EQ(default_oat_header.GetImageFileLocationOatDataBegin(), 0U);
   const char* image_file_location = oat_files[0]->GetOatHeader().
       GetStoreValueByKey(OatHeader::kImageLocationKey);
@@ -1041,7 +1024,8 @@
            class_loader->GetClass();
 }
 
-static mirror::String* GetDexPathListElementName(ObjPtr<mirror::Object> element)
+static bool GetDexPathListElementName(ObjPtr<mirror::Object> element,
+                                      ObjPtr<mirror::String>* out_name)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   ArtField* const dex_file_field =
       jni::DecodeArtField(WellKnownClasses::dalvik_system_DexPathList__Element_dexFile);
@@ -1053,17 +1037,20 @@
   CHECK_EQ(dex_file_field->GetDeclaringClass(), element->GetClass()) << element->PrettyTypeOf();
   ObjPtr<mirror::Object> dex_file = dex_file_field->GetObject(element);
   if (dex_file == nullptr) {
-    return nullptr;
+    // Null dex file means it was probably a jar with no dex files; return a null string.
+    *out_name = nullptr;
+    return true;
   }
   ObjPtr<mirror::Object> name_object = dex_file_name_field->GetObject(dex_file);
   if (name_object != nullptr) {
-    return name_object->AsString();
+    *out_name = name_object->AsString();
+    return true;
   }
-  return nullptr;
+  return false;
 }
 
 static bool FlattenPathClassLoader(ObjPtr<mirror::ClassLoader> class_loader,
-                                   std::list<mirror::String*>* out_dex_file_names,
+                                   std::list<ObjPtr<mirror::String>>* out_dex_file_names,
                                    std::string* error_msg)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   DCHECK(out_dex_file_names != nullptr);
@@ -1099,12 +1086,14 @@
             *error_msg = StringPrintf("Null dex element at index %d", i);
             return false;
           }
-          ObjPtr<mirror::String> const name = GetDexPathListElementName(element);
-          if (name == nullptr) {
-            *error_msg = StringPrintf("Null name for dex element at index %d", i);
+          ObjPtr<mirror::String> name;
+          if (!GetDexPathListElementName(element, &name)) {
+            *error_msg = StringPrintf("Invalid dex path list element at index %d", i);
             return false;
           }
-          out_dex_file_names->push_front(name.Ptr());
+          if (name != nullptr) {
+            out_dex_file_names->push_front(name.Ptr());
+          }
         }
       }
     }
@@ -1173,9 +1162,7 @@
     for (ObjPtr<mirror::Class> klass : classes_) {
       for (uint32_t i = 0, num = klass->NumDirectInterfaces(); i != num; ++i) {
         CHECK(klass->GetDirectInterface(self_, klass, i) != nullptr)
-            << klass->PrettyDescriptor() << " iface #" << i
-            << klass->GetDexFile().StringByTypeIdx(klass->GetDirectInterfaceTypeIdx(i))
-            << " Bug: 34839984";
+            << klass->PrettyDescriptor() << " iface #" << i;
       }
     }
   }
@@ -1292,7 +1279,10 @@
           num_types = dex_file->NumTypeIds();
         }
         const size_t num_methods = dex_file->NumMethodIds();
-        const size_t num_fields = dex_file->NumFieldIds();
+        size_t num_fields = mirror::DexCache::kDexCacheFieldCacheSize;
+        if (dex_file->NumFieldIds() < num_fields) {
+          num_fields = dex_file->NumFieldIds();
+        }
         size_t num_method_types = mirror::DexCache::kDexCacheMethodTypeCacheSize;
         if (dex_file->NumProtoIds() < num_method_types) {
           num_method_types = dex_file->NumProtoIds();
@@ -1336,17 +1326,22 @@
           dex_cache->SetResolvedMethods(methods);
         }
         if (num_fields != 0u) {
-          ArtField** const fields =
-              reinterpret_cast<ArtField**>(raw_arrays + layout.FieldsOffset());
-          for (size_t j = 0; kIsDebugBuild && j < num_fields; ++j) {
-            DCHECK(fields[j] == nullptr);
+          mirror::FieldDexCacheType* const image_resolved_fields = dex_cache->GetResolvedFields();
+          mirror::FieldDexCacheType* const fields =
+              reinterpret_cast<mirror::FieldDexCacheType*>(raw_arrays + layout.FieldsOffset());
+          for (size_t j = 0; j < num_fields; ++j) {
+            DCHECK_EQ(mirror::DexCache::GetNativePairPtrSize(fields, j, image_pointer_size_).index,
+                      0u);
+            DCHECK(mirror::DexCache::GetNativePairPtrSize(fields, j, image_pointer_size_).object ==
+                   nullptr);
+            mirror::DexCache::SetNativePairPtrSize(
+                fields,
+                j,
+                mirror::DexCache::GetNativePairPtrSize(image_resolved_fields,
+                                                       j,
+                                                       image_pointer_size_),
+                image_pointer_size_);
           }
-          CopyNonNull(dex_cache->GetResolvedFields(),
-                      num_fields,
-                      fields,
-                      [] (const ArtField* field) {
-                          return field == nullptr;
-                      });
           dex_cache->SetResolvedFields(fields);
         }
         if (num_method_types != 0u) {
@@ -1785,14 +1780,14 @@
       *error_msg = "Unexpected BootClassLoader in app image";
       return false;
     }
-    std::list<mirror::String*> image_dex_file_names;
+    std::list<ObjPtr<mirror::String>> image_dex_file_names;
     std::string temp_error_msg;
     if (!FlattenPathClassLoader(image_class_loader.Get(), &image_dex_file_names, &temp_error_msg)) {
       *error_msg = StringPrintf("Failed to flatten image class loader hierarchy '%s'",
                                 temp_error_msg.c_str());
       return false;
     }
-    std::list<mirror::String*> loader_dex_file_names;
+    std::list<ObjPtr<mirror::String>> loader_dex_file_names;
     if (!FlattenPathClassLoader(class_loader.Get(), &loader_dex_file_names, &temp_error_msg)) {
       *error_msg = StringPrintf("Failed to flatten class loader hierarchy '%s'",
                                 temp_error_msg.c_str());
@@ -1804,7 +1799,10 @@
       ObjPtr<mirror::Object> element = elements->GetWithoutChecks(i);
       if (element != nullptr) {
         // If we are somewhere in the middle of the array, there may be nulls at the end.
-        loader_dex_file_names.push_back(GetDexPathListElementName(element));
+        ObjPtr<mirror::String> name;
+        if (GetDexPathListElementName(element, &name) && name != nullptr) {
+          loader_dex_file_names.push_back(name);
+        }
       }
     }
     // Ignore the number of image dex files since we are adding those to the class loader anyways.
@@ -1920,12 +1918,22 @@
     // Since it ensures classes are in the class table.
     VerifyClassInTableArtMethodVisitor visitor2(class_table);
     header.VisitPackedArtMethods(&visitor2, space->Begin(), kRuntimePointerSize);
-  }
-  if (app_image) {
-    // TODO: Restrict this check to debug builds. Bug: 34839984
+    // Verify that all direct interfaces of classes in the class table are also resolved.
     VerifyDirectInterfacesInTableClassVisitor visitor(class_loader.Get());
     class_table->Visit(visitor);
     visitor.Check();
+    // Check that all non-primitive classes in dex caches are also in the class table.
+    for (int32_t i = 0; i < dex_caches->GetLength(); i++) {
+      ObjPtr<mirror::DexCache> dex_cache = dex_caches->Get(i);
+      mirror::TypeDexCacheType* const types = dex_cache->GetResolvedTypes();
+      for (int32_t j = 0, num_types = dex_cache->NumResolvedTypes(); j < num_types; j++) {
+        ObjPtr<mirror::Class> klass = types[j].load(std::memory_order_relaxed).object.Read();
+        if (klass != nullptr && !klass->IsPrimitive()) {
+          CHECK(class_table->Contains(klass)) << klass->PrettyDescriptor()
+              << " " << dex_cache->GetDexFile()->GetLocation();
+        }
+      }
+    }
   }
   VLOG(class_linker) << "Adding image space took " << PrettyDuration(NanoTime() - start_time);
   return true;
@@ -3471,6 +3479,11 @@
     return nullptr;
   }
   table->InsertStrongRoot(h_dex_cache.Get());
+  if (h_class_loader.Get() != nullptr) {
+    // Since we added a strong root to the class table, do the write barrier as required for
+    // remembered sets and generational GCs.
+    Runtime::Current()->GetHeap()->WriteBarrierEveryFieldOf(h_class_loader.Get());
+  }
   return h_dex_cache.Get();
 }
 
@@ -3798,14 +3811,10 @@
 }
 
 void ClassLinker::WriteBarrierForBootOatFileBssRoots(const OatFile* oat_file) {
-  if (!kUseReadBarrier) {
-    WriterMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
-    DCHECK(!oat_file->GetBssGcRoots().empty()) << oat_file->GetLocation();
-    if (log_new_roots_ && !ContainsElement(new_bss_roots_boot_oat_files_, oat_file)) {
-      new_bss_roots_boot_oat_files_.push_back(oat_file);
-    }
-  } else {
-    LOG(FATAL) << "UNREACHABLE";
+  WriterMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
+  DCHECK(!oat_file->GetBssGcRoots().empty()) << oat_file->GetLocation();
+  if (log_new_roots_ && !ContainsElement(new_bss_roots_boot_oat_files_, oat_file)) {
+    new_bss_roots_boot_oat_files_.push_back(oat_file);
   }
 }
 
@@ -3906,8 +3915,10 @@
   if (!supertype->IsVerified() && !supertype->IsErroneous()) {
     VerifyClass(self, supertype);
   }
-  if (supertype->IsCompileTimeVerified()) {
-    // Either we are verified or we soft failed and need to retry at runtime.
+
+  if (supertype->IsVerified() || supertype->ShouldVerifyAtRuntime()) {
+    // The supertype is either verified, or we soft failed at AOT time.
+    DCHECK(supertype->IsVerified() || Runtime::Current()->IsAotCompiler());
     return true;
   }
   // If we got this far then we have a hard failure.
@@ -3973,13 +3984,16 @@
       return verifier::MethodVerifier::kHardFailure;
     }
 
-    // Don't attempt to re-verify if already sufficiently verified.
+    // Don't attempt to re-verify if already verified.
     if (klass->IsVerified()) {
       EnsureSkipAccessChecksMethods(klass, image_pointer_size_);
       return verifier::MethodVerifier::kNoFailure;
     }
-    if (klass->IsCompileTimeVerified() && Runtime::Current()->IsAotCompiler()) {
-      return verifier::MethodVerifier::kNoFailure;
+
+    // For AOT, don't attempt to re-verify if we have already found we should
+    // verify at runtime.
+    if (Runtime::Current()->IsAotCompiler() && klass->ShouldVerifyAtRuntime()) {
+      return verifier::MethodVerifier::kSoftFailure;
     }
 
     if (klass->GetStatus() == mirror::Class::kStatusResolved) {
@@ -4161,19 +4175,6 @@
     return false;
   }
 
-  // We may be running with a preopted oat file but without image. In this case,
-  // we don't skip verification of skip_access_checks classes to ensure we initialize
-  // dex caches with all types resolved during verification.
-  // We need to trust image classes, as these might be coming out of a pre-opted, quickened boot
-  // image (that we just failed loading), and the verifier can't be run on quickened opcodes when
-  // the runtime isn't started. On the other hand, app classes can be re-verified even if they are
-  // already pre-opted, as then the runtime is started.
-  if (!Runtime::Current()->IsAotCompiler() &&
-      !Runtime::Current()->GetHeap()->HasBootImageSpace() &&
-      klass->GetClassLoader() != nullptr) {
-    return false;
-  }
-
   uint16_t class_def_index = klass->GetDexClassDefIndex();
   oat_file_class_status = oat_dex_file->GetOatClass(class_def_index).GetStatus();
   if (oat_file_class_status == mirror::Class::kStatusVerified ||
@@ -4427,9 +4428,15 @@
   // Create constructor for Proxy that must initialize the method.
   CHECK_EQ(GetClassRoot(kJavaLangReflectProxy)->NumDirectMethods(), 23u);
 
-  ArtMethod* proxy_constructor = GetClassRoot(kJavaLangReflectProxy)->GetDirectMethodUnchecked(
-      8, image_pointer_size_);
-  DCHECK_EQ(std::string(proxy_constructor->GetName()), "<init>");
+  // Find the <init>(InvocationHandler)V method. The exact method offset varies depending
+  // on which front-end compiler was used to build the libcore DEX files.
+  ArtMethod* proxy_constructor = GetClassRoot(kJavaLangReflectProxy)->
+      FindDeclaredDirectMethod("<init>",
+                               "(Ljava/lang/reflect/InvocationHandler;)V",
+                               image_pointer_size_);
+  DCHECK(proxy_constructor != nullptr)
+      << "Could not find <init> method in java.lang.reflect.Proxy";
+
   // Ensure constructor is in dex cache so that we can use the dex cache to look up the overridden
   // constructor method.
   GetClassRoot(kJavaLangReflectProxy)->GetDexCache()->SetResolvedMethod(
@@ -4546,108 +4553,6 @@
   return CanWeInitializeClass(super_class, can_init_statics, can_init_parents);
 }
 
-std::string DescribeSpace(ObjPtr<mirror::Class> klass) REQUIRES_SHARED(Locks::mutator_lock_) {
-  std::ostringstream oss;
-  gc::Heap* heap = Runtime::Current()->GetHeap();
-  gc::space::ContinuousSpace* cs = heap->FindContinuousSpaceFromAddress(klass.Ptr());
-  if (cs != nullptr) {
-    if (cs->IsImageSpace()) {
-      oss << "image/" << cs->GetName() << "/" << cs->AsImageSpace()->GetImageFilename();
-    } else {
-      oss << "continuous/" << cs->GetName();
-    }
-  } else {
-    gc::space::DiscontinuousSpace* ds =
-        heap->FindDiscontinuousSpaceFromObject(klass, /* fail_ok */ true);
-    if (ds != nullptr) {
-      oss << "discontinuous/" << ds->GetName();
-    } else {
-      oss << "invalid";
-    }
-  }
-  return oss.str();
-}
-
-std::string DescribeLoaders(ObjPtr<mirror::Class> klass, const char* iface_descriptor)
-    REQUIRES_SHARED(Locks::mutator_lock_) {
-  std::ostringstream oss;
-  uint32_t hash = ComputeModifiedUtf8Hash(iface_descriptor);
-  ScopedObjectAccessUnchecked soa(Thread::Current());
-  ObjPtr<mirror::Class> path_class_loader =
-      soa.Decode<mirror::Class>(WellKnownClasses::dalvik_system_PathClassLoader);
-  ObjPtr<mirror::Class> dex_class_loader =
-      soa.Decode<mirror::Class>(WellKnownClasses::dalvik_system_DexClassLoader);
-
-  // Print the class loader chain.
-  bool found_iface;
-  const char* loader_separator = "";
-  for (ObjPtr<mirror::ClassLoader> loader = klass->GetClassLoader();
-       loader != nullptr;
-       loader = loader->GetParent()) {
-    oss << loader_separator << loader->GetClass()->PrettyDescriptor();
-    loader_separator = ";";
-    // If we didn't find the interface yet, try to find it in the current class loader.
-    if (!found_iface) {
-      ClassTable* table = Runtime::Current()->GetClassLinker()->ClassTableForClassLoader(loader);
-      ObjPtr<mirror::Class> iface =
-          (table != nullptr) ? table->Lookup(iface_descriptor, hash) : nullptr;
-      if (iface != nullptr) {
-        found_iface = true;
-        oss << "[hit:" << DescribeSpace(iface) << "]";
-      }
-    }
-
-    // For PathClassLoader or DexClassLoader also dump the dex file locations.
-    if (loader->GetClass() == path_class_loader || loader->GetClass() == dex_class_loader) {
-      ArtField* const cookie_field =
-          jni::DecodeArtField(WellKnownClasses::dalvik_system_DexFile_cookie);
-      ArtField* const dex_file_field =
-          jni::DecodeArtField(WellKnownClasses::dalvik_system_DexPathList__Element_dexFile);
-      ObjPtr<mirror::Object> dex_path_list =
-          jni::DecodeArtField(WellKnownClasses::dalvik_system_BaseDexClassLoader_pathList)->
-              GetObject(loader);
-      if (dex_path_list != nullptr && dex_file_field != nullptr && cookie_field != nullptr) {
-        ObjPtr<mirror::Object> dex_elements_obj =
-            jni::DecodeArtField(WellKnownClasses::dalvik_system_DexPathList_dexElements)->
-            GetObject(dex_path_list);
-        if (dex_elements_obj != nullptr) {
-          ObjPtr<mirror::ObjectArray<mirror::Object>> dex_elements =
-              dex_elements_obj->AsObjectArray<mirror::Object>();
-          oss << "(";
-          const char* path_separator = "";
-          for (int32_t i = 0; i != dex_elements->GetLength(); ++i) {
-            ObjPtr<mirror::Object> element = dex_elements->GetWithoutChecks(i);
-            ObjPtr<mirror::Object> dex_file =
-                (element != nullptr) ? dex_file_field->GetObject(element) : nullptr;
-            ObjPtr<mirror::LongArray> long_array =
-                (dex_file != nullptr) ? cookie_field->GetObject(dex_file)->AsLongArray() : nullptr;
-            if (long_array != nullptr) {
-              int32_t long_array_size = long_array->GetLength();
-              // First element is the oat file.
-              for (int32_t j = kDexFileIndexStart; j < long_array_size; ++j) {
-                const DexFile* cp_dex_file = reinterpret_cast<const DexFile*>(
-                    static_cast<uintptr_t>(long_array->GetWithoutChecks(j)));
-                oss << path_separator << cp_dex_file->GetLocation();
-                path_separator = ":";
-              }
-            }
-          }
-          oss << ")";
-        }
-      }
-    }
-  }
-
-  // Do a paranoid check that the `klass` itself is in the class table.
-  ClassTable* table =
-      Runtime::Current()->GetClassLinker()->ClassTableForClassLoader(klass->GetClassLoader());
-  ObjPtr<mirror::Class> k = (table != nullptr) ? table->LookupByDescriptor(klass) : nullptr;
-  if (k != klass) {
-    oss << "{FAIL:" << k.Ptr() << "!=" << klass.Ptr() << "}";
-  }
-  return oss.str();
-}
-
 bool ClassLinker::InitializeClass(Thread* self, Handle<mirror::Class> klass,
                                   bool can_init_statics, bool can_init_parents) {
   // see JLS 3rd edition, 12.4.2 "Detailed Initialization Procedure" for the locking protocol
@@ -4795,15 +4700,7 @@
       MutableHandle<mirror::Class> handle_scope_iface(hs_iface.NewHandle<mirror::Class>(nullptr));
       for (size_t i = 0; i < num_direct_interfaces; i++) {
         handle_scope_iface.Assign(mirror::Class::GetDirectInterface(self, klass.Get(), i));
-        if (UNLIKELY(handle_scope_iface == nullptr)) {
-          const char* iface_descriptor =
-              klass->GetDexFile().StringByTypeIdx(klass->GetDirectInterfaceTypeIdx(i));
-          LOG(FATAL) << "Check failed: handle_scope_iface != nullptr "
-              << "Debug data for bug 34839984: "
-              << klass->PrettyDescriptor() << " iface #" << i << " " << iface_descriptor
-              << " space: " << DescribeSpace(klass.Get())
-              << " loaders: " << DescribeLoaders(klass.Get(), iface_descriptor);
-        }
+        CHECK(handle_scope_iface != nullptr) << klass->PrettyDescriptor() << " iface #" << i;
         CHECK(handle_scope_iface->IsInterface());
         if (handle_scope_iface->HasBeenRecursivelyInitialized()) {
           // We have already done this for this interface. Skip it.
@@ -4939,7 +4836,7 @@
     // First we initialize all of iface's super-interfaces recursively.
     for (size_t i = 0; i < num_direct_ifaces; i++) {
       ObjPtr<mirror::Class> super_iface = mirror::Class::GetDirectInterface(self, iface.Get(), i);
-      DCHECK(super_iface != nullptr);
+      CHECK(super_iface != nullptr) << iface->PrettyDescriptor() << " iface #" << i;
       if (!super_iface->HasBeenRecursivelyInitialized()) {
         // Recursive step
         handle_super_iface.Assign(super_iface);
@@ -8260,6 +8157,43 @@
   return resolved;
 }
 
+ArtField* ClassLinker::LookupResolvedField(uint32_t field_idx,
+                                           ObjPtr<mirror::DexCache> dex_cache,
+                                           ObjPtr<mirror::ClassLoader> class_loader,
+                                           bool is_static) {
+  const DexFile& dex_file = *dex_cache->GetDexFile();
+  const DexFile::FieldId& field_id = dex_file.GetFieldId(field_idx);
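+  // Find the declaring class, first in the dex cache, then with a lookup that does not resolve.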
+  ObjPtr<mirror::Class> klass = dex_cache->GetResolvedType(field_id.class_idx_);
+  if (klass == nullptr) {
+    klass = LookupResolvedType(dex_file, field_id.class_idx_, dex_cache, class_loader);
+  }
+  if (klass == nullptr) {
+    // The class has not been resolved yet, so the field is also unresolved.
+    return nullptr;
+  }
+  DCHECK(klass->IsResolved());
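+  // The current thread is only needed for the static field search below.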
+  Thread* self = is_static ? Thread::Current() : nullptr;
+
+  // First try to find a field declared directly by `klass` by the field index.
+  ArtField* resolved_field = is_static
+      ? mirror::Class::FindStaticField(self, klass, dex_cache, field_idx)
+      : klass->FindInstanceField(dex_cache, field_idx);
+
+  if (resolved_field == nullptr) {
+    // If not found in `klass` by field index, search the class hierarchy using the name and type.
+    const char* name = dex_file.GetFieldName(field_id);
+    const char* type = dex_file.GetFieldTypeDescriptor(field_id);
+    resolved_field = is_static
+        ? mirror::Class::FindStaticField(self, klass, name, type)
+        : klass->FindInstanceField(name, type);
+  }
+
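+  // Publish any match to the dex cache for future lookups.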
+  if (resolved_field != nullptr) {
+    dex_cache->SetResolvedField(field_idx, resolved_field, image_pointer_size_);
+  }
+  return resolved_field;
+}
+
 ArtField* ClassLinker::ResolveField(const DexFile& dex_file,
                                     uint32_t field_idx,
                                     Handle<mirror::DexCache> dex_cache,
@@ -8320,9 +8254,8 @@
     return nullptr;
   }
 
-  StringPiece name(dex_file.StringDataByIdx(field_id.name_idx_));
-  StringPiece type(dex_file.StringDataByIdx(
-      dex_file.GetTypeId(field_id.type_idx_).descriptor_idx_));
+  StringPiece name(dex_file.GetFieldName(field_id));
+  StringPiece type(dex_file.GetFieldTypeDescriptor(field_id));
   resolved = mirror::Class::FindField(self, klass, name, type);
   if (resolved != nullptr) {
     dex_cache->SetResolvedField(field_idx, resolved, image_pointer_size_);
@@ -8946,7 +8879,7 @@
   return ret;
 }
 
-std::unordered_set<std::string> ClassLinker::GetClassDescriptorsForProfileKeys(
+std::unordered_set<std::string> ClassLinker::GetClassDescriptorsForResolvedClasses(
     const std::set<DexCacheResolvedClasses>& classes) {
   ScopedTrace trace(__PRETTY_FUNCTION__);
   std::unordered_set<std::string> ret;
@@ -8961,14 +8894,13 @@
       if (dex_cache != nullptr) {
         const DexFile* dex_file = dex_cache->GetDexFile();
         // There could be duplicates if two dex files with the same location are mapped.
-        location_to_dex_file.emplace(
-            ProfileCompilationInfo::GetProfileDexFileKey(dex_file->GetLocation()), dex_file);
+        location_to_dex_file.emplace(dex_file->GetLocation(), dex_file);
       }
     }
   }
   for (const DexCacheResolvedClasses& info : classes) {
-    const std::string& profile_key = info.GetDexLocation();
-    auto found = location_to_dex_file.find(profile_key);
+    const std::string& location = info.GetDexLocation();
+    auto found = location_to_dex_file.find(location);
     if (found != location_to_dex_file.end()) {
       const DexFile* dex_file = found->second;
       VLOG(profiler) << "Found opened dex file for " << dex_file->GetLocation() << " with "
@@ -8980,7 +8912,7 @@
         ret.insert(descriptor);
       }
     } else {
-      VLOG(class_linker) << "Failed to find opened dex file for profile key " << profile_key;
+      VLOG(class_linker) << "Failed to find opened dex file for location " << location;
     }
   }
   return ret;
diff --git a/runtime/class_linker.h b/runtime/class_linker.h
index 33eed3c..6254acb 100644
--- a/runtime/class_linker.h
+++ b/runtime/class_linker.h
@@ -333,7 +333,7 @@
       REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!Locks::dex_lock_, !Roles::uninterruptible_);
 
-  ArtField* GetResolvedField(uint32_t field_idx, ObjPtr<mirror::DexCache> dex_cache)
+  ArtField* LookupResolvedField(uint32_t field_idx, ArtMethod* referrer, bool is_static)
       REQUIRES_SHARED(Locks::mutator_lock_);
   ArtField* ResolveField(uint32_t field_idx, ArtMethod* referrer, bool is_static)
       REQUIRES_SHARED(Locks::mutator_lock_)
@@ -617,7 +617,8 @@
   std::set<DexCacheResolvedClasses> GetResolvedClasses(bool ignore_boot_classes)
       REQUIRES(!Locks::dex_lock_);
 
-  std::unordered_set<std::string> GetClassDescriptorsForProfileKeys(
+  // Returns the descriptors of the resolved classes whose dex files are currently loaded.
+  std::unordered_set<std::string> GetClassDescriptorsForResolvedClasses(
       const std::set<DexCacheResolvedClasses>& classes)
       REQUIRES(!Locks::dex_lock_);
 
@@ -841,6 +842,13 @@
       REQUIRES(!Locks::classlinker_classes_lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
+  // Find a field by its field index. Returns null if the declaring class has not been resolved.
+  ArtField* LookupResolvedField(uint32_t field_idx,
+                                ObjPtr<mirror::DexCache> dex_cache,
+                                ObjPtr<mirror::ClassLoader> class_loader,
+                                bool is_static)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
   void RegisterDexFileLocked(const DexFile& dex_file,
                              ObjPtr<mirror::DexCache> dex_cache,
                              ObjPtr<mirror::ClassLoader> class_loader)
diff --git a/runtime/class_linker_test.cc b/runtime/class_linker_test.cc
index 21cdede..e5722a1 100644
--- a/runtime/class_linker_test.cc
+++ b/runtime/class_linker_test.cc
@@ -139,7 +139,7 @@
     EXPECT_FALSE(JavaLangObject->IsFinal());
     EXPECT_FALSE(JavaLangObject->IsPrimitive());
     EXPECT_FALSE(JavaLangObject->IsSynthetic());
-    EXPECT_EQ(2U, JavaLangObject->NumDirectMethods());
+    EXPECT_EQ(4U, JavaLangObject->NumDirectMethods());
     EXPECT_EQ(11U, JavaLangObject->NumVirtualMethods());
     if (!kUseBrooksReadBarrier) {
       EXPECT_EQ(2U, JavaLangObject->NumInstanceFields());
diff --git a/runtime/class_table.cc b/runtime/class_table.cc
index af4f998..374b711 100644
--- a/runtime/class_table.cc
+++ b/runtime/class_table.cc
@@ -55,6 +55,12 @@
   return nullptr;
 }
 
+// Work around http://b/35845221: clang before version 4 flags unreachable code below.
+#pragma clang diagnostic push
+#if __clang_major__ < 4
+#pragma clang diagnostic ignored "-Wunreachable-code"
+#endif
+
 mirror::Class* ClassTable::UpdateClass(const char* descriptor, mirror::Class* klass, size_t hash) {
   WriterMutexLock mu(Thread::Current(), lock_);
   // Should only be updating latest table.
@@ -80,6 +86,8 @@
   return existing;
 }
 
+#pragma clang diagnostic pop
+
 size_t ClassTable::CountDefiningLoaderClasses(ObjPtr<mirror::ClassLoader> defining_loader,
                                               const ClassSet& set) const {
   size_t count = 0;
@@ -105,6 +113,20 @@
   return CountDefiningLoaderClasses(defining_loader, classes_.back());
 }
 
+size_t ClassTable::NumReferencedZygoteClasses() const {
+  ReaderMutexLock mu(Thread::Current(), lock_);
+  size_t sum = 0;
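+  // Every class set except the last one is a zygote snapshot.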
+  for (size_t i = 0; i < classes_.size() - 1; ++i) {
+    sum += classes_[i].Size();
+  }
+  return sum;
+}
+
+size_t ClassTable::NumReferencedNonZygoteClasses() const {
+  ReaderMutexLock mu(Thread::Current(), lock_);
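+  // The last class set holds the classes added since the most recent snapshot.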
+  return classes_.back().Size();
+}
+
 mirror::Class* ClassTable::Lookup(const char* descriptor, size_t hash) {
   DescriptorHashPair pair(descriptor, hash);
   ReaderMutexLock mu(Thread::Current(), lock_);
diff --git a/runtime/class_table.h b/runtime/class_table.h
index 711eae4..79f5aea 100644
--- a/runtime/class_table.h
+++ b/runtime/class_table.h
@@ -144,16 +144,26 @@
       REQUIRES(!lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  // Returns the number of classes in previous snapshots.
+  // Returns the number of classes in previous snapshots defined by `defining_loader`.
   size_t NumZygoteClasses(ObjPtr<mirror::ClassLoader> defining_loader) const
       REQUIRES(!lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  // Returns all off the classes in the lastest snapshot.
+  // Returns the number of classes in the latest snapshot defined by `defining_loader`.
   size_t NumNonZygoteClasses(ObjPtr<mirror::ClassLoader> defining_loader) const
       REQUIRES(!lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
+  // Returns the number of classes in previous snapshots, regardless of the defining loader.
+  size_t NumReferencedZygoteClasses() const
+      REQUIRES(!lock_)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
+  // Returns the number of classes in the latest snapshot, regardless of the defining loader.
+  size_t NumReferencedNonZygoteClasses() const
+      REQUIRES(!lock_)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
   // Update a class in the table with the new class. Returns the existing class which was replaced.
   mirror::Class* UpdateClass(const char* descriptor, mirror::Class* new_klass, size_t hash)
       REQUIRES(!lock_)
diff --git a/runtime/dex_file.cc b/runtime/dex_file.cc
index b6a2e09..35e9d5d 100644
--- a/runtime/dex_file.cc
+++ b/runtime/dex_file.cc
@@ -179,6 +179,14 @@
                                              std::string* error_msg) {
   ScopedTrace trace(std::string("Open dex file from mapped-memory ") + location);
   CHECK(map.get() != nullptr);
+
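+  // Reject memory maps too small to contain a dex header before any header fields are read.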
+  if (map->Size() < sizeof(DexFile::Header)) {
+    *error_msg = StringPrintf(
+        "DexFile: failed to open dex file '%s' that is too short to have a header",
+        location.c_str());
+    return nullptr;
+  }
+
   std::unique_ptr<DexFile> dex_file = OpenCommon(map->Begin(),
                                                  map->Size(),
                                                  location,
diff --git a/runtime/dex_file_annotations.cc b/runtime/dex_file_annotations.cc
index d39ea35..6b9654d 100644
--- a/runtime/dex_file_annotations.cc
+++ b/runtime/dex_file_annotations.cc
@@ -41,7 +41,80 @@
 };
 
 namespace {
-mirror::Object* CreateAnnotationMember(Handle<mirror::Class> klass,
+
+// A helper class that contains all the data needed to do annotation lookup.
+class ClassData {
+ public:
+  explicit ClassData(ArtMethod* method) REQUIRES_SHARED(Locks::mutator_lock_)
+    : ClassData(ScopedNullHandle<mirror::Class>(),  // klass
+                method,
+                *method->GetDexFile(),
+                &method->GetClassDef()) {}
+
+  // Requires Scope to be able to create at least 1 handle.
+  template <typename Scope>
+  ClassData(Scope& hs, ArtField* field) REQUIRES_SHARED(Locks::mutator_lock_)
+    : ClassData(hs.NewHandle(field->GetDeclaringClass())) { }
+
+  explicit ClassData(Handle<mirror::Class> klass) REQUIRES_SHARED(art::Locks::mutator_lock_)
+    : ClassData(klass,  // klass
+                nullptr,  // method
+                klass->GetDexFile(),
+                klass->GetClassDef()) {}
+
+  const DexFile& GetDexFile() const REQUIRES_SHARED(Locks::mutator_lock_) {
+    return dex_file_;
+  }
+
+  const DexFile::ClassDef* GetClassDef() const REQUIRES_SHARED(Locks::mutator_lock_) {
+    return class_def_;
+  }
+
+  ObjPtr<mirror::DexCache> GetDexCache() const REQUIRES_SHARED(Locks::mutator_lock_) {
+    if (method_ != nullptr) {
+      return method_->GetDexCache();
+    } else {
+      return real_klass_->GetDexCache();
+    }
+  }
+
+  ObjPtr<mirror::ClassLoader> GetClassLoader() const REQUIRES_SHARED(Locks::mutator_lock_) {
+    if (method_ != nullptr) {
+      return method_->GetDeclaringClass()->GetClassLoader();
+    } else {
+      return real_klass_->GetClassLoader();
+    }
+  }
+
+  ObjPtr<mirror::Class> GetRealClass() const REQUIRES_SHARED(Locks::mutator_lock_) {
+    if (method_ != nullptr) {
+      return method_->GetDeclaringClass();
+    } else {
+      return real_klass_.Get();
+    }
+  }
+
+ private:
+  ClassData(Handle<mirror::Class> klass,
+            ArtMethod* method,
+            const DexFile& dex_file,
+            const DexFile::ClassDef* class_def) REQUIRES_SHARED(Locks::mutator_lock_)
+      : real_klass_(klass),
+        method_(method),
+        dex_file_(dex_file),
+        class_def_(class_def) {
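+    // A ClassData is backed by either a method or a class handle, never both.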
+    DCHECK((method_ == nullptr) || real_klass_.IsNull());
+  }
+
+  Handle<mirror::Class> real_klass_;
+  ArtMethod* method_;
+  const DexFile& dex_file_;
+  const DexFile::ClassDef* class_def_;
+
+  DISALLOW_COPY_AND_ASSIGN(ClassData);
+};
+
+mirror::Object* CreateAnnotationMember(const ClassData& klass,
                                        Handle<mirror::Class> annotation_class,
                                        const uint8_t** annotation)
     REQUIRES_SHARED(Locks::mutator_lock_);
@@ -185,9 +258,8 @@
 const DexFile::AnnotationSetItem* FindAnnotationSetForMethod(ArtMethod* method)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   const DexFile* dex_file = method->GetDexFile();
-  mirror::Class* klass = method->GetDeclaringClass();
   const DexFile::AnnotationsDirectoryItem* annotations_dir =
-      dex_file->GetAnnotationsDirectory(*klass->GetClassDef());
+      dex_file->GetAnnotationsDirectory(method->GetClassDef());
   if (annotations_dir == nullptr) {
     return nullptr;
   }
@@ -209,9 +281,8 @@
 const DexFile::ParameterAnnotationsItem* FindAnnotationsItemForMethod(ArtMethod* method)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   const DexFile* dex_file = method->GetDexFile();
-  mirror::Class* klass = method->GetDeclaringClass();
   const DexFile::AnnotationsDirectoryItem* annotations_dir =
-      dex_file->GetAnnotationsDirectory(*klass->GetClassDef());
+      dex_file->GetAnnotationsDirectory(method->GetClassDef());
   if (annotations_dir == nullptr) {
     return nullptr;
   }
@@ -230,30 +301,34 @@
   return nullptr;
 }
 
-const DexFile::AnnotationSetItem* FindAnnotationSetForClass(Handle<mirror::Class> klass)
+const DexFile::AnnotationSetItem* FindAnnotationSetForClass(const ClassData& klass)
     REQUIRES_SHARED(Locks::mutator_lock_) {
-  const DexFile& dex_file = klass->GetDexFile();
+  const DexFile& dex_file = klass.GetDexFile();
   const DexFile::AnnotationsDirectoryItem* annotations_dir =
-      dex_file.GetAnnotationsDirectory(*klass->GetClassDef());
+      dex_file.GetAnnotationsDirectory(*klass.GetClassDef());
   if (annotations_dir == nullptr) {
     return nullptr;
   }
   return dex_file.GetClassAnnotationSet(annotations_dir);
 }
 
-mirror::Object* ProcessEncodedAnnotation(Handle<mirror::Class> klass, const uint8_t** annotation)
+mirror::Object* ProcessEncodedAnnotation(const ClassData& klass, const uint8_t** annotation)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   uint32_t type_index = DecodeUnsignedLeb128(annotation);
   uint32_t size = DecodeUnsignedLeb128(annotation);
 
   Thread* self = Thread::Current();
   ScopedObjectAccessUnchecked soa(self);
-  StackHandleScope<2> hs(self);
+  StackHandleScope<4> hs(self);
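+  // Extra handle slots cover the dex cache and class loader handles passed to ResolveType.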
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   Handle<mirror::Class> annotation_class(hs.NewHandle(
-      class_linker->ResolveType(klass->GetDexFile(), dex::TypeIndex(type_index), klass.Get())));
+      class_linker->ResolveType(klass.GetDexFile(),
+                                dex::TypeIndex(type_index),
+                                hs.NewHandle(klass.GetDexCache()),
+                                hs.NewHandle(klass.GetClassLoader()))));
   if (annotation_class == nullptr) {
-    LOG(INFO) << "Unable to resolve " << klass->PrettyClass() << " annotation class " << type_index;
+    LOG(INFO) << "Unable to resolve " << klass.GetRealClass()->PrettyClass()
+              << " annotation class " << type_index;
     DCHECK(Thread::Current()->IsExceptionPending());
     Thread::Current()->ClearException();
     return nullptr;
@@ -300,13 +375,13 @@
 }
 
 template <bool kTransactionActive>
-bool ProcessAnnotationValue(Handle<mirror::Class> klass,
+bool ProcessAnnotationValue(const ClassData& klass,
                             const uint8_t** annotation_ptr,
                             DexFile::AnnotationValue* annotation_value,
                             Handle<mirror::Class> array_class,
                             DexFile::AnnotationResultStyle result_style)
     REQUIRES_SHARED(Locks::mutator_lock_) {
-  const DexFile& dex_file = klass->GetDexFile();
+  const DexFile& dex_file = klass.GetDexFile();
   Thread* self = Thread::Current();
   ObjPtr<mirror::Object> element_object = nullptr;
   bool set_object = false;
@@ -361,9 +436,8 @@
         annotation_value->value_.SetI(index);
       } else {
         StackHandleScope<1> hs(self);
-        Handle<mirror::DexCache> dex_cache(hs.NewHandle(klass->GetDexCache()));
         element_object = Runtime::Current()->GetClassLinker()->ResolveString(
-            klass->GetDexFile(), dex::StringIndex(index), dex_cache);
+            klass.GetDexFile(), dex::StringIndex(index), hs.NewHandle(klass.GetDexCache()));
         set_object = true;
         if (element_object == nullptr) {
           return false;
@@ -377,8 +451,12 @@
         annotation_value->value_.SetI(index);
       } else {
         dex::TypeIndex type_index(index);
+        StackHandleScope<2> hs(self);
         element_object = Runtime::Current()->GetClassLinker()->ResolveType(
-            klass->GetDexFile(), type_index, klass.Get());
+            klass.GetDexFile(),
+            type_index,
+            hs.NewHandle(klass.GetDexCache()),
+            hs.NewHandle(klass.GetClassLoader()));
         set_object = true;
         if (element_object == nullptr) {
           CHECK(self->IsExceptionPending());
@@ -399,12 +477,13 @@
       if (result_style == DexFile::kAllRaw) {
         annotation_value->value_.SetI(index);
       } else {
-        StackHandleScope<2> hs(self);
-        Handle<mirror::DexCache> dex_cache(hs.NewHandle(klass->GetDexCache()));
-        Handle<mirror::ClassLoader> class_loader(hs.NewHandle(klass->GetClassLoader()));
         ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+        StackHandleScope<2> hs(self);
         ArtMethod* method = class_linker->ResolveMethodWithoutInvokeType(
-            klass->GetDexFile(), index, dex_cache, class_loader);
+            klass.GetDexFile(),
+            index,
+            hs.NewHandle(klass.GetDexCache()),
+            hs.NewHandle(klass.GetClassLoader()));
         if (method == nullptr) {
           return false;
         }
@@ -439,10 +518,11 @@
         annotation_value->value_.SetI(index);
       } else {
         StackHandleScope<2> hs(self);
-        Handle<mirror::DexCache> dex_cache(hs.NewHandle(klass->GetDexCache()));
-        Handle<mirror::ClassLoader> class_loader(hs.NewHandle(klass->GetClassLoader()));
         ArtField* field = Runtime::Current()->GetClassLinker()->ResolveFieldJLS(
-            klass->GetDexFile(), index, dex_cache, class_loader);
+            klass.GetDexFile(),
+            index,
+            hs.NewHandle(klass.GetDexCache()),
+            hs.NewHandle(klass.GetClassLoader()));
         if (field == nullptr) {
           return false;
         }
@@ -467,10 +547,12 @@
         annotation_value->value_.SetI(index);
       } else {
         StackHandleScope<3> hs(self);
-        Handle<mirror::DexCache> dex_cache(hs.NewHandle(klass->GetDexCache()));
-        Handle<mirror::ClassLoader> class_loader(hs.NewHandle(klass->GetClassLoader()));
         ArtField* enum_field = Runtime::Current()->GetClassLinker()->ResolveField(
-            klass->GetDexFile(), index, dex_cache, class_loader, true);
+            klass.GetDexFile(),
+            index,
+            hs.NewHandle(klass.GetDexCache()),
+            hs.NewHandle(klass.GetClassLoader()),
+            true);
         if (enum_field == nullptr) {
           return false;
         } else {
@@ -595,10 +677,10 @@
   return true;
 }
 
-mirror::Object* CreateAnnotationMember(Handle<mirror::Class> klass,
+mirror::Object* CreateAnnotationMember(const ClassData& klass,
                                        Handle<mirror::Class> annotation_class,
                                        const uint8_t** annotation) {
-  const DexFile& dex_file = klass->GetDexFile();
+  const DexFile& dex_file = klass.GetDexFile();
   Thread* self = Thread::Current();
   ScopedObjectAccessUnchecked soa(self);
   StackHandleScope<5> hs(self);
@@ -666,12 +748,12 @@
 }
 
 const DexFile::AnnotationItem* GetAnnotationItemFromAnnotationSet(
-    Handle<mirror::Class> klass,
+    const ClassData& klass,
     const DexFile::AnnotationSetItem* annotation_set,
     uint32_t visibility,
     Handle<mirror::Class> annotation_class)
     REQUIRES_SHARED(Locks::mutator_lock_) {
-  const DexFile& dex_file = klass->GetDexFile();
+  const DexFile& dex_file = klass.GetDexFile();
   for (uint32_t i = 0; i < annotation_set->size_; ++i) {
     const DexFile::AnnotationItem* annotation_item = dex_file.GetAnnotationItem(annotation_set, i);
     if (!IsVisibilityCompatible(annotation_item->visibility_, visibility)) {
@@ -679,12 +761,16 @@
     }
     const uint8_t* annotation = annotation_item->annotation_;
     uint32_t type_index = DecodeUnsignedLeb128(&annotation);
+    StackHandleScope<2> hs(Thread::Current());
     mirror::Class* resolved_class = Runtime::Current()->GetClassLinker()->ResolveType(
-        klass->GetDexFile(), dex::TypeIndex(type_index), klass.Get());
+        klass.GetDexFile(),
+        dex::TypeIndex(type_index),
+        hs.NewHandle(klass.GetDexCache()),
+        hs.NewHandle(klass.GetClassLoader()));
     if (resolved_class == nullptr) {
       std::string temp;
       LOG(WARNING) << StringPrintf("Unable to resolve %s annotation class %d",
-                                   klass->GetDescriptor(&temp), type_index);
+                                   klass.GetRealClass()->GetDescriptor(&temp), type_index);
       CHECK(Thread::Current()->IsExceptionPending());
       Thread::Current()->ClearException();
       continue;
@@ -698,7 +784,7 @@
 }
 
 mirror::Object* GetAnnotationObjectFromAnnotationSet(
-    Handle<mirror::Class> klass,
+    const ClassData& klass,
     const DexFile::AnnotationSetItem* annotation_set,
     uint32_t visibility,
     Handle<mirror::Class> annotation_class)
@@ -712,13 +798,13 @@
   return ProcessEncodedAnnotation(klass, &annotation);
 }
 
-mirror::Object* GetAnnotationValue(Handle<mirror::Class> klass,
+mirror::Object* GetAnnotationValue(const ClassData& klass,
                                    const DexFile::AnnotationItem* annotation_item,
                                    const char* annotation_name,
                                    Handle<mirror::Class> array_class,
                                    uint32_t expected_type)
     REQUIRES_SHARED(Locks::mutator_lock_) {
-  const DexFile& dex_file = klass->GetDexFile();
+  const DexFile& dex_file = klass.GetDexFile();
   const uint8_t* annotation =
       SearchEncodedAnnotation(dex_file, annotation_item->annotation_, annotation_name);
   if (annotation == nullptr) {
@@ -745,10 +831,10 @@
   return annotation_value.value_.GetL();
 }
 
-mirror::ObjectArray<mirror::String>* GetSignatureValue(Handle<mirror::Class> klass,
+mirror::ObjectArray<mirror::String>* GetSignatureValue(const ClassData& klass,
     const DexFile::AnnotationSetItem* annotation_set)
     REQUIRES_SHARED(Locks::mutator_lock_) {
-  const DexFile& dex_file = klass->GetDexFile();
+  const DexFile& dex_file = klass.GetDexFile();
   StackHandleScope<1> hs(Thread::Current());
   const DexFile::AnnotationItem* annotation_item =
       SearchAnnotationSet(dex_file, annotation_set, "Ldalvik/annotation/Signature;",
@@ -771,10 +857,10 @@
   return obj->AsObjectArray<mirror::String>();
 }
 
-mirror::ObjectArray<mirror::Class>* GetThrowsValue(Handle<mirror::Class> klass,
+mirror::ObjectArray<mirror::Class>* GetThrowsValue(const ClassData& klass,
                                                    const DexFile::AnnotationSetItem* annotation_set)
     REQUIRES_SHARED(Locks::mutator_lock_) {
-  const DexFile& dex_file = klass->GetDexFile();
+  const DexFile& dex_file = klass.GetDexFile();
   StackHandleScope<1> hs(Thread::Current());
   const DexFile::AnnotationItem* annotation_item =
       SearchAnnotationSet(dex_file, annotation_set, "Ldalvik/annotation/Throws;",
@@ -798,11 +884,11 @@
 }
 
 mirror::ObjectArray<mirror::Object>* ProcessAnnotationSet(
-    Handle<mirror::Class> klass,
+    const ClassData& klass,
     const DexFile::AnnotationSetItem* annotation_set,
     uint32_t visibility)
     REQUIRES_SHARED(Locks::mutator_lock_) {
-  const DexFile& dex_file = klass->GetDexFile();
+  const DexFile& dex_file = klass.GetDexFile();
   Thread* self = Thread::Current();
   ScopedObjectAccessUnchecked soa(self);
   StackHandleScope<2> hs(self);
@@ -856,11 +942,11 @@
 }
 
 mirror::ObjectArray<mirror::Object>* ProcessAnnotationSetRefList(
-    Handle<mirror::Class> klass,
+    const ClassData& klass,
     const DexFile::AnnotationSetRefList* set_ref_list,
     uint32_t size)
     REQUIRES_SHARED(Locks::mutator_lock_) {
-  const DexFile& dex_file = klass->GetDexFile();
+  const DexFile& dex_file = klass.GetDexFile();
   Thread* self = Thread::Current();
   ScopedObjectAccessUnchecked soa(self);
   StackHandleScope<1> hs(self);
@@ -899,15 +985,17 @@
     return nullptr;
   }
   StackHandleScope<1> hs(Thread::Current());
-  Handle<mirror::Class> field_class(hs.NewHandle(field->GetDeclaringClass()));
-  return GetAnnotationObjectFromAnnotationSet(field_class, annotation_set,
-                                              DexFile::kDexVisibilityRuntime, annotation_class);
+  const ClassData field_class(hs, field);
+  return GetAnnotationObjectFromAnnotationSet(field_class,
+                                              annotation_set,
+                                              DexFile::kDexVisibilityRuntime,
+                                              annotation_class);
 }
 
 mirror::ObjectArray<mirror::Object>* GetAnnotationsForField(ArtField* field) {
   const DexFile::AnnotationSetItem* annotation_set = FindAnnotationSetForField(field);
   StackHandleScope<1> hs(Thread::Current());
-  Handle<mirror::Class> field_class(hs.NewHandle(field->GetDeclaringClass()));
+  const ClassData field_class(hs, field);
   return ProcessAnnotationSet(field_class, annotation_set, DexFile::kDexVisibilityRuntime);
 }
 
@@ -917,7 +1005,7 @@
     return nullptr;
   }
   StackHandleScope<1> hs(Thread::Current());
-  Handle<mirror::Class> field_class(hs.NewHandle(field->GetDeclaringClass()));
+  const ClassData field_class(hs, field);
   return GetSignatureValue(field_class, annotation_set);
 }
 
@@ -927,17 +1015,17 @@
     return false;
   }
   StackHandleScope<1> hs(Thread::Current());
-  Handle<mirror::Class> field_class(hs.NewHandle(field->GetDeclaringClass()));
+  const ClassData field_class(hs, field);
   const DexFile::AnnotationItem* annotation_item = GetAnnotationItemFromAnnotationSet(
       field_class, annotation_set, DexFile::kDexVisibilityRuntime, annotation_class);
   return annotation_item != nullptr;
 }
 
 mirror::Object* GetAnnotationDefaultValue(ArtMethod* method) {
-  const DexFile* dex_file = method->GetDexFile();
-  mirror::Class* klass = method->GetDeclaringClass();
+  const ClassData klass(method);
+  const DexFile* dex_file = &klass.GetDexFile();
   const DexFile::AnnotationsDirectoryItem* annotations_dir =
-      dex_file->GetAnnotationsDirectory(*klass->GetClassDef());
+      dex_file->GetAnnotationsDirectory(*klass.GetClassDef());
   if (annotations_dir == nullptr) {
     return nullptr;
   }
@@ -965,10 +1053,9 @@
     return nullptr;
   }
   DexFile::AnnotationValue annotation_value;
-  StackHandleScope<2> hs(Thread::Current());
-  Handle<mirror::Class> h_klass(hs.NewHandle(klass));
+  StackHandleScope<1> hs(Thread::Current());
   Handle<mirror::Class> return_type(hs.NewHandle(method->GetReturnType(true /* resolve */)));
-  if (!ProcessAnnotationValue<false>(h_klass,
+  if (!ProcessAnnotationValue<false>(klass,
                                      &annotation,
                                      &annotation_value,
                                      return_type,
@@ -983,17 +1070,15 @@
   if (annotation_set == nullptr) {
     return nullptr;
   }
-  StackHandleScope<1> hs(Thread::Current());
-  Handle<mirror::Class> method_class(hs.NewHandle(method->GetDeclaringClass()));
-  return GetAnnotationObjectFromAnnotationSet(method_class, annotation_set,
+  return GetAnnotationObjectFromAnnotationSet(ClassData(method), annotation_set,
                                               DexFile::kDexVisibilityRuntime, annotation_class);
 }
 
 mirror::ObjectArray<mirror::Object>* GetAnnotationsForMethod(ArtMethod* method) {
   const DexFile::AnnotationSetItem* annotation_set = FindAnnotationSetForMethod(method);
-  StackHandleScope<1> hs(Thread::Current());
-  Handle<mirror::Class> method_class(hs.NewHandle(method->GetDeclaringClass()));
-  return ProcessAnnotationSet(method_class, annotation_set, DexFile::kDexVisibilityRuntime);
+  return ProcessAnnotationSet(ClassData(method),
+                              annotation_set,
+                              DexFile::kDexVisibilityRuntime);
 }
 
 mirror::ObjectArray<mirror::Class>* GetExceptionTypesForMethod(ArtMethod* method) {
@@ -1001,9 +1086,7 @@
   if (annotation_set == nullptr) {
     return nullptr;
   }
-  StackHandleScope<1> hs(Thread::Current());
-  Handle<mirror::Class> method_class(hs.NewHandle(method->GetDeclaringClass()));
-  return GetThrowsValue(method_class, annotation_set);
+  return GetThrowsValue(ClassData(method), annotation_set);
 }
 
 mirror::ObjectArray<mirror::Object>* GetParameterAnnotations(ArtMethod* method) {
@@ -1019,9 +1102,7 @@
     return nullptr;
   }
   uint32_t size = set_ref_list->size_;
-  StackHandleScope<1> hs(Thread::Current());
-  Handle<mirror::Class> method_class(hs.NewHandle(method->GetDeclaringClass()));
-  return ProcessAnnotationSetRefList(method_class, set_ref_list, size);
+  return ProcessAnnotationSetRefList(ClassData(method), set_ref_list, size);
 }
 
 mirror::Object* GetAnnotationForMethodParameter(ArtMethod* method,
@@ -1045,9 +1126,7 @@
   const DexFile::AnnotationSetItem* annotation_set =
      dex_file->GetSetRefItemItem(annotation_set_ref);
 
-  StackHandleScope<1> hs(Thread::Current());
-  Handle<mirror::Class> method_class(hs.NewHandle(method->GetDeclaringClass()));
-  return GetAnnotationObjectFromAnnotationSet(method_class,
+  return GetAnnotationObjectFromAnnotationSet(ClassData(method),
                                               annotation_set,
                                               DexFile::kDexVisibilityRuntime,
                                               annotation_class);
@@ -1072,7 +1151,7 @@
     return false;
   }
 
-  StackHandleScope<5> hs(Thread::Current());
+  StackHandleScope<4> hs(Thread::Current());
 
   // Extract the parameters' names String[].
   ObjPtr<mirror::Class> string_class = mirror::String::GetJavaLangString();
@@ -1082,9 +1161,9 @@
     return false;
   }
 
-  Handle<mirror::Class> klass = hs.NewHandle(method->GetDeclaringClass());
+  ClassData data(method);
   Handle<mirror::Object> names_obj =
-      hs.NewHandle(GetAnnotationValue(klass,
+      hs.NewHandle(GetAnnotationValue(data,
                                       annotation_item,
                                       "names",
                                       string_array_class,
@@ -1099,7 +1178,7 @@
     return false;
   }
   Handle<mirror::Object> access_flags_obj =
-      hs.NewHandle(GetAnnotationValue(klass,
+      hs.NewHandle(GetAnnotationValue(data,
                                       annotation_item,
                                       "accessFlags",
                                       int_array_class,
@@ -1118,9 +1197,7 @@
   if (annotation_set == nullptr) {
     return nullptr;
   }
-  StackHandleScope<1> hs(Thread::Current());
-  Handle<mirror::Class> method_class(hs.NewHandle(method->GetDeclaringClass()));
-  return GetSignatureValue(method_class, annotation_set);
+  return GetSignatureValue(ClassData(method), annotation_set);
 }
 
 bool IsMethodAnnotationPresent(ArtMethod* method, Handle<mirror::Class> annotation_class,
@@ -1129,37 +1206,39 @@
   if (annotation_set == nullptr) {
     return false;
   }
-  StackHandleScope<1> hs(Thread::Current());
-  Handle<mirror::Class> method_class(hs.NewHandle(method->GetDeclaringClass()));
   const DexFile::AnnotationItem* annotation_item =
-      GetAnnotationItemFromAnnotationSet(method_class, annotation_set, visibility,
-                                         annotation_class);
+      GetAnnotationItemFromAnnotationSet(ClassData(method),
+                                         annotation_set, visibility, annotation_class);
   return annotation_item != nullptr;
 }
 
 mirror::Object* GetAnnotationForClass(Handle<mirror::Class> klass,
                                       Handle<mirror::Class> annotation_class) {
-  const DexFile::AnnotationSetItem* annotation_set = FindAnnotationSetForClass(klass);
+  ClassData data(klass);
+  const DexFile::AnnotationSetItem* annotation_set = FindAnnotationSetForClass(data);
   if (annotation_set == nullptr) {
     return nullptr;
   }
-  return GetAnnotationObjectFromAnnotationSet(klass, annotation_set, DexFile::kDexVisibilityRuntime,
+  return GetAnnotationObjectFromAnnotationSet(data,
+                                              annotation_set,
+                                              DexFile::kDexVisibilityRuntime,
                                               annotation_class);
 }
 
 mirror::ObjectArray<mirror::Object>* GetAnnotationsForClass(Handle<mirror::Class> klass) {
-  const DexFile::AnnotationSetItem* annotation_set = FindAnnotationSetForClass(klass);
-  return ProcessAnnotationSet(klass, annotation_set, DexFile::kDexVisibilityRuntime);
+  ClassData data(klass);
+  const DexFile::AnnotationSetItem* annotation_set = FindAnnotationSetForClass(data);
+  return ProcessAnnotationSet(data, annotation_set, DexFile::kDexVisibilityRuntime);
 }
 
 mirror::ObjectArray<mirror::Class>* GetDeclaredClasses(Handle<mirror::Class> klass) {
-  const DexFile& dex_file = klass->GetDexFile();
-  const DexFile::AnnotationSetItem* annotation_set = FindAnnotationSetForClass(klass);
+  ClassData data(klass);
+  const DexFile::AnnotationSetItem* annotation_set = FindAnnotationSetForClass(data);
   if (annotation_set == nullptr) {
     return nullptr;
   }
   const DexFile::AnnotationItem* annotation_item =
-      SearchAnnotationSet(dex_file, annotation_set, "Ldalvik/annotation/MemberClasses;",
+      SearchAnnotationSet(data.GetDexFile(), annotation_set, "Ldalvik/annotation/MemberClasses;",
                           DexFile::kDexVisibilitySystem);
   if (annotation_item == nullptr) {
     return nullptr;
@@ -1172,7 +1251,7 @@
     return nullptr;
   }
   mirror::Object* obj =
-      GetAnnotationValue(klass, annotation_item, "value", class_array_class,
+      GetAnnotationValue(data, annotation_item, "value", class_array_class,
                          DexFile::kDexAnnotationArray);
   if (obj == nullptr) {
     return nullptr;
@@ -1181,18 +1260,18 @@
 }
 
 mirror::Class* GetDeclaringClass(Handle<mirror::Class> klass) {
-  const DexFile& dex_file = klass->GetDexFile();
-  const DexFile::AnnotationSetItem* annotation_set = FindAnnotationSetForClass(klass);
+  ClassData data(klass);
+  const DexFile::AnnotationSetItem* annotation_set = FindAnnotationSetForClass(data);
   if (annotation_set == nullptr) {
     return nullptr;
   }
   const DexFile::AnnotationItem* annotation_item =
-      SearchAnnotationSet(dex_file, annotation_set, "Ldalvik/annotation/EnclosingClass;",
+      SearchAnnotationSet(data.GetDexFile(), annotation_set, "Ldalvik/annotation/EnclosingClass;",
                           DexFile::kDexVisibilitySystem);
   if (annotation_item == nullptr) {
     return nullptr;
   }
-  mirror::Object* obj = GetAnnotationValue(klass, annotation_item, "value",
+  mirror::Object* obj = GetAnnotationValue(data, annotation_item, "value",
                                            ScopedNullHandle<mirror::Class>(),
                                            DexFile::kDexAnnotationType);
   if (obj == nullptr) {
@@ -1202,28 +1281,30 @@
 }
 
 mirror::Class* GetEnclosingClass(Handle<mirror::Class> klass) {
-  const DexFile& dex_file = klass->GetDexFile();
   mirror::Class* declaring_class = GetDeclaringClass(klass);
   if (declaring_class != nullptr) {
     return declaring_class;
   }
-  const DexFile::AnnotationSetItem* annotation_set = FindAnnotationSetForClass(klass);
+  ClassData data(klass);
+  const DexFile::AnnotationSetItem* annotation_set = FindAnnotationSetForClass(data);
   if (annotation_set == nullptr) {
     return nullptr;
   }
   const DexFile::AnnotationItem* annotation_item =
-      SearchAnnotationSet(dex_file, annotation_set, "Ldalvik/annotation/EnclosingMethod;",
+      SearchAnnotationSet(data.GetDexFile(),
+                          annotation_set,
+                          "Ldalvik/annotation/EnclosingMethod;",
                           DexFile::kDexVisibilitySystem);
   if (annotation_item == nullptr) {
     return nullptr;
   }
   const uint8_t* annotation =
-      SearchEncodedAnnotation(dex_file, annotation_item->annotation_, "value");
+      SearchEncodedAnnotation(data.GetDexFile(), annotation_item->annotation_, "value");
   if (annotation == nullptr) {
     return nullptr;
   }
   DexFile::AnnotationValue annotation_value;
-  if (!ProcessAnnotationValue<false>(klass,
+  if (!ProcessAnnotationValue<false>(data,
                                      &annotation,
                                      &annotation_value,
                                      ScopedNullHandle<mirror::Class>(),
@@ -1234,10 +1315,11 @@
     return nullptr;
   }
   StackHandleScope<2> hs(Thread::Current());
-  Handle<mirror::DexCache> dex_cache(hs.NewHandle(klass->GetDexCache()));
-  Handle<mirror::ClassLoader> class_loader(hs.NewHandle(klass->GetClassLoader()));
   ArtMethod* method = Runtime::Current()->GetClassLinker()->ResolveMethodWithoutInvokeType(
-      klass->GetDexFile(), annotation_value.value_.GetI(), dex_cache, class_loader);
+      data.GetDexFile(),
+      annotation_value.value_.GetI(),
+      hs.NewHandle(data.GetDexCache()),
+      hs.NewHandle(data.GetClassLoader()));
   if (method == nullptr) {
     return nullptr;
   }
@@ -1245,39 +1327,44 @@
 }
 
 mirror::Object* GetEnclosingMethod(Handle<mirror::Class> klass) {
-  const DexFile& dex_file = klass->GetDexFile();
-  const DexFile::AnnotationSetItem* annotation_set = FindAnnotationSetForClass(klass);
+  ClassData data(klass);
+  const DexFile::AnnotationSetItem* annotation_set = FindAnnotationSetForClass(data);
   if (annotation_set == nullptr) {
     return nullptr;
   }
   const DexFile::AnnotationItem* annotation_item =
-      SearchAnnotationSet(dex_file, annotation_set, "Ldalvik/annotation/EnclosingMethod;",
+      SearchAnnotationSet(data.GetDexFile(),
+                          annotation_set,
+                          "Ldalvik/annotation/EnclosingMethod;",
                           DexFile::kDexVisibilitySystem);
   if (annotation_item == nullptr) {
     return nullptr;
   }
-  return GetAnnotationValue(klass, annotation_item, "value", ScopedNullHandle<mirror::Class>(),
+  return GetAnnotationValue(data, annotation_item, "value", ScopedNullHandle<mirror::Class>(),
       DexFile::kDexAnnotationMethod);
 }
 
 bool GetInnerClass(Handle<mirror::Class> klass, mirror::String** name) {
-  const DexFile& dex_file = klass->GetDexFile();
-  const DexFile::AnnotationSetItem* annotation_set = FindAnnotationSetForClass(klass);
+  ClassData data(klass);
+  const DexFile::AnnotationSetItem* annotation_set = FindAnnotationSetForClass(data);
   if (annotation_set == nullptr) {
     return false;
   }
   const DexFile::AnnotationItem* annotation_item = SearchAnnotationSet(
-      dex_file, annotation_set, "Ldalvik/annotation/InnerClass;", DexFile::kDexVisibilitySystem);
+      data.GetDexFile(),
+      annotation_set,
+      "Ldalvik/annotation/InnerClass;",
+      DexFile::kDexVisibilitySystem);
   if (annotation_item == nullptr) {
     return false;
   }
   const uint8_t* annotation =
-      SearchEncodedAnnotation(dex_file, annotation_item->annotation_, "name");
+      SearchEncodedAnnotation(data.GetDexFile(), annotation_item->annotation_, "name");
   if (annotation == nullptr) {
     return false;
   }
   DexFile::AnnotationValue annotation_value;
-  if (!ProcessAnnotationValue<false>(klass,
+  if (!ProcessAnnotationValue<false>(data,
                                      &annotation,
                                      &annotation_value,
                                      ScopedNullHandle<mirror::Class>(),
@@ -1293,24 +1380,24 @@
 }
 
 bool GetInnerClassFlags(Handle<mirror::Class> klass, uint32_t* flags) {
-  const DexFile& dex_file = klass->GetDexFile();
-  const DexFile::AnnotationSetItem* annotation_set = FindAnnotationSetForClass(klass);
+  ClassData data(klass);
+  const DexFile::AnnotationSetItem* annotation_set = FindAnnotationSetForClass(data);
   if (annotation_set == nullptr) {
     return false;
   }
   const DexFile::AnnotationItem* annotation_item =
-      SearchAnnotationSet(dex_file, annotation_set, "Ldalvik/annotation/InnerClass;",
+      SearchAnnotationSet(data.GetDexFile(), annotation_set, "Ldalvik/annotation/InnerClass;",
                           DexFile::kDexVisibilitySystem);
   if (annotation_item == nullptr) {
     return false;
   }
   const uint8_t* annotation =
-      SearchEncodedAnnotation(dex_file, annotation_item->annotation_, "accessFlags");
+      SearchEncodedAnnotation(data.GetDexFile(), annotation_item->annotation_, "accessFlags");
   if (annotation == nullptr) {
     return false;
   }
   DexFile::AnnotationValue annotation_value;
-  if (!ProcessAnnotationValue<false>(klass,
+  if (!ProcessAnnotationValue<false>(data,
                                      &annotation,
                                      &annotation_value,
                                      ScopedNullHandle<mirror::Class>(),
@@ -1325,20 +1412,22 @@
 }
 
 mirror::ObjectArray<mirror::String>* GetSignatureAnnotationForClass(Handle<mirror::Class> klass) {
-  const DexFile::AnnotationSetItem* annotation_set = FindAnnotationSetForClass(klass);
+  ClassData data(klass);
+  const DexFile::AnnotationSetItem* annotation_set = FindAnnotationSetForClass(data);
   if (annotation_set == nullptr) {
     return nullptr;
   }
-  return GetSignatureValue(klass, annotation_set);
+  return GetSignatureValue(data, annotation_set);
 }
 
 bool IsClassAnnotationPresent(Handle<mirror::Class> klass, Handle<mirror::Class> annotation_class) {
-  const DexFile::AnnotationSetItem* annotation_set = FindAnnotationSetForClass(klass);
+  ClassData data(klass);
+  const DexFile::AnnotationSetItem* annotation_set = FindAnnotationSetForClass(data);
   if (annotation_set == nullptr) {
     return false;
   }
   const DexFile::AnnotationItem* annotation_item = GetAnnotationItemFromAnnotationSet(
-      klass, annotation_set, DexFile::kDexVisibilityRuntime, annotation_class);
+      data, annotation_set, DexFile::kDexVisibilityRuntime, annotation_class);
   return annotation_item != nullptr;
 }
 
diff --git a/runtime/dexopt_test.cc b/runtime/dexopt_test.cc
index 5167869..db65e40 100644
--- a/runtime/dexopt_test.cc
+++ b/runtime/dexopt_test.cc
@@ -111,7 +111,7 @@
                                                  &error_msg));
   ASSERT_TRUE(image_header != nullptr) << error_msg;
   const OatHeader& oat_header = odex_file->GetOatHeader();
-  uint32_t combined_checksum = OatFileAssistant::CalculateCombinedImageChecksum();
+  uint32_t combined_checksum = image_header->GetOatChecksum();
 
   if (CompilerFilter::DependsOnImageChecksum(filter)) {
     if (with_alternate_image) {
diff --git a/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc b/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc
index 47c6b51..355d7b3 100644
--- a/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc
@@ -32,12 +32,9 @@
 namespace art {
 
 static inline void BssWriteBarrier(ArtMethod* outer_method) REQUIRES_SHARED(Locks::mutator_lock_) {
-  // For non-CC AOT code, we need a write barrier for the class loader that holds the
-  // GC roots in the .bss. For CC, we do not need to do anything because the roots
-  // we're storing are all referencing to-space and do not need to be re-visited.
-  // However, we do the DCHECK() for the registration of oat files with .bss sections.
-  const DexFile* dex_file =
-      (kUseReadBarrier && !kIsDebugBuild) ? nullptr : outer_method->GetDexFile();
+  // For AOT code, we need a write barrier for the class loader that holds the
+  // GC roots in the .bss.
+  const DexFile* dex_file = outer_method->GetDexFile();
   if (dex_file != nullptr &&
       dex_file->GetOatDexFile() != nullptr &&
       !dex_file->GetOatDexFile()->GetOatFile()->GetBssGcRoots().empty()) {
@@ -50,15 +47,13 @@
           << "Oat file with .bss GC roots was not registered in class table: "
           << dex_file->GetOatDexFile()->GetOatFile()->GetLocation();
     }
-    if (!kUseReadBarrier) {
-      if (class_loader != nullptr) {
-        // Note that we emit the barrier before the compiled code stores the String or Class
-        // as a GC root. This is OK as there is no suspend point point in between.
-        Runtime::Current()->GetHeap()->WriteBarrierEveryFieldOf(class_loader);
-      } else {
-        Runtime::Current()->GetClassLinker()->WriteBarrierForBootOatFileBssRoots(
-            dex_file->GetOatDexFile()->GetOatFile());
-      }
+    if (class_loader != nullptr) {
+      // Note that we emit the barrier before the compiled code stores the String or Class
+      // as a GC root. This is OK as there is no suspend point in between.
+      Runtime::Current()->GetHeap()->WriteBarrierEveryFieldOf(class_loader);
+    } else {
+      Runtime::Current()->GetClassLinker()->WriteBarrierForBootOatFileBssRoots(
+          dex_file->GetOatDexFile()->GetOatFile());
     }
   }
 }
diff --git a/runtime/entrypoints/quick/quick_throw_entrypoints.cc b/runtime/entrypoints/quick/quick_throw_entrypoints.cc
index c8ee99a..1520e13 100644
--- a/runtime/entrypoints/quick/quick_throw_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_throw_entrypoints.cc
@@ -62,9 +62,7 @@
 extern "C" NO_RETURN void artThrowNullPointerExceptionFromSignal(uintptr_t addr, Thread* self)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
-  self->NoteSignalBeingHandled();
   ThrowNullPointerExceptionFromDexPC(/* check_address */ true, addr);
-  self->NoteSignalHandlerDone();
   self->QuickDeliverException();
 }
 
@@ -95,9 +93,7 @@
 extern "C" NO_RETURN void artThrowStackOverflowFromCode(Thread* self)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
-  self->NoteSignalBeingHandled();
   ThrowStackOverflowError(self);
-  self->NoteSignalHandlerDone();
   self->QuickDeliverException();
 }
 
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index 4c3990a..3fd20a6 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -2323,48 +2323,26 @@
   return artInvokeCommon<kVirtual, true>(method_idx, this_object, self, sp);
 }
 
-// Determine target of interface dispatch. This object is known non-null. First argument
-// is there for consistency but should not be used, as some architectures overwrite it
-// in the assembly trampoline.
-extern "C" TwoWordReturn artInvokeInterfaceTrampoline(uint32_t deadbeef ATTRIBUTE_UNUSED,
+// Determine target of interface dispatch. The interface method and this object are known non-null.
+// The interface method is the method returned by the dex cache in the conflict trampoline.
+extern "C" TwoWordReturn artInvokeInterfaceTrampoline(ArtMethod* interface_method,
                                                       mirror::Object* raw_this_object,
                                                       Thread* self,
                                                       ArtMethod** sp)
     REQUIRES_SHARED(Locks::mutator_lock_) {
+  CHECK(interface_method != nullptr);
   ObjPtr<mirror::Object> this_object(raw_this_object);
   ScopedQuickEntrypointChecks sqec(self);
   StackHandleScope<1> hs(self);
   Handle<mirror::Class> cls(hs.NewHandle(this_object->GetClass()));
 
   ArtMethod* caller_method = QuickArgumentVisitor::GetCallingMethod(sp);
-
-  // Fetch the dex_method_idx of the target interface method from the caller.
-  uint32_t dex_pc = QuickArgumentVisitor::GetCallingDexPc(sp);
-
-  const DexFile::CodeItem* code_item = caller_method->GetCodeItem();
-  CHECK_LT(dex_pc, code_item->insns_size_in_code_units_);
-  const Instruction* instr = Instruction::At(&code_item->insns_[dex_pc]);
-  Instruction::Code instr_code = instr->Opcode();
-  CHECK(instr_code == Instruction::INVOKE_INTERFACE ||
-        instr_code == Instruction::INVOKE_INTERFACE_RANGE)
-      << "Unexpected call into interface trampoline: " << instr->DumpString(nullptr);
-  uint32_t dex_method_idx;
-  if (instr_code == Instruction::INVOKE_INTERFACE) {
-    dex_method_idx = instr->VRegB_35c();
-  } else {
-    CHECK_EQ(instr_code, Instruction::INVOKE_INTERFACE_RANGE);
-    dex_method_idx = instr->VRegB_3rc();
-  }
-
-  ArtMethod* interface_method = caller_method->GetDexCacheResolvedMethod(
-      dex_method_idx, kRuntimePointerSize);
-  DCHECK(interface_method != nullptr) << dex_method_idx << " " << caller_method->PrettyMethod();
   ArtMethod* method = nullptr;
   ImTable* imt = cls->GetImt(kRuntimePointerSize);
 
   if (LIKELY(interface_method->GetDexMethodIndex() != DexFile::kDexNoIndex)) {
-    // If the dex cache already resolved the interface method, look whether we have
-    // a match in the ImtConflictTable.
+    // If the interface method is already resolved, look whether we have a match in the
+    // ImtConflictTable.
     ArtMethod* conflict_method = imt->Get(ImTable::GetImtIndex(interface_method),
                                           kRuntimePointerSize);
     if (LIKELY(conflict_method->IsRuntimeMethod())) {
@@ -2389,9 +2367,26 @@
       return GetTwoWordFailureValue();  // Failure.
     }
   } else {
-    // The dex cache did not resolve the method, look it up in the dex file
-    // of the caller,
+    // The interface method is unresolved, so look it up in the dex file of the caller.
     DCHECK_EQ(interface_method, Runtime::Current()->GetResolutionMethod());
+
+    // Fetch the dex_method_idx of the target interface method from the caller.
+    uint32_t dex_method_idx;
+    uint32_t dex_pc = QuickArgumentVisitor::GetCallingDexPc(sp);
+    const DexFile::CodeItem* code_item = caller_method->GetCodeItem();
+    DCHECK_LT(dex_pc, code_item->insns_size_in_code_units_);
+    const Instruction* instr = Instruction::At(&code_item->insns_[dex_pc]);
+    Instruction::Code instr_code = instr->Opcode();
+    DCHECK(instr_code == Instruction::INVOKE_INTERFACE ||
+           instr_code == Instruction::INVOKE_INTERFACE_RANGE)
+        << "Unexpected call into interface trampoline: " << instr->DumpString(nullptr);
+    if (instr_code == Instruction::INVOKE_INTERFACE) {
+      dex_method_idx = instr->VRegB_35c();
+    } else {
+      DCHECK_EQ(instr_code, Instruction::INVOKE_INTERFACE_RANGE);
+      dex_method_idx = instr->VRegB_3rc();
+    }
+
     const DexFile* dex_file = caller_method->GetDeclaringClass()->GetDexCache()
         ->GetDexFile();
     uint32_t shorty_len;
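
(Aside: a sketch of the dispatch structure this hunk targets. Interface calls go through a fixed-size IMT per class; interface methods whose IMT slots collide are resolved through a per-slot conflict table, which is what the conflict trampoline consults before falling back to this function. The code below is an illustrative simplification, not ART's real data structures: Method, ConflictEntry, and the linear lookup are stand-ins.)

#include <vector>

struct Method;  // stand-in for ArtMethod

// One entry per colliding interface method sharing an IMT slot.
struct ConflictEntry {
  Method* interface_method;
  Method* implementation;
};

struct ConflictTable {
  std::vector<ConflictEntry> entries;

  // The conflict trampoline performs a lookup of this shape: given the
  // interface method (now passed in directly, per the hunk above), find the
  // concrete implementation for the receiver's class.
  Method* Lookup(Method* interface_method) const {
    for (const ConflictEntry& entry : entries) {
      if (entry.interface_method == interface_method) {
        return entry.implementation;
      }
    }
    return nullptr;  // Miss: fall back to artInvokeInterfaceTrampoline.
  }
};
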
diff --git a/runtime/entrypoints_order_test.cc b/runtime/entrypoints_order_test.cc
index d0687ce..55a4625 100644
--- a/runtime/entrypoints_order_test.cc
+++ b/runtime/entrypoints_order_test.cc
@@ -133,9 +133,8 @@
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_alloc_stack_top, thread_local_alloc_stack_end,
                         sizeof(void*));
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_alloc_stack_end, held_mutexes, sizeof(void*));
-    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, held_mutexes, nested_signal_state,
+    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, held_mutexes, flip_function,
                         sizeof(void*) * kLockLevelCount);
-    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, nested_signal_state, flip_function, sizeof(void*));
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, flip_function, method_verifier, sizeof(void*));
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, method_verifier, thread_local_mark_stack, sizeof(void*));
     EXPECT_OFFSET_DIFF(Thread, tlsPtr_.thread_local_mark_stack, Thread, wait_mutex_, sizeof(void*),
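
(Aside: these EXPECT_OFFSET_DIFFP checks pin the relative layout of Thread's tlsPtr_ fields; deleting nested_signal_state shifts every later field by one pointer, which is why the expectations change. The macros reduce to offsetof arithmetic; a minimal standalone equivalent, as a sketch with an illustrative struct rather than ART's Thread:)

#include <cstddef>

struct TlsPtr {
  void* held_mutexes[16];  // illustrative; ART sizes this by kLockLevelCount
  void* flip_function;     // nested_signal_state used to sit between these two
  void* method_verifier;
};

// EXPECT_OFFSET_DIFFP asserts exactly this kind of relation between fields:
static_assert(offsetof(TlsPtr, flip_function) - offsetof(TlsPtr, held_mutexes)
                  == sizeof(void*) * 16,
              "held_mutexes and flip_function must be adjacent");
static_assert(offsetof(TlsPtr, method_verifier) - offsetof(TlsPtr, flip_function)
                  == sizeof(void*),
              "flip_function and method_verifier must be adjacent");
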
diff --git a/runtime/fault_handler.cc b/runtime/fault_handler.cc
index f9345b6..64128cc 100644
--- a/runtime/fault_handler.cc
+++ b/runtime/fault_handler.cc
@@ -28,47 +28,6 @@
 #include "thread-inl.h"
 #include "verify_object-inl.h"
 
-// Note on nested signal support
-// -----------------------------
-//
-// Typically a signal handler should not need to deal with signals that occur within it.
-// However, when a SIGSEGV occurs that is in generated code and is not one of the
-// handled signals (implicit checks), we call a function to try to dump the stack
-// to the log.  This enhances the debugging experience but may have the side effect
-// that it may not work.  If the cause of the original SIGSEGV is a corrupted stack or other
-// memory region, the stack backtrace code may run into trouble and may either crash
-// or fail with an abort (SIGABRT).  In either case we don't want that (new) signal to
-// mask the original signal and thus prevent useful debug output from being presented.
-//
-// In order to handle this situation, before we call the stack tracer we do the following:
-//
-// 1. shutdown the fault manager so that we are talking to the real signal management
-//    functions rather than those in sigchain.
-// 2. use pthread_sigmask to allow SIGSEGV and SIGABRT signals to be delivered to the
-//    thread running the signal handler.
-// 3. set the handler for SIGSEGV and SIGABRT to a secondary signal handler.
-// 4. save the thread's state to the TLS of the current thread using 'setjmp'
-//
-// We then call the stack tracer and one of two things may happen:
-// a. it completes successfully
-// b. it crashes and a signal is raised.
-//
-// In the former case, we fall through and everything is fine.  In the latter case
-// our secondary signal handler gets called in a signal context.  This results in
-// a call to FaultManager::HandledNestedSignal(), an archirecture specific function
-// whose purpose is to call 'longjmp' on the jmp_buf saved in the TLS of the current
-// thread.  This results in a return with a non-zero value from 'setjmp'.  We detect this
-// and write something to the log to tell the user that it happened.
-//
-// Regardless of how we got there, we reach the code after the stack tracer and we
-// restore the signal states to their original values, reinstate the fault manager (thus
-// reestablishing the signal chain) and continue.
-
-// This is difficult to test with a runtime test.  To invoke the nested signal code
-// on any signal, uncomment the following line and run something that throws a
-// NullPointerException.
-// #define TEST_NESTED_SIGNAL
-
 namespace art {
 // Static fault manager object accessed by signal handler.
 FaultManager fault_manager;
@@ -83,11 +42,6 @@
   fault_manager.HandleFault(sig, info, context);
 }
 
-// Signal handler for dealing with a nested signal.
-static void art_nested_signal_handler(int sig, siginfo_t* info, void* context) {
-  fault_manager.HandleNestedSignal(sig, info, context);
-}
-
 FaultManager::FaultManager() : initialized_(false) {
   sigaction(SIGSEGV, nullptr, &oldaction_);
 }
@@ -156,122 +110,93 @@
   DCHECK(self != nullptr);
   DCHECK(Runtime::Current() != nullptr);
   DCHECK(Runtime::Current()->IsStarted());
-
-  // Now set up the nested signal handler.
-
-  // TODO: add SIGSEGV back to the nested signals when we can handle running out stack gracefully.
-  static const int handled_nested_signals[] = {SIGABRT};
-  constexpr size_t num_handled_nested_signals = arraysize(handled_nested_signals);
-
-  // Release the fault manager so that it will remove the signal chain for
-  // SIGSEGV and we call the real sigaction.
-  fault_manager.Release();
-
-  // The action for SIGSEGV should be the default handler now.
-
-  // Unblock the signals we allow so that they can be delivered in the signal handler.
-  sigset_t sigset;
-  sigemptyset(&sigset);
-  for (int signal : handled_nested_signals) {
-    sigaddset(&sigset, signal);
-  }
-  pthread_sigmask(SIG_UNBLOCK, &sigset, nullptr);
-
-  // If we get a signal in this code we want to invoke our nested signal
-  // handler.
-  struct sigaction action;
-  struct sigaction oldactions[num_handled_nested_signals];
-  action.sa_sigaction = art_nested_signal_handler;
-
-  // Explicitly mask out SIGSEGV and SIGABRT from the nested signal handler.  This
-  // should be the default but we definitely don't want these happening in our
-  // nested signal handler.
-  sigemptyset(&action.sa_mask);
-  for (int signal : handled_nested_signals) {
-    sigaddset(&action.sa_mask, signal);
-  }
-
-  action.sa_flags = SA_SIGINFO | SA_ONSTACK;
-#if !defined(__APPLE__) && !defined(__mips__)
-  action.sa_restorer = nullptr;
-#endif
-
-  // Catch handled signals to invoke our nested handler.
-  bool success = true;
-  for (size_t i = 0; i < num_handled_nested_signals; ++i) {
-    success = sigaction(handled_nested_signals[i], &action, &oldactions[i]) == 0;
-    if (!success) {
-      PLOG(ERROR) << "Unable to set up nested signal handler";
-      break;
+  for (const auto& handler : other_handlers_) {
+    if (handler->Action(sig, info, context)) {
+      return true;
     }
   }
-
-  if (success) {
-    // Save the current state and call the handlers.  If anything causes a signal
-    // our nested signal handler will be invoked and this will longjmp to the saved
-    // state.
-    if (setjmp(*self->GetNestedSignalState()) == 0) {
-      for (const auto& handler : other_handlers_) {
-        if (handler->Action(sig, info, context)) {
-          // Restore the signal handlers, reinit the fault manager and return.  Signal was
-          // handled.
-          for (size_t i = 0; i < num_handled_nested_signals; ++i) {
-            success = sigaction(handled_nested_signals[i], &oldactions[i], nullptr) == 0;
-            if (!success) {
-              PLOG(ERROR) << "Unable to restore signal handler";
-            }
-          }
-          fault_manager.Init();
-          return true;
-        }
-      }
-    } else {
-      LOG(ERROR) << "Nested signal detected - original signal being reported";
-    }
-
-    // Restore the signal handlers.
-    for (size_t i = 0; i < num_handled_nested_signals; ++i) {
-      success = sigaction(handled_nested_signals[i], &oldactions[i], nullptr) == 0;
-      if (!success) {
-        PLOG(ERROR) << "Unable to restore signal handler";
-      }
-    }
-  }
-
-  // Now put the fault manager back in place.
-  fault_manager.Init();
   return false;
 }
 
+class ScopedSignalUnblocker {
+ public:
+  explicit ScopedSignalUnblocker(const std::initializer_list<int>& signals) {
+    sigset_t new_mask;
+    sigemptyset(&new_mask);
+    for (int signal : signals) {
+      sigaddset(&new_mask, signal);
+    }
+    if (sigprocmask(SIG_UNBLOCK, &new_mask, &previous_mask_) != 0) {
+      PLOG(FATAL) << "failed to unblock signals";
+    }
+  }
+
+  ~ScopedSignalUnblocker() {
+    if (sigprocmask(SIG_SETMASK, &previous_mask_, nullptr) != 0) {
+      PLOG(FATAL) << "failed to unblock signals";
+    }
+  }
+
+ private:
+  sigset_t previous_mask_;
+};
+
+class ScopedHandlingSignalSetter {
+ public:
+  explicit ScopedHandlingSignalSetter(Thread* thread) : thread_(thread) {
+    CHECK(!thread->HandlingSignal());
+    thread_->SetHandlingSignal(true);
+  }
+
+  ~ScopedHandlingSignalSetter() {
+    CHECK(thread_->HandlingSignal());
+    thread_->SetHandlingSignal(false);
+  }
+
+ private:
+  Thread* thread_;
+};
+
 void FaultManager::HandleFault(int sig, siginfo_t* info, void* context) {
   // BE CAREFUL ALLOCATING HERE INCLUDING USING LOG(...)
   //
   // If malloc calls abort, it will be holding its lock.
   // If the handler tries to call malloc, it will deadlock.
-  VLOG(signals) << "Handling fault";
-  if (IsInGeneratedCode(info, context, true)) {
-    VLOG(signals) << "in generated code, looking for handler";
-    for (const auto& handler : generated_code_handlers_) {
-      VLOG(signals) << "invoking Action on handler " << handler;
-      if (handler->Action(sig, info, context)) {
-#ifdef TEST_NESTED_SIGNAL
-        // In test mode we want to fall through to stack trace handler
-        // on every signal (in reality this will cause a crash on the first
-        // signal).
-        break;
-#else
-        // We have handled a signal so it's time to return from the
-        // signal handler to the appropriate place.
-        return;
-#endif
-      }
-    }
 
-    // We hit a signal we didn't handle.  This might be something for which
-    // we can give more information about so call all registered handlers to see
-    // if it is.
-    if (HandleFaultByOtherHandlers(sig, info, context)) {
+  // Use a thread-local field to track whether we're recursing, and fall back
+  // (e.g., if one of our handlers crashed).
+  Thread* thread = Thread::Current();
+
+  if (thread != nullptr && !thread->HandlingSignal()) {
+    // Unblock some signals and set thread->handling_signal_ to true,
+    // so that we can catch crashes in our signal handler.
+    ScopedHandlingSignalSetter setter(thread);
+    ScopedSignalUnblocker unblocker { SIGABRT, SIGBUS, SIGSEGV }; // NOLINT
+
+    VLOG(signals) << "Handling fault";
+
+#ifdef TEST_NESTED_SIGNAL
+    // Simulate a crash in a handler.
+    raise(SIGSEGV);
+#endif
+
+    if (IsInGeneratedCode(info, context, true)) {
+      VLOG(signals) << "in generated code, looking for handler";
+      for (const auto& handler : generated_code_handlers_) {
+        VLOG(signals) << "invoking Action on handler " << handler;
+        if (handler->Action(sig, info, context)) {
+          // We have handled a signal so it's time to return from the
+          // signal handler to the appropriate place.
+          return;
+        }
+      }
+
+      // We hit a signal we didn't handle.  This might be something about
+      // which we can give more information, so call all registered handlers
+      // to see if it is.
+      if (HandleFaultByOtherHandlers(sig, info, context)) {
         return;
+      }
     }
   }
 
@@ -417,11 +342,7 @@
 
 bool JavaStackTraceHandler::Action(int sig ATTRIBUTE_UNUSED, siginfo_t* siginfo, void* context) {
   // Make sure that we are in the generated code, but we may not have a dex pc.
-#ifdef TEST_NESTED_SIGNAL
-  bool in_generated_code = true;
-#else
   bool in_generated_code = manager_->IsInGeneratedCode(siginfo, context, false);
-#endif
   if (in_generated_code) {
     LOG(ERROR) << "Dumping java stack trace for crash in generated code";
     ArtMethod* method = nullptr;
@@ -432,12 +353,6 @@
     manager_->GetMethodAndReturnPcAndSp(siginfo, context, &method, &return_pc, &sp);
     // Inside of generated code, sp[0] is the method, so sp is the frame.
     self->SetTopOfStack(reinterpret_cast<ArtMethod**>(sp));
-#ifdef TEST_NESTED_SIGNAL
-    // To test the nested signal handler we raise a signal here.  This will cause the
-    // nested signal handler to be called and perform a longjmp back to the setjmp
-    // above.
-    abort();
-#endif
     self->DumpJavaStack(LOG_STREAM(ERROR));
   }
 
diff --git a/runtime/fault_handler.h b/runtime/fault_handler.h
index 56e0fb7..ce59ba7 100644
--- a/runtime/fault_handler.h
+++ b/runtime/fault_handler.h
@@ -45,7 +45,6 @@
   void EnsureArtActionInFrontOfSignalChain();
 
   void HandleFault(int sig, siginfo_t* info, void* context);
-  void HandleNestedSignal(int sig, siginfo_t* info, void* context);
 
   // Added handlers are owned by the fault handler and will be freed on Shutdown().
   void AddHandler(FaultHandler* handler, bool generated_code);
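
(Aside: the rewritten HandleFault above swaps the setjmp/longjmp nested-signal machinery for a per-thread recursion flag plus a scoped unblock of the crash signals. A standalone sketch of that shape follows; it assumes POSIX, and the names and the bare thread_local flag are illustrative, not ART's Thread state.)

#include <signal.h>

#include <initializer_list>

// Unblock the given signals for the current scope; restore the old mask on exit.
class ScopedUnblock {
 public:
  explicit ScopedUnblock(std::initializer_list<int> signals) {
    sigset_t to_unblock;
    sigemptyset(&to_unblock);
    for (int sig : signals) {
      sigaddset(&to_unblock, sig);
    }
    sigprocmask(SIG_UNBLOCK, &to_unblock, &previous_);
  }
  ~ScopedUnblock() { sigprocmask(SIG_SETMASK, &previous_, nullptr); }

 private:
  sigset_t previous_;
};

thread_local bool g_handling_signal = false;

void Handler(int sig, siginfo_t* info, void* context) {
  (void)sig; (void)info; (void)context;
  if (!g_handling_signal) {
    g_handling_signal = true;
    ScopedUnblock unblock{SIGABRT, SIGBUS, SIGSEGV};
    // ... run the registered fault handlers; if one of them crashes, the
    // kernel re-enters Handler() with g_handling_signal still true, so we
    // skip straight to the fallback below instead of recursing forever ...
    g_handling_signal = false;
  }
  // Fallback: report the original signal (e.g. hand it to the next handler).
}
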
diff --git a/runtime/gc/accounting/mod_union_table.cc b/runtime/gc/accounting/mod_union_table.cc
index 0325535..a5bb91a 100644
--- a/runtime/gc/accounting/mod_union_table.cc
+++ b/runtime/gc/accounting/mod_union_table.cc
@@ -327,7 +327,7 @@
 class EmptyMarkObjectVisitor : public MarkObjectVisitor {
  public:
   mirror::Object* MarkObject(mirror::Object* obj) OVERRIDE {return obj;}
-  void MarkHeapReference(mirror::HeapReference<mirror::Object>*) OVERRIDE {}
+  void MarkHeapReference(mirror::HeapReference<mirror::Object>*, bool) OVERRIDE {}
 };
 
 void ModUnionTable::FilterCards() {
@@ -459,7 +459,7 @@
     for (mirror::HeapReference<mirror::Object>* obj_ptr : references) {
       if (obj_ptr->AsMirrorPtr() != nullptr) {
         all_null = false;
-        visitor->MarkHeapReference(obj_ptr);
+        visitor->MarkHeapReference(obj_ptr, /*do_atomic_update*/ false);
       }
     }
     count += references.size();
diff --git a/runtime/gc/accounting/mod_union_table_test.cc b/runtime/gc/accounting/mod_union_table_test.cc
index cf63b30..48a8742 100644
--- a/runtime/gc/accounting/mod_union_table_test.cc
+++ b/runtime/gc/accounting/mod_union_table_test.cc
@@ -97,7 +97,8 @@
 class CollectVisitedVisitor : public MarkObjectVisitor {
  public:
   explicit CollectVisitedVisitor(std::set<mirror::Object*>* out) : out_(out) {}
-  virtual void MarkHeapReference(mirror::HeapReference<mirror::Object>* ref) OVERRIDE
+  virtual void MarkHeapReference(mirror::HeapReference<mirror::Object>* ref,
+                                 bool do_atomic_update ATTRIBUTE_UNUSED) OVERRIDE
       REQUIRES_SHARED(Locks::mutator_lock_) {
     DCHECK(ref != nullptr);
     MarkObject(ref->AsMirrorPtr());
diff --git a/runtime/gc/accounting/read_barrier_table.h b/runtime/gc/accounting/read_barrier_table.h
index 86266e2..e77a5b8 100644
--- a/runtime/gc/accounting/read_barrier_table.h
+++ b/runtime/gc/accounting/read_barrier_table.h
@@ -80,7 +80,7 @@
   }
 
   // This should match RegionSpace::kRegionSize. static_assert'ed in concurrent_copying.h.
-  static constexpr size_t kRegionSize = 1 * MB;
+  static constexpr size_t kRegionSize = 256 * KB;
 
  private:
   static constexpr uint64_t kHeapCapacity = 4ULL * GB;  // low 4gb.
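
(Aside: as the comment above notes, this constant is static_assert'ed against RegionSpace::kRegionSize in concurrent_copying.h, so shrinking it to 256 KB only builds if the region space changes in lock step. That guard is, in spirit, a one-liner of the following form, with namespaces assumed from the file paths:)

static_assert(space::RegionSpace::kRegionSize == accounting::ReadBarrierTable::kRegionSize,
              "RegionSpace and ReadBarrierTable must agree on the region size");
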
diff --git a/runtime/gc/accounting/remembered_set.cc b/runtime/gc/accounting/remembered_set.cc
index 29bab01..7b1e2b8 100644
--- a/runtime/gc/accounting/remembered_set.cc
+++ b/runtime/gc/accounting/remembered_set.cc
@@ -74,7 +74,7 @@
     mirror::HeapReference<mirror::Object>* ref_ptr = obj->GetFieldObjectReferenceAddr(offset);
     if (target_space_->HasAddress(ref_ptr->AsMirrorPtr())) {
       *contains_reference_to_target_space_ = true;
-      collector_->MarkHeapReference(ref_ptr);
+      collector_->MarkHeapReference(ref_ptr, /*do_atomic_update*/ false);
       DCHECK(!target_space_->HasAddress(ref_ptr->AsMirrorPtr()));
     }
   }
diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc
index f18ffb4..7136f10 100644
--- a/runtime/gc/collector/concurrent_copying.cc
+++ b/runtime/gc/collector/concurrent_copying.cc
@@ -53,6 +53,8 @@
 // Slow path mark stack size, increase this if the stack is getting full and it is causing
 // performance problems.
 static constexpr size_t kReadBarrierMarkStackSize = 512 * KB;
+// Verify that there are no missing card marks.
+static constexpr bool kVerifyNoMissingCardMarks = kIsDebugBuild;
 
 ConcurrentCopying::ConcurrentCopying(Heap* heap,
                                      const std::string& name_prefix,
@@ -109,12 +111,29 @@
   }
 }
 
-void ConcurrentCopying::MarkHeapReference(mirror::HeapReference<mirror::Object>* from_ref) {
-  // Used for preserving soft references, should be OK to not have a CAS here since there should be
-  // no other threads which can trigger read barriers on the same referent during reference
-  // processing.
-  from_ref->Assign(Mark(from_ref->AsMirrorPtr()));
-  DCHECK(!from_ref->IsNull());
+void ConcurrentCopying::MarkHeapReference(mirror::HeapReference<mirror::Object>* field,
+                                          bool do_atomic_update) {
+  if (UNLIKELY(do_atomic_update)) {
+    // Used to mark the referent in DelayReferenceReferent in transaction mode.
+    mirror::Object* from_ref = field->AsMirrorPtr();
+    if (from_ref == nullptr) {
+      return;
+    }
+    mirror::Object* to_ref = Mark(from_ref);
+    if (from_ref != to_ref) {
+      do {
+        if (field->AsMirrorPtr() != from_ref) {
+          // Concurrently overwritten by a mutator.
+          break;
+        }
+      } while (!field->CasWeakRelaxed(from_ref, to_ref));
+    }
+  } else {
+    // Used for preserving soft references. It should be OK to not have a CAS here since there
+    // should be no other threads which can trigger read barriers on the same referent during
+    // reference processing.
+    field->Assign(Mark(field->AsMirrorPtr()));
+  }
 }
 
 ConcurrentCopying::~ConcurrentCopying() {
@@ -138,7 +157,7 @@
     MarkingPhase();
   }
   // Verify no from space refs. This causes a pause.
-  if (kEnableNoFromSpaceRefsVerification || kIsDebugBuild) {
+  if (kEnableNoFromSpaceRefsVerification) {
     TimingLogger::ScopedTiming split("(Paused)VerifyNoFromSpaceReferences", GetTimings());
     ScopedPause pause(this, false);
     CheckEmptyMarkStack();
@@ -318,6 +337,9 @@
     TimingLogger::ScopedTiming split("(Paused)FlipCallback", cc->GetTimings());
     // Note: self is not necessarily equal to thread since thread may be suspended.
     Thread* self = Thread::Current();
+    if (kVerifyNoMissingCardMarks) {
+      cc->VerifyNoMissingCardMarks();
+    }
     CHECK(thread == self);
     Locks::mutator_lock_->AssertExclusiveHeld(self);
     cc->region_space_->SetFromSpace(cc->rb_table_, cc->force_evacuate_all_);
@@ -428,6 +450,72 @@
   }
 }
 
+class ConcurrentCopying::VerifyNoMissingCardMarkVisitor {
+ public:
+  VerifyNoMissingCardMarkVisitor(ConcurrentCopying* cc, ObjPtr<mirror::Object> holder)
+    : cc_(cc),
+      holder_(holder) {}
+
+  void operator()(ObjPtr<mirror::Object> obj,
+                  MemberOffset offset,
+                  bool is_static ATTRIBUTE_UNUSED) const
+      REQUIRES_SHARED(Locks::mutator_lock_) ALWAYS_INLINE {
+    if (offset.Uint32Value() != mirror::Object::ClassOffset().Uint32Value()) {
+      CheckReference(obj->GetFieldObject<mirror::Object, kDefaultVerifyFlags, kWithoutReadBarrier>(
+          offset), offset.Uint32Value());
+    }
+  }
+  void operator()(ObjPtr<mirror::Class> klass,
+                  ObjPtr<mirror::Reference> ref) const
+      REQUIRES_SHARED(Locks::mutator_lock_) ALWAYS_INLINE {
+    CHECK(klass->IsTypeOfReferenceClass());
+    this->operator()(ref, mirror::Reference::ReferentOffset(), false);
+  }
+
+  void VisitRootIfNonNull(mirror::CompressedReference<mirror::Object>* root) const
+      REQUIRES_SHARED(Locks::mutator_lock_) {
+    if (!root->IsNull()) {
+      VisitRoot(root);
+    }
+  }
+
+  void VisitRoot(mirror::CompressedReference<mirror::Object>* root) const
+      REQUIRES_SHARED(Locks::mutator_lock_) {
+    CheckReference(root->AsMirrorPtr());
+  }
+
+  void CheckReference(mirror::Object* ref, int32_t offset = -1) const
+      REQUIRES_SHARED(Locks::mutator_lock_) {
+    CHECK(ref == nullptr || !cc_->region_space_->IsInNewlyAllocatedRegion(ref))
+        << holder_->PrettyTypeOf() << "(" << holder_.Ptr() << ") references object "
+        << ref->PrettyTypeOf() << "(" << ref << ") in newly allocated region at offset=" << offset;
+  }
+
+ private:
+  ConcurrentCopying* const cc_;
+  ObjPtr<mirror::Object> const holder_;
+};
+
+void ConcurrentCopying::VerifyNoMissingCardMarkCallback(mirror::Object* obj, void* arg) {
+  auto* collector = reinterpret_cast<ConcurrentCopying*>(arg);
+  // Objects not on dirty cards should never have references to newly allocated regions.
+  if (!collector->heap_->GetCardTable()->IsDirty(obj)) {
+    VerifyNoMissingCardMarkVisitor visitor(collector, /*holder*/ obj);
+    obj->VisitReferences</*kVisitNativeRoots*/true, kVerifyNone, kWithoutReadBarrier>(
+        visitor,
+        visitor);
+  }
+}
+
+void ConcurrentCopying::VerifyNoMissingCardMarks() {
+  TimingLogger::ScopedTiming split(__FUNCTION__, GetTimings());
+  region_space_->Walk(&VerifyNoMissingCardMarkCallback, this);
+  {
+    ReaderMutexLock rmu(Thread::Current(), *Locks::heap_bitmap_lock_);
+    heap_->GetLiveBitmap()->Walk(&VerifyNoMissingCardMarkCallback, this);
+  }
+}
+
 // Switch threads' roots from from-space refs to to-space refs. Forward/mark the thread roots.
 void ConcurrentCopying::FlipThreadRoots() {
   TimingLogger::ScopedTiming split("FlipThreadRoots", GetTimings());
@@ -1380,7 +1468,7 @@
     size_t alloc_size = RoundUp(obj_size, space::RegionSpace::kAlignment);
     region_space_->AddLiveBytes(to_ref, alloc_size);
   }
-  if (ReadBarrier::kEnableToSpaceInvariantChecks || kIsDebugBuild) {
+  if (ReadBarrier::kEnableToSpaceInvariantChecks) {
     AssertToSpaceInvariantObjectVisitor visitor(this);
     visitor(to_ref);
   }
@@ -1556,10 +1644,10 @@
     // Record freed objects.
     TimingLogger::ScopedTiming split2("RecordFree", GetTimings());
     // Don't include thread-locals that are in the to-space.
-    uint64_t from_bytes = region_space_->GetBytesAllocatedInFromSpace();
-    uint64_t from_objects = region_space_->GetObjectsAllocatedInFromSpace();
-    uint64_t unevac_from_bytes = region_space_->GetBytesAllocatedInUnevacFromSpace();
-    uint64_t unevac_from_objects = region_space_->GetObjectsAllocatedInUnevacFromSpace();
+    const uint64_t from_bytes = region_space_->GetBytesAllocatedInFromSpace();
+    const uint64_t from_objects = region_space_->GetObjectsAllocatedInFromSpace();
+    const uint64_t unevac_from_bytes = region_space_->GetBytesAllocatedInUnevacFromSpace();
+    const uint64_t unevac_from_objects = region_space_->GetObjectsAllocatedInUnevacFromSpace();
     uint64_t to_bytes = bytes_moved_.LoadSequentiallyConsistent();
     cumulative_bytes_moved_.FetchAndAddRelaxed(to_bytes);
     uint64_t to_objects = objects_moved_.LoadSequentiallyConsistent();
@@ -1570,8 +1658,18 @@
     }
     CHECK_LE(to_objects, from_objects);
     CHECK_LE(to_bytes, from_bytes);
-    int64_t freed_bytes = from_bytes - to_bytes;
-    int64_t freed_objects = from_objects - to_objects;
+    // cleared_bytes and cleared_objects may be greater than the from space equivalents since
+    // ClearFromSpace may clear empty unevac regions.
+    uint64_t cleared_bytes;
+    uint64_t cleared_objects;
+    {
+      TimingLogger::ScopedTiming split4("ClearFromSpace", GetTimings());
+      region_space_->ClearFromSpace(&cleared_bytes, &cleared_objects);
+      CHECK_GE(cleared_bytes, from_bytes);
+      CHECK_GE(cleared_objects, from_objects);
+    }
+    int64_t freed_bytes = cleared_bytes - to_bytes;
+    int64_t freed_objects = cleared_objects - to_objects;
     if (kVerboseMode) {
       LOG(INFO) << "RecordFree:"
                 << " from_bytes=" << from_bytes << " from_objects=" << from_objects
@@ -1590,11 +1688,6 @@
   }
 
   {
-    TimingLogger::ScopedTiming split4("ClearFromSpace", GetTimings());
-    region_space_->ClearFromSpace();
-  }
-
-  {
     WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
     Sweep(false);
     SwapBitmaps();
@@ -2078,7 +2171,9 @@
       fall_back_to_non_moving = true;
       to_ref = heap_->non_moving_space_->Alloc(Thread::Current(), obj_size,
                                                &non_moving_space_bytes_allocated, nullptr, &dummy);
-      CHECK(to_ref != nullptr) << "Fall-back non-moving space allocation failed";
+      CHECK(to_ref != nullptr) << "Fall-back non-moving space allocation failed for a "
+                               << obj_size << " byte object in region type "
+                               << region_space_->GetRegionType(from_ref);
       bytes_allocated = non_moving_space_bytes_allocated;
       // Mark it in the mark bitmap.
       accounting::ContinuousSpaceBitmap* mark_bitmap =
@@ -2330,7 +2425,9 @@
     MutexLock mu(self, mark_stack_lock_);
     CHECK_EQ(pooled_mark_stacks_.size(), kMarkStackPoolSize);
   }
-  {
+  // kVerifyNoMissingCardMarks relies on the region space cards not being cleared to avoid false
+  // positives.
+  if (!kVerifyNoMissingCardMarks) {
     TimingLogger::ScopedTiming split("ClearRegionSpaceCards", GetTimings());
     // We do not currently use the region space cards at all, madvise them away to save ram.
     heap_->GetCardTable()->ClearCardRange(region_space_->Begin(), region_space_->Limit());
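
(Aside: the do_atomic_update path added to MarkHeapReference above retries a weak CAS until the field either holds the forwarded pointer or has been concurrently overwritten by a mutator. The same pattern with std::atomic, as a sketch: Obj and AtomicForward are illustrative stand-ins, while HeapReference::CasWeakRelaxed is the real ART primitive.)

#include <atomic>

struct Obj;  // stand-in for mirror::Object

// Install to_ref over from_ref unless a mutator has written something else.
void AtomicForward(std::atomic<Obj*>* field, Obj* from_ref, Obj* to_ref) {
  Obj* expected = from_ref;
  while (!field->compare_exchange_weak(expected, to_ref, std::memory_order_relaxed)) {
    if (expected != from_ref) {
      return;  // Concurrently overwritten by a mutator; keep the new value.
    }
    // Spurious weak-CAS failure: the field still holds from_ref, so retry.
    expected = from_ref;
  }
}
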
diff --git a/runtime/gc/collector/concurrent_copying.h b/runtime/gc/collector/concurrent_copying.h
index 844bb45..a0da9fc 100644
--- a/runtime/gc/collector/concurrent_copying.h
+++ b/runtime/gc/collector/concurrent_copying.h
@@ -162,6 +162,12 @@
   void VerifyGrayImmuneObjects()
       REQUIRES(Locks::mutator_lock_)
       REQUIRES(!mark_stack_lock_);
+  static void VerifyNoMissingCardMarkCallback(mirror::Object* obj, void* arg)
+      REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!mark_stack_lock_);
+  void VerifyNoMissingCardMarks()
+      REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!mark_stack_lock_);
   size_t ProcessThreadLocalMarkStacks(bool disable_weak_ref_access, Closure* checkpoint_callback)
       REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(!mark_stack_lock_);
   void RevokeThreadLocalMarkStacks(bool disable_weak_ref_access, Closure* checkpoint_callback)
@@ -176,7 +182,8 @@
   virtual mirror::Object* MarkObject(mirror::Object* from_ref) OVERRIDE
       REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_, !immune_gray_stack_lock_);
-  virtual void MarkHeapReference(mirror::HeapReference<mirror::Object>* from_ref) OVERRIDE
+  virtual void MarkHeapReference(mirror::HeapReference<mirror::Object>* from_ref,
+                                 bool do_atomic_update) OVERRIDE
       REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_, !immune_gray_stack_lock_);
   virtual mirror::Object* IsMarked(mirror::Object* from_ref) OVERRIDE
@@ -329,6 +336,7 @@
   class VerifyNoFromSpaceRefsFieldVisitor;
   class VerifyNoFromSpaceRefsObjectVisitor;
   class VerifyNoFromSpaceRefsVisitor;
+  class VerifyNoMissingCardMarkVisitor;
 
   DISALLOW_IMPLICIT_CONSTRUCTORS(ConcurrentCopying);
 };
diff --git a/runtime/gc/collector/garbage_collector.cc b/runtime/gc/collector/garbage_collector.cc
index 14fd332..1e4196b 100644
--- a/runtime/gc/collector/garbage_collector.cc
+++ b/runtime/gc/collector/garbage_collector.cc
@@ -65,7 +65,8 @@
       name_(name),
       pause_histogram_((name_ + " paused").c_str(), kPauseBucketSize, kPauseBucketCount),
       cumulative_timings_(name),
-      pause_histogram_lock_("pause histogram lock", kDefaultMutexLevel, true) {
+      pause_histogram_lock_("pause histogram lock", kDefaultMutexLevel, true),
+      is_transaction_active_(false) {
   ResetCumulativeStatistics();
 }
 
@@ -88,6 +89,9 @@
   uint64_t start_time = NanoTime();
   Iteration* current_iteration = GetCurrentIteration();
   current_iteration->Reset(gc_cause, clear_soft_references);
+  // Note: transaction mode is single-threaded, there is no asynchronous GC, and this flag does
+  // not change in the middle of a GC.
+  is_transaction_active_ = Runtime::Current()->IsActiveTransaction();
   RunPhases();  // Run all the GC phases.
   // Add the current timings to the cumulative timings.
   cumulative_timings_.AddLogger(*GetTimings());
@@ -109,6 +113,7 @@
     MutexLock mu(self, pause_histogram_lock_);
     pause_histogram_.AdjustAndAddValue(pause_time);
   }
+  is_transaction_active_ = false;
 }
 
 void GarbageCollector::SwapBitmaps() {
diff --git a/runtime/gc/collector/garbage_collector.h b/runtime/gc/collector/garbage_collector.h
index 95601d7..14d0499 100644
--- a/runtime/gc/collector/garbage_collector.h
+++ b/runtime/gc/collector/garbage_collector.h
@@ -199,12 +199,17 @@
   // Force mark an object.
   virtual mirror::Object* MarkObject(mirror::Object* obj)
       REQUIRES_SHARED(Locks::mutator_lock_) = 0;
-  virtual void MarkHeapReference(mirror::HeapReference<mirror::Object>* obj)
+  virtual void MarkHeapReference(mirror::HeapReference<mirror::Object>* obj,
+                                 bool do_atomic_update)
       REQUIRES_SHARED(Locks::mutator_lock_) = 0;
   virtual void DelayReferenceReferent(ObjPtr<mirror::Class> klass,
                                       ObjPtr<mirror::Reference> reference)
       REQUIRES_SHARED(Locks::mutator_lock_) = 0;
 
+  bool IsTransactionActive() const {
+    return is_transaction_active_;
+  }
+
  protected:
   // Run all of the GC phases.
   virtual void RunPhases() = 0;
@@ -223,6 +228,7 @@
   int64_t total_freed_bytes_;
   CumulativeLogger cumulative_timings_;
   mutable Mutex pause_histogram_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
+  bool is_transaction_active_;
 
  private:
   DISALLOW_IMPLICIT_CONSTRUCTORS(GarbageCollector);
diff --git a/runtime/gc/collector/mark_compact.cc b/runtime/gc/collector/mark_compact.cc
index 85e6783..0039388 100644
--- a/runtime/gc/collector/mark_compact.cc
+++ b/runtime/gc/collector/mark_compact.cc
@@ -260,7 +260,8 @@
   mark_stack_->PushBack(obj);
 }
 
-void MarkCompact::MarkHeapReference(mirror::HeapReference<mirror::Object>* obj_ptr) {
+void MarkCompact::MarkHeapReference(mirror::HeapReference<mirror::Object>* obj_ptr,
+                                    bool do_atomic_update ATTRIBUTE_UNUSED) {
   if (updating_references_) {
     UpdateHeapReference(obj_ptr);
   } else {
diff --git a/runtime/gc/collector/mark_compact.h b/runtime/gc/collector/mark_compact.h
index 6d52d5d..85727c2 100644
--- a/runtime/gc/collector/mark_compact.h
+++ b/runtime/gc/collector/mark_compact.h
@@ -170,7 +170,8 @@
   // Mark a single object.
   virtual mirror::Object* MarkObject(mirror::Object* obj) OVERRIDE
       REQUIRES(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
-  virtual void MarkHeapReference(mirror::HeapReference<mirror::Object>* obj_ptr) OVERRIDE
+  virtual void MarkHeapReference(mirror::HeapReference<mirror::Object>* obj_ptr,
+                                 bool do_atomic_update) OVERRIDE
       REQUIRES(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
   virtual mirror::Object* IsMarked(mirror::Object* obj) OVERRIDE
       REQUIRES_SHARED(Locks::heap_bitmap_lock_)
diff --git a/runtime/gc/collector/mark_sweep.cc b/runtime/gc/collector/mark_sweep.cc
index f00da73..f591cf0 100644
--- a/runtime/gc/collector/mark_sweep.cc
+++ b/runtime/gc/collector/mark_sweep.cc
@@ -532,7 +532,8 @@
   return !mark_bitmap_->AtomicTestAndSet(obj, visitor);
 }
 
-void MarkSweep::MarkHeapReference(mirror::HeapReference<mirror::Object>* ref) {
+void MarkSweep::MarkHeapReference(mirror::HeapReference<mirror::Object>* ref,
+                                  bool do_atomic_update ATTRIBUTE_UNUSED) {
   MarkObject(ref->AsMirrorPtr(), nullptr, MemberOffset(0));
 }
 
diff --git a/runtime/gc/collector/mark_sweep.h b/runtime/gc/collector/mark_sweep.h
index a6e2d61..5a9b9f8 100644
--- a/runtime/gc/collector/mark_sweep.h
+++ b/runtime/gc/collector/mark_sweep.h
@@ -216,7 +216,8 @@
       REQUIRES(!mark_stack_lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  virtual void MarkHeapReference(mirror::HeapReference<mirror::Object>* ref) OVERRIDE
+  virtual void MarkHeapReference(mirror::HeapReference<mirror::Object>* ref,
+                                 bool do_atomic_update) OVERRIDE
       REQUIRES(Locks::heap_bitmap_lock_)
       REQUIRES(!mark_stack_lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
diff --git a/runtime/gc/collector/semi_space.cc b/runtime/gc/collector/semi_space.cc
index cb9e7e2..67e7383 100644
--- a/runtime/gc/collector/semi_space.cc
+++ b/runtime/gc/collector/semi_space.cc
@@ -606,7 +606,8 @@
   return ref.AsMirrorPtr();
 }
 
-void SemiSpace::MarkHeapReference(mirror::HeapReference<mirror::Object>* obj_ptr) {
+void SemiSpace::MarkHeapReference(mirror::HeapReference<mirror::Object>* obj_ptr,
+                                  bool do_atomic_update ATTRIBUTE_UNUSED) {
   MarkObject(obj_ptr);
 }
 
@@ -723,7 +724,9 @@
 void SemiSpace::ScanObject(Object* obj) {
   DCHECK(!from_space_->HasAddress(obj)) << "Scanning object " << obj << " in from space";
   MarkObjectVisitor visitor(this);
-  obj->VisitReferences(visitor, visitor);
+  // Turn off the read barrier. ZygoteCompactingCollector doesn't use it (even in the CC build).
+  obj->VisitReferences</*kVisitNativeRoots*/true, kDefaultVerifyFlags, kWithoutReadBarrier>(
+      visitor, visitor);
 }
 
 // Scan anything that's on the mark stack.
diff --git a/runtime/gc/collector/semi_space.h b/runtime/gc/collector/semi_space.h
index 52b5e5f..9d6e74d 100644
--- a/runtime/gc/collector/semi_space.h
+++ b/runtime/gc/collector/semi_space.h
@@ -110,7 +110,8 @@
   virtual mirror::Object* MarkObject(mirror::Object* root) OVERRIDE
       REQUIRES(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
 
-  virtual void MarkHeapReference(mirror::HeapReference<mirror::Object>* obj_ptr) OVERRIDE
+  virtual void MarkHeapReference(mirror::HeapReference<mirror::Object>* obj_ptr,
+                                 bool do_atomic_update) OVERRIDE
       REQUIRES(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
 
   void ScanObject(mirror::Object* obj)
diff --git a/runtime/gc/collector_type.h b/runtime/gc/collector_type.h
index eef4fba..f0e1029 100644
--- a/runtime/gc/collector_type.h
+++ b/runtime/gc/collector_type.h
@@ -59,6 +59,8 @@
   kCollectorTypeHprof,
   // Fake collector for installing/removing a system-weak holder.
   kCollectorTypeAddRemoveSystemWeakHolder,
+  // Fake collector type for GetObjectsAllocated.
+  kCollectorTypeGetObjectsAllocated,
 };
 std::ostream& operator<<(std::ostream& os, const CollectorType& collector_type);
 
diff --git a/runtime/gc/gc_cause.cc b/runtime/gc/gc_cause.cc
index 9e34346..c1c1cad 100644
--- a/runtime/gc/gc_cause.cc
+++ b/runtime/gc/gc_cause.cc
@@ -40,6 +40,7 @@
     case kGcCauseJitCodeCache: return "JitCodeCache";
     case kGcCauseAddRemoveSystemWeakHolder: return "SystemWeakHolder";
     case kGcCauseHprof: return "Hprof";
+    case kGcCauseGetObjectsAllocated: return "ObjectsAllocated";
   }
   LOG(FATAL) << "Unreachable";
   UNREACHABLE();
diff --git a/runtime/gc/gc_cause.h b/runtime/gc/gc_cause.h
index 9b285b1..eb27547 100644
--- a/runtime/gc/gc_cause.h
+++ b/runtime/gc/gc_cause.h
@@ -53,8 +53,10 @@
   kGcCauseJitCodeCache,
   // Not a real GC cause, used to add or remove system-weak holders.
   kGcCauseAddRemoveSystemWeakHolder,
-  // Not a real GC cause, used to hprof running in the middle of GC.
+  // Not a real GC cause, used to prevent hprof running in the middle of GC.
   kGcCauseHprof,
+  // Not a real GC cause, used to prevent GetObjectsAllocated running in the middle of GC.
+  kGcCauseGetObjectsAllocated,
 };
 
 const char* PrettyCause(GcCause cause);
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 34afa2a..a769748 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -133,6 +133,17 @@
 // config.
 static constexpr double kExtraHeapGrowthMultiplier = kUseReadBarrier ? 1.0 : 0.0;
 
+static const char* kRegionSpaceName = "main space (region space)";
+
+#if defined(__LP64__) || !defined(ADDRESS_SANITIZER)
+// 300 MB (0x12c00000) - (default non-moving space capacity).
+static uint8_t* const kPreferredAllocSpaceBegin =
+    reinterpret_cast<uint8_t*>(300 * MB - Heap::kDefaultNonMovingSpaceCapacity);
+#else
+// For 32-bit, use 0x20000000 because asan reserves 0x04000000 - 0x20000000.
+static uint8_t* const kPreferredAllocSpaceBegin = reinterpret_cast<uint8_t*>(0x20000000);
+#endif
+
 static inline bool CareAboutPauseTimes() {
   return Runtime::Current()->InJankPerceptibleProcessState();
 }
@@ -286,15 +297,9 @@
   // Requested begin for the alloc space, to follow the mapped image and oat files
   uint8_t* requested_alloc_space_begin = nullptr;
   if (foreground_collector_type_ == kCollectorTypeCC) {
-    // Need to use a low address so that we can allocate a contiguous
-    // 2 * Xmx space when there's no image (dex2oat for target).
-#if defined(__LP64__) || !defined(ADDRESS_SANITIZER)
-    CHECK_GE(300 * MB, non_moving_space_capacity);
-    requested_alloc_space_begin = reinterpret_cast<uint8_t*>(300 * MB) - non_moving_space_capacity;
-#else
-    // For 32-bit, use 0x20000000 because asan reserves 0x04000000 - 0x20000000.
-    requested_alloc_space_begin = reinterpret_cast<uint8_t*>(0x20000000);
-#endif
+    // Need to use a low address so that we can allocate a contiguous 2 * Xmx space when there's no
+    // image (dex2oat for target).
+    requested_alloc_space_begin = kPreferredAllocSpaceBegin;
   }
 
   // Load image space(s).
@@ -369,12 +374,7 @@
                              &error_str));
     CHECK(non_moving_space_mem_map != nullptr) << error_str;
     // Try to reserve virtual memory at a lower address if we have a separate non moving space.
-#if defined(__LP64__) || !defined(ADDRESS_SANITIZER)
-    request_begin = reinterpret_cast<uint8_t*>(300 * MB);
-#else
-    // For 32-bit, use 0x20000000 because asan reserves 0x04000000 - 0x20000000.
-    request_begin = reinterpret_cast<uint8_t*>(0x20000000) + non_moving_space_capacity;
-#endif
+    request_begin = kPreferredAllocSpaceBegin + non_moving_space_capacity;
   }
   // Attempt to create 2 mem maps at or after the requested begin.
   if (foreground_collector_type_ != kCollectorTypeCC) {
@@ -419,7 +419,12 @@
   }
   // Create other spaces based on whether or not we have a moving GC.
   if (foreground_collector_type_ == kCollectorTypeCC) {
-    region_space_ = space::RegionSpace::Create("main space (region space)", capacity_ * 2, request_begin);
+    CHECK(separate_non_moving_space);
+    MemMap* region_space_mem_map = space::RegionSpace::CreateMemMap(kRegionSpaceName,
+                                                                    capacity_ * 2,
+                                                                    request_begin);
+    CHECK(region_space_mem_map != nullptr) << "No region space mem map";
+    region_space_ = space::RegionSpace::Create(kRegionSpaceName, region_space_mem_map);
     AddSpace(region_space_);
   } else if (IsMovingGc(foreground_collector_type_) &&
       foreground_collector_type_ != kCollectorTypeGSS) {
@@ -1830,6 +1835,11 @@
 size_t Heap::GetObjectsAllocated() const {
   Thread* const self = Thread::Current();
   ScopedThreadStateChange tsc(self, kWaitingForGetObjectsAllocated);
+  // Prevent GC from running during GetObjectsAllocated since we may get a checkpoint request that
+  // tells us to suspend while we are doing SuspendAll. b/35232978
+  gc::ScopedGCCriticalSection gcs(Thread::Current(),
+                                  gc::kGcCauseGetObjectsAllocated,
+                                  gc::kCollectorTypeGetObjectsAllocated);
   // Need SuspendAll here to prevent lock violation if RosAlloc does it during InspectAll.
   ScopedSuspendAll ssa(__FUNCTION__);
   ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
@@ -2327,7 +2337,9 @@
     size_t bin_size = object_addr - context->prev_;
     // Add the bin consisting of the end of the previous object to the start of the current object.
     collector->AddBin(bin_size, context->prev_);
-    context->prev_ = object_addr + RoundUp(obj->SizeOf(), kObjectAlignment);
+    // Turn off the read barrier. ZygoteCompactingCollector doesn't use it (even in the CC build).
+    context->prev_ = object_addr + RoundUp(obj->SizeOf<kDefaultVerifyFlags, kWithoutReadBarrier>(),
+                                           kObjectAlignment);
   }
 
   void AddBin(size_t size, uintptr_t position) {
@@ -2347,7 +2359,8 @@
 
   virtual mirror::Object* MarkNonForwardedObject(mirror::Object* obj)
       REQUIRES(Locks::heap_bitmap_lock_, Locks::mutator_lock_) {
-    size_t obj_size = obj->SizeOf();
+    // Turn off the read barrier. ZygoteCompactingCollector doesn't use it (even in the CC build).
+    size_t obj_size = obj->SizeOf<kDefaultVerifyFlags, kWithoutReadBarrier>();
     size_t alloc_size = RoundUp(obj_size, kObjectAlignment);
     mirror::Object* forward_address;
     // Find the smallest bin which we can move obj in.
@@ -3332,7 +3345,7 @@
   virtual mirror::Object* MarkObject(mirror::Object* obj) OVERRIDE {
     return obj;
   }
-  virtual void MarkHeapReference(mirror::HeapReference<mirror::Object>*) OVERRIDE {
+  virtual void MarkHeapReference(mirror::HeapReference<mirror::Object>*, bool) OVERRIDE {
   }
 };
 
@@ -3551,11 +3564,8 @@
   collector::GcType gc_type = collector_ran->GetGcType();
   const double multiplier = HeapGrowthMultiplier();  // Use the multiplier to grow more for
   // foreground.
-  // Ensure at least 2.5 MB to temporarily fix excessive GC caused by TLAB ergonomics.
-  const uint64_t adjusted_min_free = std::max(static_cast<uint64_t>(min_free_ * multiplier),
-                                              static_cast<uint64_t>(5 * MB / 2));
-  const uint64_t adjusted_max_free = std::max(static_cast<uint64_t>(max_free_ * multiplier),
-                                              static_cast<uint64_t>(5 * MB / 2));
+  const uint64_t adjusted_min_free = static_cast<uint64_t>(min_free_ * multiplier);
+  const uint64_t adjusted_max_free = static_cast<uint64_t>(max_free_ * multiplier);
   if (gc_type != collector::kGcTypeSticky) {
     // Grow the heap for non sticky GC.
     ssize_t delta = bytes_allocated / GetTargetHeapUtilization() - bytes_allocated;
@@ -3961,7 +3971,14 @@
 
 void Heap::CheckPreconditionsForAllocObject(ObjPtr<mirror::Class> c, size_t byte_count) {
   CHECK(c == nullptr || (c->IsClassClass() && byte_count >= sizeof(mirror::Class)) ||
-        (c->IsVariableSize() || c->GetObjectSize() == byte_count)) << c->GetClassFlags();
+        (c->IsVariableSize() || c->GetObjectSize() == byte_count))
+      << "ClassFlags=" << c->GetClassFlags()
+      << " IsClassClass=" << c->IsClassClass()
+      << " byte_count=" << byte_count
+      << " IsVariableSize=" << c->IsVariableSize()
+      << " ObjectSize=" << c->GetObjectSize()
+      << " sizeof(Class)=" << sizeof(mirror::Class)
+      << " klass=" << c.Ptr();
   CHECK_GE(byte_count, sizeof(mirror::Object));
 }
 
diff --git a/runtime/gc/reference_processor.cc b/runtime/gc/reference_processor.cc
index 86b1522..65a550e 100644
--- a/runtime/gc/reference_processor.cc
+++ b/runtime/gc/reference_processor.cc
@@ -139,6 +139,14 @@
       CHECK_EQ(!self->GetWeakRefAccessEnabled(), concurrent);
     }
   }
+  if (kIsDebugBuild && collector->IsTransactionActive()) {
+    // In transaction mode, we shouldn't enqueue any Reference to the queues.
+    // See DelayReferenceReferent().
+    DCHECK(soft_reference_queue_.IsEmpty());
+    DCHECK(weak_reference_queue_.IsEmpty());
+    DCHECK(finalizer_reference_queue_.IsEmpty());
+    DCHECK(phantom_reference_queue_.IsEmpty());
+  }
   // Unless required to clear soft references with white references, preserve some white referents.
   if (!clear_soft_references) {
     TimingLogger::ScopedTiming split(concurrent ? "ForwardSoftReferences" :
@@ -206,6 +214,15 @@
   // do_atomic_update needs to be true because this happens outside of the reference processing
   // phase.
   if (!collector->IsNullOrMarkedHeapReference(referent, /*do_atomic_update*/true)) {
+    if (UNLIKELY(collector->IsTransactionActive())) {
+      // In transaction mode, keep the referent alive and skip any reference processing to avoid
+      // the issue of rolling back reference processing. do_atomic_update needs to be true because
+      // this happens outside of the reference processing phase.
+      if (!referent->IsNull()) {
+        collector->MarkHeapReference(referent, /*do_atomic_update*/ true);
+      }
+      return;
+    }
     Thread* self = Thread::Current();
     // TODO: Remove these locks, and use atomic stacks for storing references?
     // We need to check that the references haven't already been enqueued since we can end up
diff --git a/runtime/gc/reference_queue.cc b/runtime/gc/reference_queue.cc
index 734caea..fd5dcf9 100644
--- a/runtime/gc/reference_queue.cc
+++ b/runtime/gc/reference_queue.cc
@@ -67,6 +67,11 @@
     list_->SetPendingNext(next);
   }
   ref->SetPendingNext(nullptr);
+  return ref;
+}
+
+// This must be called whenever DequeuePendingReference is called.
+void ReferenceQueue::DisableReadBarrierForReference(ObjPtr<mirror::Reference> ref) {
   Heap* heap = Runtime::Current()->GetHeap();
   if (kUseBakerOrBrooksReadBarrier && heap->CurrentCollectorType() == kCollectorTypeCC &&
       heap->ConcurrentCopyingCollector()->IsActive()) {
@@ -92,7 +97,6 @@
       }
     }
   }
-  return ref;
 }
 
 void ReferenceQueue::Dump(std::ostream& os) const {
@@ -140,6 +144,9 @@
       }
       cleared_references->EnqueueReference(ref);
     }
+    // Delay disabling the read barrier until here so that the ClearReferent call above in
+    // transaction mode will trigger the read barrier.
+    DisableReadBarrierForReference(ref);
   }
 }
 
@@ -162,6 +169,9 @@
       }
       cleared_references->EnqueueReference(ref);
     }
+    // Delay disabling the read barrier until here so that the ClearReferent call above in
+    // transaction mode will trigger the read barrier.
+    DisableReadBarrierForReference(ref->AsReference());
   }
 }
 
@@ -174,7 +184,9 @@
   do {
     mirror::HeapReference<mirror::Object>* referent_addr = ref->GetReferentReferenceAddr();
     if (referent_addr->AsMirrorPtr() != nullptr) {
-      visitor->MarkHeapReference(referent_addr);
+      // do_atomic_update is false because mutators can't access the referent due to the weak ref
+      // access blocking.
+      visitor->MarkHeapReference(referent_addr, /*do_atomic_update*/ false);
     }
     ref = ref->GetPendingNext();
   } while (LIKELY(ref != head));
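
(Aside: splitting DequeuePendingReference in two lets callers act on the referent, with the read barrier still active, before the barrier is disabled. The contract documented in the header diff that follows pairs every dequeue with a disable call; a usage sketch, where ProcessReference is a hypothetical stand-in for the caller's work:)

while (!queue->IsEmpty()) {
  ObjPtr<mirror::Reference> ref = queue->DequeuePendingReference();
  ProcessReference(ref);  // hypothetical: clear or enqueue the referent here
  // Must follow every DequeuePendingReference, per the new contract.
  queue->DisableReadBarrierForReference(ref);
}
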
diff --git a/runtime/gc/reference_queue.h b/runtime/gc/reference_queue.h
index b5ec1e5..b73a880 100644
--- a/runtime/gc/reference_queue.h
+++ b/runtime/gc/reference_queue.h
@@ -63,8 +63,15 @@
   void EnqueueReference(ObjPtr<mirror::Reference> ref) REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Dequeue a reference from the queue and return that dequeued reference.
+  // Call DisableReadBarrierForReference for the reference that's returned from this function.
   ObjPtr<mirror::Reference> DequeuePendingReference() REQUIRES_SHARED(Locks::mutator_lock_);
 
+  // If applicable, disable the read barrier for the reference after its referent is handled (see
+  // ConcurrentCopying::ProcessMarkStackRef.) This must be called for a reference that's dequeued
+  // from pending queue (DequeuePendingReference).
+  void DisableReadBarrierForReference(ObjPtr<mirror::Reference> ref)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
   // Enqueues finalizer references with white referents.  White referents are blackened, moved to
   // the zombie field, and the referent field is cleared.
   void EnqueueFinalizerReferences(ReferenceQueue* cleared_references,
diff --git a/runtime/gc/scoped_gc_critical_section.cc b/runtime/gc/scoped_gc_critical_section.cc
index b5eb979..f937d2c 100644
--- a/runtime/gc/scoped_gc_critical_section.cc
+++ b/runtime/gc/scoped_gc_critical_section.cc
@@ -29,10 +29,14 @@
                                                  CollectorType collector_type)
     : self_(self) {
   Runtime::Current()->GetHeap()->StartGC(self, cause, collector_type);
-  old_cause_ = self->StartAssertNoThreadSuspension("ScopedGCCriticalSection");
+  if (self != nullptr) {
+    old_cause_ = self->StartAssertNoThreadSuspension("ScopedGCCriticalSection");
+  }
 }
 ScopedGCCriticalSection::~ScopedGCCriticalSection() {
-  self_->EndAssertNoThreadSuspension(old_cause_);
+  if (self_ != nullptr) {
+    self_->EndAssertNoThreadSuspension(old_cause_);
+  }
   Runtime::Current()->GetHeap()->FinishGC(self_, collector::kGcTypeNone);
 }
 
diff --git a/runtime/gc/space/image_space.cc b/runtime/gc/space/image_space.cc
index 010ef11..568f8d6 100644
--- a/runtime/gc/space/image_space.cc
+++ b/runtime/gc/space/image_space.cc
@@ -1259,17 +1259,18 @@
             }
           }
         }
-        ArtField** fields = dex_cache->GetResolvedFields();
+        mirror::FieldDexCacheType* fields = dex_cache->GetResolvedFields();
         if (fields != nullptr) {
-          ArtField** new_fields = fixup_adapter.ForwardObject(fields);
+          mirror::FieldDexCacheType* new_fields = fixup_adapter.ForwardObject(fields);
           if (fields != new_fields) {
             dex_cache->SetResolvedFields(new_fields);
           }
           for (size_t j = 0, num = dex_cache->NumResolvedFields(); j != num; ++j) {
-            ArtField* orig = mirror::DexCache::GetElementPtrSize(new_fields, j, pointer_size);
-            ArtField* copy = fixup_adapter.ForwardObject(orig);
-            if (orig != copy) {
-              mirror::DexCache::SetElementPtrSize(new_fields, j, copy, pointer_size);
+            mirror::FieldDexCachePair orig =
+                mirror::DexCache::GetNativePairPtrSize(new_fields, j, pointer_size);
+            mirror::FieldDexCachePair copy(fixup_adapter.ForwardObject(orig.object), orig.index);
+            if (orig.object != copy.object) {
+              mirror::DexCache::SetNativePairPtrSize(new_fields, j, copy, pointer_size);
             }
           }
         }
diff --git a/runtime/gc/space/region_space-inl.h b/runtime/gc/space/region_space-inl.h
index 3e79223..5809027 100644
--- a/runtime/gc/space/region_space-inl.h
+++ b/runtime/gc/space/region_space-inl.h
@@ -78,7 +78,7 @@
       for (size_t i = 0; i < num_regions_; ++i) {
         Region* r = &regions_[i];
         if (r->IsFree()) {
-          r->Unfree(time_);
+          r->Unfree(this, time_);
           r->SetNewlyAllocated();
           ++num_non_free_regions_;
           obj = r->Alloc(num_bytes, bytes_allocated, usable_size, bytes_tl_bulk_allocated);
@@ -91,7 +91,7 @@
       for (size_t i = 0; i < num_regions_; ++i) {
         Region* r = &regions_[i];
         if (r->IsFree()) {
-          r->Unfree(time_);
+          r->Unfree(this, time_);
           ++num_non_free_regions_;
           obj = r->Alloc(num_bytes, bytes_allocated, usable_size, bytes_tl_bulk_allocated);
           CHECK(obj != nullptr);
@@ -233,10 +233,12 @@
       continue;
     }
     if (r->IsLarge()) {
+      // Avoid visiting dead large objects since they may contain dangling pointers to the
+      // from-space.
+      DCHECK_GT(r->LiveBytes(), 0u) << "Visiting dead large object";
       mirror::Object* obj = reinterpret_cast<mirror::Object*>(r->Begin());
-      if (obj->GetClass() != nullptr) {
-        callback(obj, arg);
-      }
+      DCHECK(obj->GetClass() != nullptr);
+      callback(obj, arg);
     } else if (r->IsLargeTail()) {
       // Do nothing.
     } else {
@@ -310,13 +312,13 @@
       DCHECK_EQ(left + num_regs, right);
       Region* first_reg = &regions_[left];
       DCHECK(first_reg->IsFree());
-      first_reg->UnfreeLarge(time_);
+      first_reg->UnfreeLarge(this, time_);
       ++num_non_free_regions_;
       first_reg->SetTop(first_reg->Begin() + num_bytes);
       for (size_t p = left + 1; p < right; ++p) {
         DCHECK_LT(p, num_regions_);
         DCHECK(regions_[p].IsFree());
-        regions_[p].UnfreeLargeTail(time_);
+        regions_[p].UnfreeLargeTail(this, time_);
         ++num_non_free_regions_;
       }
       *bytes_allocated = num_bytes;
diff --git a/runtime/gc/space/region_space.cc b/runtime/gc/space/region_space.cc
index 8077319..1ad4843 100644
--- a/runtime/gc/space/region_space.cc
+++ b/runtime/gc/space/region_space.cc
@@ -28,20 +28,52 @@
 // value of the region size, evaculate the region.
 static constexpr uint kEvaculateLivePercentThreshold = 75U;
 
-RegionSpace* RegionSpace::Create(const std::string& name, size_t capacity,
-                                 uint8_t* requested_begin) {
-  capacity = RoundUp(capacity, kRegionSize);
+MemMap* RegionSpace::CreateMemMap(const std::string& name, size_t capacity,
+                                  uint8_t* requested_begin) {
+  CHECK_ALIGNED(capacity, kRegionSize);
   std::string error_msg;
-  std::unique_ptr<MemMap> mem_map(MemMap::MapAnonymous(name.c_str(), requested_begin, capacity,
-                                                       PROT_READ | PROT_WRITE, true, false,
-                                                       &error_msg));
+  // Ask for an additional kRegionSize of capacity so that we can align the map by kRegionSize
+  // even if we get an unaligned base address. This is necessary for the ReadBarrierTable to work.
+  std::unique_ptr<MemMap> mem_map;
+  while (true) {
+    mem_map.reset(MemMap::MapAnonymous(name.c_str(),
+                                       requested_begin,
+                                       capacity + kRegionSize,
+                                       PROT_READ | PROT_WRITE,
+                                       true,
+                                       false,
+                                       &error_msg));
+    if (mem_map.get() != nullptr || requested_begin == nullptr) {
+      break;
+    }
+    // Retry with no specified request begin.
+    requested_begin = nullptr;
+  }
   if (mem_map.get() == nullptr) {
     LOG(ERROR) << "Failed to allocate pages for alloc space (" << name << ") of size "
         << PrettySize(capacity) << " with message " << error_msg;
     MemMap::DumpMaps(LOG_STREAM(ERROR));
     return nullptr;
   }
-  return new RegionSpace(name, mem_map.release());
+  CHECK_EQ(mem_map->Size(), capacity + kRegionSize);
+  CHECK_EQ(mem_map->Begin(), mem_map->BaseBegin());
+  CHECK_EQ(mem_map->Size(), mem_map->BaseSize());
+  if (IsAlignedParam(mem_map->Begin(), kRegionSize)) {
+    // Got an aligned map. Since we requested a map that's kRegionSize larger, shrink it by
+    // kRegionSize at the end.
+    mem_map->SetSize(capacity);
+  } else {
+    // Got an unaligned map. Align both ends.
+    mem_map->AlignBy(kRegionSize);
+  }
+  CHECK_ALIGNED(mem_map->Begin(), kRegionSize);
+  CHECK_ALIGNED(mem_map->End(), kRegionSize);
+  CHECK_EQ(mem_map->Size(), capacity);
+  return mem_map.release();
+}
+
+RegionSpace* RegionSpace::Create(const std::string& name, MemMap* mem_map) {
+  return new RegionSpace(name, mem_map);
 }
 
 RegionSpace::RegionSpace(const std::string& name, MemMap* mem_map)
@@ -54,6 +86,7 @@
   num_regions_ = mem_map_size / kRegionSize;
   num_non_free_regions_ = 0U;
   DCHECK_GT(num_regions_, 0U);
+  non_free_region_index_limit_ = 0U;
   regions_.reset(new Region[num_regions_]);
   uint8_t* region_addr = mem_map->Begin();
   for (size_t i = 0; i < num_regions_; ++i, region_addr += kRegionSize) {
@@ -160,7 +193,11 @@
   MutexLock mu(Thread::Current(), region_lock_);
   size_t num_expected_large_tails = 0;
   bool prev_large_evacuated = false;
-  for (size_t i = 0; i < num_regions_; ++i) {
+  VerifyNonFreeRegionLimit();
+  const size_t iter_limit = kUseTableLookupReadBarrier
+      ? num_regions_
+      : std::min(num_regions_, non_free_region_index_limit_);
+  for (size_t i = 0; i < iter_limit; ++i) {
     Region* r = &regions_[i];
     RegionState state = r->State();
     RegionType type = r->Type();
@@ -204,18 +241,50 @@
       }
     }
   }
+  DCHECK_EQ(num_expected_large_tails, 0U);
   current_region_ = &full_region_;
   evac_region_ = &full_region_;
 }
 
-void RegionSpace::ClearFromSpace() {
+void RegionSpace::ClearFromSpace(uint64_t* cleared_bytes, uint64_t* cleared_objects) {
+  DCHECK(cleared_bytes != nullptr);
+  DCHECK(cleared_objects != nullptr);
+  *cleared_bytes = 0;
+  *cleared_objects = 0;
   MutexLock mu(Thread::Current(), region_lock_);
-  for (size_t i = 0; i < num_regions_; ++i) {
+  VerifyNonFreeRegionLimit();
+  size_t new_non_free_region_index_limit = 0;
+  for (size_t i = 0; i < std::min(num_regions_, non_free_region_index_limit_); ++i) {
     Region* r = &regions_[i];
     if (r->IsInFromSpace()) {
-      r->Clear();
+      *cleared_bytes += r->BytesAllocated();
+      *cleared_objects += r->ObjectsAllocated();
       --num_non_free_regions_;
+      r->Clear();
     } else if (r->IsInUnevacFromSpace()) {
+      if (r->LiveBytes() == 0) {
+        // Special case for 0 live bytes, this means all of the objects in the region are dead and
+        // we can clear it. This is important for large objects since we must not visit dead ones in
+        // RegionSpace::Walk because they may contain dangling references to invalid objects.
+        // It is also better to clear these regions now instead of at the end of the next GC to
+        // save RAM. If we don't clear the regions here, they will be cleared next GC by the normal
+        // live percent evacuation logic.
+        size_t free_regions = 1;
+        // Also release RAM for large tails.
+        while (i + free_regions < num_regions_ && regions_[i + free_regions].IsLargeTail()) {
+          DCHECK(r->IsLarge());
+          regions_[i + free_regions].Clear();
+          ++free_regions;
+        }
+        *cleared_bytes += r->BytesAllocated();
+        *cleared_objects += r->ObjectsAllocated();
+        num_non_free_regions_ -= free_regions;
+        r->Clear();
+        GetLiveBitmap()->ClearRange(
+            reinterpret_cast<mirror::Object*>(r->Begin()),
+            reinterpret_cast<mirror::Object*>(r->Begin() + free_regions * kRegionSize));
+        continue;
+      }
       size_t full_count = 0;
       while (r->IsInUnevacFromSpace()) {
         Region* const cur = &regions_[i + full_count];
@@ -223,6 +292,7 @@
             cur->LiveBytes() != static_cast<size_t>(cur->Top() - cur->Begin())) {
           break;
         }
+        DCHECK(cur->IsInUnevacFromSpace());
         if (full_count != 0) {
           cur->SetUnevacFromSpaceAsToSpace();
         }
@@ -239,7 +309,15 @@
         i += full_count - 1;
       }
     }
+    // Note r != last_checked_region if r->IsInUnevacFromSpace() was true above.
+    Region* last_checked_region = &regions_[i];
+    if (!last_checked_region->IsFree()) {
+      new_non_free_region_index_limit = std::max(new_non_free_region_index_limit,
+                                                 last_checked_region->Idx() + 1);
+    }
   }
+  // Update non_free_region_index_limit_.
+  SetNonFreeRegionLimit(new_non_free_region_index_limit);
   evac_region_ = nullptr;
 }
 
@@ -292,6 +370,7 @@
     }
     r->Clear();
   }
+  SetNonFreeRegionLimit(0);
   current_region_ = &full_region_;
   evac_region_ = &full_region_;
 }
@@ -358,7 +437,7 @@
   for (size_t i = 0; i < num_regions_; ++i) {
     Region* r = &regions_[i];
     if (r->IsFree()) {
-      r->Unfree(time_);
+      r->Unfree(this, time_);
       ++num_non_free_regions_;
       r->SetNewlyAllocated();
       r->SetTop(r->End());
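
The ClearFromSpace() rewrite above adds a fast path: an unevacuated region with zero live bytes is reclaimed immediately, together with any large-tail regions that belong to it, and the matching live bitmap range is wiped. A condensed, self-contained model of that path (simplified stand-in types; the bitmap clearing is reduced to a comment):

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // Simplified stand-ins; not the real ART types or API.
    struct Region {
      size_t live_bytes = 0;
      size_t bytes_allocated = 0;
      size_t objects_allocated = 0;
      bool is_large_tail = false;
      void Clear() { *this = Region(); }
    };

    // Frees a dead region (zero live bytes) together with its large tails.
    // Returns how many regions were reclaimed starting at index i.
    size_t ReclaimIfDead(std::vector<Region>& regions, size_t i,
                         uint64_t* cleared_bytes, uint64_t* cleared_objects) {
      Region& r = regions[i];
      if (r.live_bytes != 0) {
        return 0;  // Still live; leave it to the normal evacuation logic.
      }
      size_t freed = 1;
      // A dead large region drags its continuation (tail) regions with it.
      while (i + freed < regions.size() && regions[i + freed].is_large_tail) {
        regions[i + freed].Clear();
        ++freed;
      }
      *cleared_bytes += r.bytes_allocated;      // Report reclaimed space upward,
      *cleared_objects += r.objects_allocated;  // mirroring the new out-parameters.
      r.Clear();
      // The real code also clears the live bitmap over [Begin, Begin + freed * kRegionSize).
      return freed;
    }
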
diff --git a/runtime/gc/space/region_space.h b/runtime/gc/space/region_space.h
index f3b9595..2537929 100644
--- a/runtime/gc/space/region_space.h
+++ b/runtime/gc/space/region_space.h
@@ -35,10 +35,11 @@
     return kSpaceTypeRegionSpace;
   }
 
-  // Create a region space with the requested sizes. The requested base address is not
+  // Create a region space mem map with the requested sizes. The requested base address is not
   // guaranteed to be granted; if it is required, the caller should call Begin on the returned
   // space to confirm the request was granted.
-  static RegionSpace* Create(const std::string& name, size_t capacity, uint8_t* requested_begin);
+  static MemMap* CreateMemMap(const std::string& name, size_t capacity, uint8_t* requested_begin);
+  static RegionSpace* Create(const std::string& name, MemMap* mem_map);
 
   // Allocate num_bytes, returns null if the space is full.
   mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated,
@@ -166,7 +167,7 @@
   // Object alignment within the space.
   static constexpr size_t kAlignment = kObjectAlignment;
   // The region size.
-  static constexpr size_t kRegionSize = 1 * MB;
+  static constexpr size_t kRegionSize = 256 * KB;
 
   bool IsInFromSpace(mirror::Object* ref) {
     if (HasAddress(ref)) {
@@ -176,6 +177,14 @@
     return false;
   }
 
+  bool IsInNewlyAllocatedRegion(mirror::Object* ref) {
+    if (HasAddress(ref)) {
+      Region* r = RefToRegionUnlocked(ref);
+      return r->IsNewlyAllocated();
+    }
+    return false;
+  }
+
   bool IsInUnevacFromSpace(mirror::Object* ref) {
     if (HasAddress(ref)) {
       Region* r = RefToRegionUnlocked(ref);
@@ -206,7 +215,7 @@
   size_t FromSpaceSize() REQUIRES(!region_lock_);
   size_t UnevacFromSpaceSize() REQUIRES(!region_lock_);
   size_t ToSpaceSize() REQUIRES(!region_lock_);
-  void ClearFromSpace() REQUIRES(!region_lock_);
+  void ClearFromSpace(uint64_t* cleared_bytes, uint64_t* cleared_objects) REQUIRES(!region_lock_);
 
   void AddLiveBytes(mirror::Object* ref, size_t alloc_size) {
     Region* reg = RefToRegionUnlocked(ref);
@@ -299,25 +308,31 @@
     }
 
     // Given a free region, declare it non-free (allocated).
-    void Unfree(uint32_t alloc_time) {
+    void Unfree(RegionSpace* region_space, uint32_t alloc_time)
+        REQUIRES(region_space->region_lock_) {
       DCHECK(IsFree());
       state_ = RegionState::kRegionStateAllocated;
       type_ = RegionType::kRegionTypeToSpace;
       alloc_time_ = alloc_time;
+      region_space->AdjustNonFreeRegionLimit(idx_);
     }
 
-    void UnfreeLarge(uint32_t alloc_time) {
+    void UnfreeLarge(RegionSpace* region_space, uint32_t alloc_time)
+        REQUIRES(region_space->region_lock_) {
       DCHECK(IsFree());
       state_ = RegionState::kRegionStateLarge;
       type_ = RegionType::kRegionTypeToSpace;
       alloc_time_ = alloc_time;
+      region_space->AdjustNonFreeRegionLimit(idx_);
     }
 
-    void UnfreeLargeTail(uint32_t alloc_time) {
+    void UnfreeLargeTail(RegionSpace* region_space, uint32_t alloc_time)
+        REQUIRES(region_space->region_lock_) {
       DCHECK(IsFree());
       state_ = RegionState::kRegionStateLargeTail;
       type_ = RegionType::kRegionTypeToSpace;
       alloc_time_ = alloc_time;
+      region_space->AdjustNonFreeRegionLimit(idx_);
     }
 
     void SetNewlyAllocated() {
@@ -333,7 +348,7 @@
     bool IsLarge() const {
       bool is_large = state_ == RegionState::kRegionStateLarge;
       if (is_large) {
-        DCHECK_LT(begin_ + 1 * MB, Top());
+        DCHECK_LT(begin_ + kRegionSize, Top());
       }
       return is_large;
     }
@@ -351,6 +366,10 @@
       return idx_;
     }
 
+    bool IsNewlyAllocated() const {
+      return is_newly_allocated_;
+    }
+
     bool IsInFromSpace() const {
       return type_ == RegionType::kRegionTypeFromSpace;
     }
@@ -416,7 +435,7 @@
 
     size_t ObjectsAllocated() const {
       if (IsLarge()) {
-        DCHECK_LT(begin_ + 1 * MB, Top());
+        DCHECK_LT(begin_ + kRegionSize, Top());
         DCHECK_EQ(objects_allocated_.LoadRelaxed(), 0U);
         return 1;
       } else if (IsLargeTail()) {
@@ -507,6 +526,27 @@
   mirror::Object* GetNextObject(mirror::Object* obj)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
+  void AdjustNonFreeRegionLimit(size_t new_non_free_region_index) REQUIRES(region_lock_) {
+    DCHECK_LT(new_non_free_region_index, num_regions_);
+    non_free_region_index_limit_ = std::max(non_free_region_index_limit_,
+                                            new_non_free_region_index + 1);
+    VerifyNonFreeRegionLimit();
+  }
+
+  void SetNonFreeRegionLimit(size_t new_non_free_region_index_limit) REQUIRES(region_lock_) {
+    DCHECK_LE(new_non_free_region_index_limit, num_regions_);
+    non_free_region_index_limit_ = new_non_free_region_index_limit;
+    VerifyNonFreeRegionLimit();
+  }
+
+  void VerifyNonFreeRegionLimit() REQUIRES(region_lock_) {
+    if (kIsDebugBuild && non_free_region_index_limit_ < num_regions_) {
+      for (size_t i = non_free_region_index_limit_; i < num_regions_; ++i) {
+        CHECK(regions_[i].IsFree());
+      }
+    }
+  }
+
   Mutex region_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
 
   uint32_t time_;                  // The time as the number of collections since the startup.
@@ -514,6 +554,10 @@
   size_t num_non_free_regions_;    // The number of non-free regions in this space.
   std::unique_ptr<Region[]> regions_ GUARDED_BY(region_lock_);
                                    // The pointer to the region array.
+  // The upper-bound index of the non-free regions. Used to avoid scanning all regions in
+  // SetFromSpace().  Invariant: for all i >= non_free_region_index_limit_, regions_[i].IsFree() is
+  // true.
+  size_t non_free_region_index_limit_ GUARDED_BY(region_lock_);
   Region* current_region_;         // The region that's being allocated currently.
   Region* evac_region_;            // The region that's being evacuated to currently.
   Region full_region_;             // The dummy/sentinel region that looks full.
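
The invariant behind non_free_region_index_limit_ is worth spelling out: every region at or past the limit is guaranteed free, so SetFromSpace() and ClearFromSpace() only need to scan [0, limit). A tiny model of the invariant and its debug verification, using illustrative names only:

    #include <algorithm>
    #include <cassert>
    #include <cstddef>
    #include <vector>

    // Model of the limit invariant: for all i >= limit, regions[i] is free.
    struct LimitTracker {
      std::vector<bool> is_free;
      size_t limit = 0;

      explicit LimitTracker(size_t n) : is_free(n, true) {}

      void MarkNonFree(size_t idx) {       // AdjustNonFreeRegionLimit equivalent.
        is_free[idx] = false;
        limit = std::max(limit, idx + 1);
        Verify();
      }

      void Verify() const {                // VerifyNonFreeRegionLimit equivalent.
        for (size_t i = limit; i < is_free.size(); ++i) {
          assert(is_free[i]);
        }
      }

      size_t CountNonFree() const {        // Scans only [0, limit), not every region.
        size_t n = 0;
        for (size_t i = 0; i < limit; ++i) {
          if (!is_free[i]) ++n;
        }
        return n;
      }
    };
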
diff --git a/runtime/gc_root-inl.h b/runtime/gc_root-inl.h
index 390ed3c..7795c66 100644
--- a/runtime/gc_root-inl.h
+++ b/runtime/gc_root-inl.h
@@ -38,7 +38,7 @@
     : root_(mirror::CompressedReference<mirror::Object>::FromMirrorPtr(ref)) { }
 
 template<class MirrorType>
-inline GcRoot<MirrorType>::GcRoot(ObjPtr<MirrorType, kIsDebugBuild> ref)
+inline GcRoot<MirrorType>::GcRoot(ObjPtr<MirrorType> ref)
     : GcRoot(ref.Ptr()) { }
 
 inline std::string RootInfo::ToString() const {
diff --git a/runtime/gc_root.h b/runtime/gc_root.h
index 79e80f1..0894e9b 100644
--- a/runtime/gc_root.h
+++ b/runtime/gc_root.h
@@ -24,7 +24,7 @@
 namespace art {
 class ArtField;
 class ArtMethod;
-template<class MirrorType, bool kPoison> class ObjPtr;
+template<class MirrorType> class ObjPtr;
 
 namespace mirror {
 class Object;
@@ -215,7 +215,7 @@
   ALWAYS_INLINE GcRoot() {}
   explicit ALWAYS_INLINE GcRoot(MirrorType* ref)
       REQUIRES_SHARED(Locks::mutator_lock_);
-  explicit ALWAYS_INLINE GcRoot(ObjPtr<MirrorType, kIsDebugBuild> ref)
+  explicit ALWAYS_INLINE GcRoot(ObjPtr<MirrorType> ref)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
  private:
diff --git a/runtime/handle_scope-inl.h b/runtime/handle_scope-inl.h
index 077f45e..492d4b4 100644
--- a/runtime/handle_scope-inl.h
+++ b/runtime/handle_scope-inl.h
@@ -114,9 +114,9 @@
   return h;
 }
 
-template<size_t kNumReferences> template<class MirrorType, bool kPoison>
+template<size_t kNumReferences> template<class MirrorType>
 inline MutableHandle<MirrorType> FixedSizeHandleScope<kNumReferences>::NewHandle(
-    ObjPtr<MirrorType, kPoison> object) {
+    ObjPtr<MirrorType> object) {
   return NewHandle(object.Ptr());
 }
 
@@ -191,9 +191,8 @@
   return current_scope_->NewHandle(object);
 }
 
-template<class MirrorType, bool kPoison>
-inline MutableHandle<MirrorType> VariableSizedHandleScope::NewHandle(
-    ObjPtr<MirrorType, kPoison> ptr) {
+template<class MirrorType>
+inline MutableHandle<MirrorType> VariableSizedHandleScope::NewHandle(ObjPtr<MirrorType> ptr) {
   return NewHandle(ptr.Ptr());
 }
 
diff --git a/runtime/handle_scope.h b/runtime/handle_scope.h
index adb7d8a..c43a482 100644
--- a/runtime/handle_scope.h
+++ b/runtime/handle_scope.h
@@ -30,7 +30,7 @@
 namespace art {
 
 class HandleScope;
-template<class MirrorType, bool kPoison> class ObjPtr;
+template<class MirrorType> class ObjPtr;
 class Thread;
 class VariableSizedHandleScope;
 
@@ -224,8 +224,8 @@
   ALWAYS_INLINE HandleWrapperObjPtr<T> NewHandleWrapper(ObjPtr<T>* object)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  template<class MirrorType, bool kPoison>
-  ALWAYS_INLINE MutableHandle<MirrorType> NewHandle(ObjPtr<MirrorType, kPoison> object)
+  template<class MirrorType>
+  ALWAYS_INLINE MutableHandle<MirrorType> NewHandle(ObjPtr<MirrorType> object)
     REQUIRES_SHARED(Locks::mutator_lock_);
 
   ALWAYS_INLINE void SetReference(size_t i, mirror::Object* object)
@@ -250,7 +250,7 @@
   StackReference<mirror::Object> storage_[kNumReferences];
 
   // Position at which new handles will be created.
-  size_t pos_ = 0;
+  uint32_t pos_ = 0;
 
   template<size_t kNumRefs> friend class StackHandleScope;
   friend class VariableSizedHandleScope;
@@ -286,8 +286,8 @@
   template<class T>
   MutableHandle<T> NewHandle(T* object) REQUIRES_SHARED(Locks::mutator_lock_);
 
-  template<class MirrorType, bool kPoison>
-  MutableHandle<MirrorType> NewHandle(ObjPtr<MirrorType, kPoison> ptr)
+  template<class MirrorType>
+  MutableHandle<MirrorType> NewHandle(ObjPtr<MirrorType> ptr)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Number of references contained within this handle scope.
@@ -299,12 +299,20 @@
   void VisitRoots(Visitor& visitor) REQUIRES_SHARED(Locks::mutator_lock_);
 
  private:
-  static constexpr size_t kNumReferencesPerScope = 4;
+  static constexpr size_t kLocalScopeSize = 64u;
+  static constexpr size_t kSizeOfReferencesPerScope =
+      kLocalScopeSize
+          - /* BaseHandleScope::link_ */ sizeof(BaseHandleScope*)
+          - /* BaseHandleScope::number_of_references_ */ sizeof(int32_t)
+          - /* FixedSizeHandleScope<>::pos_ */ sizeof(uint32_t);
+  static constexpr size_t kNumReferencesPerScope =
+      kSizeOfReferencesPerScope / sizeof(StackReference<mirror::Object>);
 
   Thread* const self_;
 
   // Linked list of fixed size handle scopes.
   using LocalScopeType = FixedSizeHandleScope<kNumReferencesPerScope>;
+  static_assert(sizeof(LocalScopeType) == kLocalScopeSize, "Unexpected size of LocalScopeType");
   LocalScopeType* current_scope_;
 
   DISALLOW_COPY_AND_ASSIGN(VariableSizedHandleScope);
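
The scope sizing above is plain arithmetic: a 64-byte local scope, minus the link pointer, the 32-bit reference count, and the (now 32-bit) position field, leaves room for a whole number of StackReference slots. A standalone check of that arithmetic, assuming a 64-bit target and a 4-byte compressed StackReference (both stated assumptions, with a stand-in type):

    #include <cstddef>
    #include <cstdint>

    // Stand-in for StackReference<mirror::Object>: a 4-byte compressed reference.
    struct StackRef { uint32_t ref; };

    constexpr size_t kLocalScopeSize = 64u;
    constexpr size_t kHeaderSize =
        sizeof(void*)        // BaseHandleScope::link_ (8 bytes on LP64)
        + sizeof(int32_t)    // BaseHandleScope::number_of_references_
        + sizeof(uint32_t);  // FixedSizeHandleScope<>::pos_
    constexpr size_t kNumReferencesPerScope =
        (kLocalScopeSize - kHeaderSize) / sizeof(StackRef);

    // (64 - 8 - 4 - 4) / 4 == 12 reference slots per 64-byte scope on LP64.
    static_assert(kNumReferencesPerScope == 12, "expected 12 slots on LP64");
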
diff --git a/runtime/hprof/hprof.cc b/runtime/hprof/hprof.cc
index e59c4bb..495fec7 100644
--- a/runtime/hprof/hprof.cc
+++ b/runtime/hprof/hprof.cc
@@ -1111,7 +1111,9 @@
   if (space != nullptr) {
     if (space->IsZygoteSpace()) {
       heap_type = HPROF_HEAP_ZYGOTE;
-    } else if (space->IsImageSpace()) {
+    } else if (space->IsImageSpace() && heap->ObjectIsInBootImageSpace(obj)) {
+      // Only count objects in the boot image as HPROF_HEAP_IMAGE; this leaves app image objects as
+      // HPROF_HEAP_APP. b/35762934
       heap_type = HPROF_HEAP_IMAGE;
     }
   } else {
diff --git a/runtime/image.cc b/runtime/image.cc
index 4e6da79..b153ea0 100644
--- a/runtime/image.cc
+++ b/runtime/image.cc
@@ -25,7 +25,7 @@
 namespace art {
 
 const uint8_t ImageHeader::kImageMagic[] = { 'a', 'r', 't', '\n' };
-const uint8_t ImageHeader::kImageVersion[] = { '0', '3', '8', '\0' };  // hash-based DexCache types
+const uint8_t ImageHeader::kImageVersion[] = { '0', '4', '3', '\0' };  // hash-based DexCache fields
 
 ImageHeader::ImageHeader(uint32_t image_begin,
                          uint32_t image_size,
diff --git a/runtime/interpreter/interpreter_common.h b/runtime/interpreter/interpreter_common.h
index 6b22af9..2589ad0 100644
--- a/runtime/interpreter/interpreter_common.h
+++ b/runtime/interpreter/interpreter_common.h
@@ -18,6 +18,7 @@
 #define ART_RUNTIME_INTERPRETER_INTERPRETER_COMMON_H_
 
 #include "interpreter.h"
+#include "interpreter_intrinsics.h"
 
 #include <math.h>
 
@@ -104,13 +105,58 @@
 void RecordArrayElementsInTransaction(ObjPtr<mirror::Array> array, int32_t count)
     REQUIRES_SHARED(Locks::mutator_lock_);
 
-// Invokes the given method. This is part of the invocation support and is used by DoInvoke and
-// DoInvokeVirtualQuick functions.
+// Invokes the given method. This is part of the invocation support and is used by the DoInvoke,
+// DoFastInvoke and DoInvokeVirtualQuick functions.
 // Returns true on success, otherwise throws an exception and returns false.
 template<bool is_range, bool do_assignability_check>
 bool DoCall(ArtMethod* called_method, Thread* self, ShadowFrame& shadow_frame,
             const Instruction* inst, uint16_t inst_data, JValue* result);
 
+// Handles streamlined non-range invoke-static, invoke-direct and invoke-virtual instructions
+// originating in mterp. Access checks and instrumentation other than JIT profiling are not
+// supported, but interpreter intrinsics are supported where applicable.
+// Returns true on success, otherwise throws an exception and returns false.
+template<InvokeType type>
+static inline bool DoFastInvoke(Thread* self,
+                                ShadowFrame& shadow_frame,
+                                const Instruction* inst,
+                                uint16_t inst_data,
+                                JValue* result) {
+  const uint32_t method_idx = inst->VRegB_35c();
+  const uint32_t vregC = inst->VRegC_35c();
+  ObjPtr<mirror::Object> receiver = (type == kStatic)
+      ? nullptr
+      : shadow_frame.GetVRegReference(vregC);
+  ArtMethod* sf_method = shadow_frame.GetMethod();
+  ArtMethod* const called_method = FindMethodFromCode<type, false>(
+      method_idx, &receiver, sf_method, self);
+  // The shadow frame should already be pushed, so we don't need to update it.
+  if (UNLIKELY(called_method == nullptr)) {
+    CHECK(self->IsExceptionPending());
+    result->SetJ(0);
+    return false;
+  } else if (UNLIKELY(!called_method->IsInvokable())) {
+    called_method->ThrowInvocationTimeError();
+    result->SetJ(0);
+    return false;
+  } else {
+    if (called_method->IsIntrinsic()) {
+      if (MterpHandleIntrinsic(&shadow_frame, called_method, inst, inst_data,
+                               shadow_frame.GetResultRegister())) {
+        return !self->IsExceptionPending();
+      }
+    }
+    jit::Jit* jit = Runtime::Current()->GetJit();
+    if (jit != nullptr) {
+      if (type == kVirtual) {
+        jit->InvokeVirtualOrInterface(receiver, sf_method, shadow_frame.GetDexPC(), called_method);
+      }
+      jit->AddSamples(self, sf_method, 1, /*with_backedges*/false);
+    }
+    return DoCall<false, false>(called_method, self, shadow_frame, inst, inst_data, result);
+  }
+}
+
 // Handles all invoke-XXX/range instructions except for invoke-polymorphic[/range].
 // Returns true on success, otherwise throws an exception and returns false.
 template<InvokeType type, bool is_range, bool do_access_check>
@@ -495,8 +541,9 @@
 
 // Explicitly instantiate all DoInvoke functions.
 #define EXPLICIT_DO_INVOKE_TEMPLATE_DECL(_type, _is_range, _do_check)                      \
-  template REQUIRES_SHARED(Locks::mutator_lock_)                                     \
-  bool DoInvoke<_type, _is_range, _do_check>(Thread* self, ShadowFrame& shadow_frame,      \
+  template REQUIRES_SHARED(Locks::mutator_lock_)                                           \
+  bool DoInvoke<_type, _is_range, _do_check>(Thread* self,                                 \
+                                             ShadowFrame& shadow_frame,                    \
                                              const Instruction* inst, uint16_t inst_data,  \
                                              JValue* result)
 
@@ -514,6 +561,19 @@
 #undef EXPLICIT_DO_INVOKE_ALL_TEMPLATE_DECL
 #undef EXPLICIT_DO_INVOKE_TEMPLATE_DECL
 
+// Explicitly instantiate all DoFastInvoke functions.
+#define EXPLICIT_DO_FAST_INVOKE_TEMPLATE_DECL(_type)                     \
+  template REQUIRES_SHARED(Locks::mutator_lock_)                         \
+  bool DoFastInvoke<_type>(Thread* self,                                 \
+                           ShadowFrame& shadow_frame,                    \
+                           const Instruction* inst, uint16_t inst_data,  \
+                           JValue* result)
+
+EXPLICIT_DO_FAST_INVOKE_TEMPLATE_DECL(kStatic);     // invoke-static
+EXPLICIT_DO_FAST_INVOKE_TEMPLATE_DECL(kDirect);     // invoke-direct
+EXPLICIT_DO_FAST_INVOKE_TEMPLATE_DECL(kVirtual);    // invoke-virtual
+#undef EXPLICIT_DO_FAST_INVOKE_TEMPLATE_DECL
+
 // Explicitly instantiate all DoInvokeVirtualQuick functions.
 #define EXPLICIT_DO_INVOKE_VIRTUAL_QUICK_TEMPLATE_DECL(_is_range)                    \
   template REQUIRES_SHARED(Locks::mutator_lock_)                               \
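
For context on how these instantiations are reached: an mterp invoke entry point can route through DoFastInvoke<type> instead of the general DoInvoke path. A hedged sketch of what such a caller might look like; the entry-point name and exact signature are assumed, not quoted from mterp.cc:

    // Hypothetical mterp entry-point shape (assumption); shows the routing only.
    extern "C" bool MterpInvokeDirect(Thread* self,
                                      ShadowFrame* shadow_frame,
                                      uint16_t* dex_pc_ptr,
                                      uint16_t inst_data)
        REQUIRES_SHARED(Locks::mutator_lock_) {
      JValue* result_register = shadow_frame->GetResultRegister();
      const Instruction* inst = Instruction::At(dex_pc_ptr);
      // No access checks or instrumentation here; DoFastInvoke handles the
      // intrinsic fast path, JIT sampling, and the final DoCall.
      return DoFastInvoke<kDirect>(self, *shadow_frame, inst, inst_data, result_register);
    }
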
diff --git a/runtime/interpreter/interpreter_intrinsics.cc b/runtime/interpreter/interpreter_intrinsics.cc
new file mode 100644
index 0000000..ff0c20e
--- /dev/null
+++ b/runtime/interpreter/interpreter_intrinsics.cc
@@ -0,0 +1,274 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "interpreter/interpreter_common.h"
+#include "interpreter/interpreter_intrinsics.h"
+
+namespace art {
+namespace interpreter {
+
+#define BINARY_SIMPLE_INTRINSIC(name, op, get, set, offset)  \
+static ALWAYS_INLINE bool name(ShadowFrame* shadow_frame,    \
+                               const Instruction* inst,      \
+                               uint16_t inst_data,           \
+                               JValue* result_register)      \
+    REQUIRES_SHARED(Locks::mutator_lock_) {                  \
+  uint32_t arg[Instruction::kMaxVarArgRegs] = {};            \
+  inst->GetVarArgs(arg, inst_data);                          \
+  result_register->set(op(shadow_frame->get(arg[0]), shadow_frame->get(arg[offset]))); \
+  return true;                                               \
+}
+
+#define UNARY_SIMPLE_INTRINSIC(name, op, get, set)           \
+static ALWAYS_INLINE bool name(ShadowFrame* shadow_frame,    \
+                               const Instruction* inst,      \
+                               uint16_t inst_data,           \
+                               JValue* result_register)      \
+    REQUIRES_SHARED(Locks::mutator_lock_) {                  \
+  uint32_t arg[Instruction::kMaxVarArgRegs] = {};            \
+  inst->GetVarArgs(arg, inst_data);                          \
+  result_register->set(op(shadow_frame->get(arg[0])));       \
+  return true;                                               \
+}
+
+// java.lang.Math.min(II)I
+BINARY_SIMPLE_INTRINSIC(MterpMathMinIntInt, std::min, GetVReg, SetI, 1);
+// java.lang.Math.min(JJ)J
+BINARY_SIMPLE_INTRINSIC(MterpMathMinLongLong, std::min, GetVRegLong, SetJ, 2);
+// java.lang.Math.max(II)I
+BINARY_SIMPLE_INTRINSIC(MterpMathMaxIntInt, std::max, GetVReg, SetI, 1);
+// java.lang.Math.max(JJ)J
+BINARY_SIMPLE_INTRINSIC(MterpMathMaxLongLong, std::max, GetVRegLong, SetJ, 2);
+// java.lang.Math.abs(I)I
+UNARY_SIMPLE_INTRINSIC(MterpMathAbsInt, std::abs, GetVReg, SetI);
+// java.lang.Math.abs(J)J
+UNARY_SIMPLE_INTRINSIC(MterpMathAbsLong, std::abs, GetVRegLong, SetJ);
+// java.lang.Math.abs(F)F
+UNARY_SIMPLE_INTRINSIC(MterpMathAbsFloat, 0x7fffffff&, GetVReg, SetI);
+// java.lang.Math.abs(D)D
+UNARY_SIMPLE_INTRINSIC(MterpMathAbsDouble, INT64_C(0x7fffffffffffffff)&, GetVRegLong, SetJ);
+// java.lang.Math.sqrt(D)D
+UNARY_SIMPLE_INTRINSIC(MterpMathSqrt, std::sqrt, GetVRegDouble, SetD);
+// java.lang.Math.ceil(D)D
+UNARY_SIMPLE_INTRINSIC(MterpMathCeil, std::ceil, GetVRegDouble, SetD);
+// java.lang.Math.floor(D)D
+UNARY_SIMPLE_INTRINSIC(MterpMathFloor, std::floor, GetVRegDouble, SetD);
+// java.lang.Math.sin(D)D
+UNARY_SIMPLE_INTRINSIC(MterpMathSin, std::sin, GetVRegDouble, SetD);
+// java.lang.Math.cos(D)D
+UNARY_SIMPLE_INTRINSIC(MterpMathCos, std::cos, GetVRegDouble, SetD);
+// java.lang.Math.tan(D)D
+UNARY_SIMPLE_INTRINSIC(MterpMathTan, std::tan, GetVRegDouble, SetD);
+// java.lang.Math.asin(D)D
+UNARY_SIMPLE_INTRINSIC(MterpMathAsin, std::asin, GetVRegDouble, SetD);
+// java.lang.Math.acos(D)D
+UNARY_SIMPLE_INTRINSIC(MterpMathAcos, std::acos, GetVRegDouble, SetD);
+// java.lang.Math.atan(D)D
+UNARY_SIMPLE_INTRINSIC(MterpMathAtan, std::atan, GetVRegDouble, SetD);
+
+// java.lang.String.charAt(I)C
+static ALWAYS_INLINE bool MterpStringCharAt(ShadowFrame* shadow_frame,
+                                            const Instruction* inst,
+                                            uint16_t inst_data,
+                                            JValue* result_register)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  uint32_t arg[Instruction::kMaxVarArgRegs] = {};
+  inst->GetVarArgs(arg, inst_data);
+  mirror::String* str = shadow_frame->GetVRegReference(arg[0])->AsString();
+  int length = str->GetLength();
+  int index = shadow_frame->GetVReg(arg[1]);
+  uint16_t res;
+  if (UNLIKELY(index < 0) || (index >= length)) {
+    return false;  // Punt and let non-intrinsic version deal with the throw.
+  }
+  if (str->IsCompressed()) {
+    res = str->GetValueCompressed()[index];
+  } else {
+    res = str->GetValue()[index];
+  }
+  result_register->SetC(res);
+  return true;
+}
+
+// java.lang.String.compareTo(Ljava/lang/String;)I
+static ALWAYS_INLINE bool MterpStringCompareTo(ShadowFrame* shadow_frame,
+                                               const Instruction* inst,
+                                               uint16_t inst_data,
+                                               JValue* result_register)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  uint32_t arg[Instruction::kMaxVarArgRegs] = {};
+  inst->GetVarArgs(arg, inst_data);
+  mirror::String* str = shadow_frame->GetVRegReference(arg[0])->AsString();
+  mirror::Object* arg1 = shadow_frame->GetVRegReference(arg[1]);
+  if (arg1 == nullptr) {
+    return false;
+  }
+  result_register->SetI(str->CompareTo(arg1->AsString()));
+  return true;
+}
+
+#define STRING_INDEXOF_INTRINSIC(name, starting_pos)             \
+static ALWAYS_INLINE bool Mterp##name(ShadowFrame* shadow_frame, \
+                                      const Instruction* inst,   \
+                                      uint16_t inst_data,        \
+                                      JValue* result_register)   \
+    REQUIRES_SHARED(Locks::mutator_lock_) {                      \
+  uint32_t arg[Instruction::kMaxVarArgRegs] = {};                \
+  inst->GetVarArgs(arg, inst_data);                              \
+  mirror::String* str = shadow_frame->GetVRegReference(arg[0])->AsString(); \
+  int ch = shadow_frame->GetVReg(arg[1]);                        \
+  if (ch >= 0x10000) {                                           \
+    /* Punt if supplementary char. */                            \
+    return false;                                                \
+  }                                                              \
+  result_register->SetI(str->FastIndexOf(ch, starting_pos));     \
+  return true;                                                   \
+}
+
+// java.lang.String.indexOf(I)I
+STRING_INDEXOF_INTRINSIC(StringIndexOf, 0);
+
+// java.lang.String.indexOf(II)I
+STRING_INDEXOF_INTRINSIC(StringIndexOfAfter, shadow_frame->GetVReg(arg[2]));
+
+#define SIMPLE_STRING_INTRINSIC(name, operation)                 \
+static ALWAYS_INLINE bool Mterp##name(ShadowFrame* shadow_frame, \
+                                      const Instruction* inst,   \
+                                      uint16_t inst_data,        \
+                                      JValue* result_register)   \
+    REQUIRES_SHARED(Locks::mutator_lock_) {                      \
+  uint32_t arg[Instruction::kMaxVarArgRegs] = {};                \
+  inst->GetVarArgs(arg, inst_data);                              \
+  mirror::String* str = shadow_frame->GetVRegReference(arg[0])->AsString(); \
+  result_register->operation;                                    \
+  return true;                                                   \
+}
+
+// java.lang.String.isEmpty()Z
+SIMPLE_STRING_INTRINSIC(StringIsEmpty, SetZ(str->GetLength() == 0))
+
+// java.lang.String.length()I
+SIMPLE_STRING_INTRINSIC(StringLength, SetI(str->GetLength()))
+
+// java.lang.String.getCharsNoCheck(II[CI)V
+static ALWAYS_INLINE bool MterpStringGetCharsNoCheck(ShadowFrame* shadow_frame,
+                                                     const Instruction* inst,
+                                                     uint16_t inst_data,
+                                                     JValue* result_register ATTRIBUTE_UNUSED)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  // Start, end & index already checked by caller - won't throw.  Destination is uncompressed.
+  uint32_t arg[Instruction::kMaxVarArgRegs] = {};
+  inst->GetVarArgs(arg, inst_data);
+  mirror::String* str = shadow_frame->GetVRegReference(arg[0])->AsString();
+  int32_t start = shadow_frame->GetVReg(arg[1]);
+  int32_t end = shadow_frame->GetVReg(arg[2]);
+  int32_t index = shadow_frame->GetVReg(arg[4]);
+  mirror::CharArray* array = shadow_frame->GetVRegReference(arg[3])->AsCharArray();
+  uint16_t* dst = array->GetData() + index;
+  int32_t len = (end - start);
+  if (str->IsCompressed()) {
+    const uint8_t* src_8 = str->GetValueCompressed() + start;
+    for (int i = 0; i < len; i++) {
+      dst[i] = src_8[i];
+    }
+  } else {
+    uint16_t* src_16 = str->GetValue() + start;
+    memcpy(dst, src_16, len * sizeof(uint16_t));
+  }
+  return true;
+}
+
+// java.lang.String.equals(Ljava/lang/Object;)Z
+static ALWAYS_INLINE bool MterpStringEquals(ShadowFrame* shadow_frame,
+                                            const Instruction* inst,
+                                            uint16_t inst_data,
+                                            JValue* result_register)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  uint32_t arg[Instruction::kMaxVarArgRegs] = {};
+  inst->GetVarArgs(arg, inst_data);
+  mirror::String* str = shadow_frame->GetVRegReference(arg[0])->AsString();
+  mirror::Object* obj = shadow_frame->GetVRegReference(arg[1]);
+  bool res = false;  // Assume not equal.
+  if ((obj != nullptr) && obj->IsString()) {
+    mirror::String* str2 = obj->AsString();
+    if (str->GetCount() == str2->GetCount()) {
+      // Length & compression status are same.  Can use block compare.
+      void* bytes1;
+      void* bytes2;
+      int len = str->GetLength();
+      if (str->IsCompressed()) {
+        bytes1 = str->GetValueCompressed();
+        bytes2 = str2->GetValueCompressed();
+      } else {
+        len *= sizeof(uint16_t);
+        bytes1 = str->GetValue();
+        bytes2 = str2->GetValue();
+      }
+      res = (memcmp(bytes1, bytes2, len) == 0);
+    }
+  }
+  result_register->SetZ(res);
+  return true;
+}
+
+#define INTRINSIC_CASE(name)                                           \
+    case Intrinsics::k##name:                                          \
+      res = Mterp##name(shadow_frame, inst, inst_data, result_register); \
+      break;
+
+bool MterpHandleIntrinsic(ShadowFrame* shadow_frame,
+                          ArtMethod* const called_method,
+                          const Instruction* inst,
+                          uint16_t inst_data,
+                          JValue* result_register)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  Intrinsics intrinsic = static_cast<Intrinsics>(called_method->GetIntrinsic());
+  bool res = false;  // Assume failure
+  switch (intrinsic) {
+    INTRINSIC_CASE(MathMinIntInt)
+    INTRINSIC_CASE(MathMinLongLong)
+    INTRINSIC_CASE(MathMaxIntInt)
+    INTRINSIC_CASE(MathMaxLongLong)
+    INTRINSIC_CASE(MathAbsInt)
+    INTRINSIC_CASE(MathAbsLong)
+    INTRINSIC_CASE(MathAbsFloat)
+    INTRINSIC_CASE(MathAbsDouble)
+    INTRINSIC_CASE(MathSqrt)
+    INTRINSIC_CASE(MathCeil)
+    INTRINSIC_CASE(MathFloor)
+    INTRINSIC_CASE(MathSin)
+    INTRINSIC_CASE(MathCos)
+    INTRINSIC_CASE(MathTan)
+    INTRINSIC_CASE(MathAsin)
+    INTRINSIC_CASE(MathAcos)
+    INTRINSIC_CASE(MathAtan)
+    INTRINSIC_CASE(StringCharAt)
+    INTRINSIC_CASE(StringCompareTo)
+    INTRINSIC_CASE(StringIndexOf)
+    INTRINSIC_CASE(StringIndexOfAfter)
+    INTRINSIC_CASE(StringEquals)
+    INTRINSIC_CASE(StringGetCharsNoCheck)
+    INTRINSIC_CASE(StringIsEmpty)
+    INTRINSIC_CASE(StringLength)
+    default:
+      res = false;  // Punt
+      break;
+  }
+  return res;
+}
+
+}  // namespace interpreter
+}  // namespace art
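
To make the macro machinery above concrete, this is what BINARY_SIMPLE_INTRINSIC(MterpMathMinIntInt, std::min, GetVReg, SetI, 1) expands to, written out by hand (equivalent to the generated function, shown only for readability):

    // Hand expansion of BINARY_SIMPLE_INTRINSIC(MterpMathMinIntInt, std::min,
    // GetVReg, SetI, 1).
    static ALWAYS_INLINE bool MterpMathMinIntInt(ShadowFrame* shadow_frame,
                                                 const Instruction* inst,
                                                 uint16_t inst_data,
                                                 JValue* result_register)
        REQUIRES_SHARED(Locks::mutator_lock_) {
      uint32_t arg[Instruction::kMaxVarArgRegs] = {};
      inst->GetVarArgs(arg, inst_data);  // Decode the invoke's vreg arguments.
      // arg[0] is the first int operand; the offset parameter 1 selects arg[1]
      // as the second (a long would use offset 2, since it occupies a vreg pair).
      result_register->SetI(std::min(shadow_frame->GetVReg(arg[0]),
                                     shadow_frame->GetVReg(arg[1])));
      return true;
    }
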
diff --git a/runtime/interpreter/interpreter_intrinsics.h b/runtime/interpreter/interpreter_intrinsics.h
new file mode 100644
index 0000000..ae45679
--- /dev/null
+++ b/runtime/interpreter/interpreter_intrinsics.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_INTERPRETER_INTERPRETER_INTRINSICS_H_
+#define ART_RUNTIME_INTERPRETER_INTERPRETER_INTRINSICS_H_
+
+#include "compiler/intrinsics_enum.h"
+#include "dex_instruction.h"
+
+namespace art {
+namespace interpreter {
+
+// Invokes of methods identified as intrinsics are routed here.  If there is
+// no interpreter implementation, return false and a normal invoke will proceed.
+bool MterpHandleIntrinsic(ShadowFrame* shadow_frame,
+                          ArtMethod* const called_method,
+                          const Instruction* inst,
+                          uint16_t inst_data,
+                          JValue* result_register);
+
+}  // namespace interpreter
+}  // namespace art
+
+#endif  // ART_RUNTIME_INTERPRETER_INTERPRETER_INTRINSICS_H_
diff --git a/runtime/interpreter/mterp/arm/op_sget.S b/runtime/interpreter/mterp/arm/op_sget.S
index 2b81f50..3c813ef 100644
--- a/runtime/interpreter/mterp/arm/op_sget.S
+++ b/runtime/interpreter/mterp/arm/op_sget.S
@@ -1,4 +1,4 @@
-%default { "is_object":"0", "helper":"artGet32StaticFromCode" }
+%default { "is_object":"0", "helper":"MterpGet32Static" }
     /*
      * General SGET handler wrapper.
      *
diff --git a/runtime/interpreter/mterp/arm/op_sget_boolean.S b/runtime/interpreter/mterp/arm/op_sget_boolean.S
index ebfb44c..eb06aa8 100644
--- a/runtime/interpreter/mterp/arm/op_sget_boolean.S
+++ b/runtime/interpreter/mterp/arm/op_sget_boolean.S
@@ -1 +1 @@
-%include "arm/op_sget.S" {"helper":"artGetBooleanStaticFromCode"}
+%include "arm/op_sget.S" {"helper":"MterpGetBooleanStatic"}
diff --git a/runtime/interpreter/mterp/arm/op_sget_byte.S b/runtime/interpreter/mterp/arm/op_sget_byte.S
index d76862e..9f4c904 100644
--- a/runtime/interpreter/mterp/arm/op_sget_byte.S
+++ b/runtime/interpreter/mterp/arm/op_sget_byte.S
@@ -1 +1 @@
-%include "arm/op_sget.S" {"helper":"artGetByteStaticFromCode"}
+%include "arm/op_sget.S" {"helper":"MterpGetByteStatic"}
diff --git a/runtime/interpreter/mterp/arm/op_sget_char.S b/runtime/interpreter/mterp/arm/op_sget_char.S
index b7fcfc2..dd8c991 100644
--- a/runtime/interpreter/mterp/arm/op_sget_char.S
+++ b/runtime/interpreter/mterp/arm/op_sget_char.S
@@ -1 +1 @@
-%include "arm/op_sget.S" {"helper":"artGetCharStaticFromCode"}
+%include "arm/op_sget.S" {"helper":"MterpGetCharStatic"}
diff --git a/runtime/interpreter/mterp/arm/op_sget_object.S b/runtime/interpreter/mterp/arm/op_sget_object.S
index 8e7d075..e1d9eae 100644
--- a/runtime/interpreter/mterp/arm/op_sget_object.S
+++ b/runtime/interpreter/mterp/arm/op_sget_object.S
@@ -1 +1 @@
-%include "arm/op_sget.S" {"is_object":"1", "helper":"artGetObjStaticFromCode"}
+%include "arm/op_sget.S" {"is_object":"1", "helper":"MterpGetObjStatic"}
diff --git a/runtime/interpreter/mterp/arm/op_sget_short.S b/runtime/interpreter/mterp/arm/op_sget_short.S
index 3e80f0d..c0d61c4 100644
--- a/runtime/interpreter/mterp/arm/op_sget_short.S
+++ b/runtime/interpreter/mterp/arm/op_sget_short.S
@@ -1 +1 @@
-%include "arm/op_sget.S" {"helper":"artGetShortStaticFromCode"}
+%include "arm/op_sget.S" {"helper":"MterpGetShortStatic"}
diff --git a/runtime/interpreter/mterp/arm/op_sget_wide.S b/runtime/interpreter/mterp/arm/op_sget_wide.S
index 4f2f89d..aeee016 100644
--- a/runtime/interpreter/mterp/arm/op_sget_wide.S
+++ b/runtime/interpreter/mterp/arm/op_sget_wide.S
@@ -4,12 +4,12 @@
      */
     /* sget-wide vAA, field@BBBB */
 
-    .extern artGet64StaticFromCode
+    .extern MterpGet64Static
     EXPORT_PC
     FETCH r0, 1                         @ r0<- field ref BBBB
     ldr   r1, [rFP, #OFF_FP_METHOD]
     mov   r2, rSELF
-    bl    artGet64StaticFromCode
+    bl    MterpGet64Static
     ldr   r3, [rSELF, #THREAD_EXCEPTION_OFFSET]
     mov   r9, rINST, lsr #8             @ r9<- AA
     VREG_INDEX_TO_ADDR lr, r9           @ lr<- &fp[AA]
diff --git a/runtime/interpreter/mterp/arm/op_sput.S b/runtime/interpreter/mterp/arm/op_sput.S
index 7e0c1a6..494df8a 100644
--- a/runtime/interpreter/mterp/arm/op_sput.S
+++ b/runtime/interpreter/mterp/arm/op_sput.S
@@ -1,4 +1,4 @@
-%default { "helper":"artSet32StaticFromCode"}
+%default { "helper":"MterpSet32Static"}
     /*
      * General SPUT handler wrapper.
      *
diff --git a/runtime/interpreter/mterp/arm/op_sput_boolean.S b/runtime/interpreter/mterp/arm/op_sput_boolean.S
index e3bbf2b..47bed0a 100644
--- a/runtime/interpreter/mterp/arm/op_sput_boolean.S
+++ b/runtime/interpreter/mterp/arm/op_sput_boolean.S
@@ -1 +1 @@
-%include "arm/op_sput.S" {"helper":"artSet8StaticFromCode"}
+%include "arm/op_sput.S" {"helper":"MterpSetBooleanStatic"}
diff --git a/runtime/interpreter/mterp/arm/op_sput_byte.S b/runtime/interpreter/mterp/arm/op_sput_byte.S
index e3bbf2b..b4d22b4 100644
--- a/runtime/interpreter/mterp/arm/op_sput_byte.S
+++ b/runtime/interpreter/mterp/arm/op_sput_byte.S
@@ -1 +1 @@
-%include "arm/op_sput.S" {"helper":"artSet8StaticFromCode"}
+%include "arm/op_sput.S" {"helper":"MterpSetByteStatic"}
diff --git a/runtime/interpreter/mterp/arm/op_sput_char.S b/runtime/interpreter/mterp/arm/op_sput_char.S
index d8d65cb..58a957d 100644
--- a/runtime/interpreter/mterp/arm/op_sput_char.S
+++ b/runtime/interpreter/mterp/arm/op_sput_char.S
@@ -1 +1 @@
-%include "arm/op_sput.S" {"helper":"artSet16StaticFromCode"}
+%include "arm/op_sput.S" {"helper":"MterpSetCharStatic"}
diff --git a/runtime/interpreter/mterp/arm/op_sput_short.S b/runtime/interpreter/mterp/arm/op_sput_short.S
index d8d65cb..88c3211 100644
--- a/runtime/interpreter/mterp/arm/op_sput_short.S
+++ b/runtime/interpreter/mterp/arm/op_sput_short.S
@@ -1 +1 @@
-%include "arm/op_sput.S" {"helper":"artSet16StaticFromCode"}
+%include "arm/op_sput.S" {"helper":"MterpSetShortStatic"}
diff --git a/runtime/interpreter/mterp/arm/op_sput_wide.S b/runtime/interpreter/mterp/arm/op_sput_wide.S
index 8d8ed8c..1e8fcc9 100644
--- a/runtime/interpreter/mterp/arm/op_sput_wide.S
+++ b/runtime/interpreter/mterp/arm/op_sput_wide.S
@@ -3,15 +3,15 @@
      *
      */
     /* sput-wide vAA, field@BBBB */
-    .extern artSet64IndirectStaticFromMterp
+    .extern MterpSet64Static
     EXPORT_PC
     FETCH   r0, 1                       @ r0<- field ref BBBB
-    ldr     r1, [rFP, #OFF_FP_METHOD]
-    mov     r2, rINST, lsr #8           @ r3<- AA
-    VREG_INDEX_TO_ADDR r2, r2
+    mov     r1, rINST, lsr #8           @ r1<- AA
+    VREG_INDEX_TO_ADDR r1, r1
+    ldr     r2, [rFP, #OFF_FP_METHOD]
     mov     r3, rSELF
     PREFETCH_INST 2                     @ Get next inst, but don't advance rPC
-    bl      artSet64IndirectStaticFromMterp
+    bl      MterpSet64Static
     cmp     r0, #0                      @ 0 on success, -1 on failure
     bne     MterpException
     ADVANCE 2                           @ Past exception point - now advance rPC
diff --git a/runtime/interpreter/mterp/arm64/op_sget.S b/runtime/interpreter/mterp/arm64/op_sget.S
index 6352ce0..84e71ac 100644
--- a/runtime/interpreter/mterp/arm64/op_sget.S
+++ b/runtime/interpreter/mterp/arm64/op_sget.S
@@ -1,4 +1,4 @@
-%default { "is_object":"0", "helper":"artGet32StaticFromCode", "extend":"" }
+%default { "is_object":"0", "helper":"MterpGet32Static", "extend":"" }
     /*
      * General SGET handler wrapper.
      *
diff --git a/runtime/interpreter/mterp/arm64/op_sget_boolean.S b/runtime/interpreter/mterp/arm64/op_sget_boolean.S
index c40dbdd..868f41c 100644
--- a/runtime/interpreter/mterp/arm64/op_sget_boolean.S
+++ b/runtime/interpreter/mterp/arm64/op_sget_boolean.S
@@ -1 +1 @@
-%include "arm64/op_sget.S" {"helper":"artGetBooleanStaticFromCode", "extend":"uxtb w0, w0"}
+%include "arm64/op_sget.S" {"helper":"MterpGetBooleanStatic", "extend":"uxtb w0, w0"}
diff --git a/runtime/interpreter/mterp/arm64/op_sget_byte.S b/runtime/interpreter/mterp/arm64/op_sget_byte.S
index 6cf69a3..e135aa7 100644
--- a/runtime/interpreter/mterp/arm64/op_sget_byte.S
+++ b/runtime/interpreter/mterp/arm64/op_sget_byte.S
@@ -1 +1 @@
-%include "arm64/op_sget.S" {"helper":"artGetByteStaticFromCode", "extend":"sxtb w0, w0"}
+%include "arm64/op_sget.S" {"helper":"MterpGetByteStatic", "extend":"sxtb w0, w0"}
diff --git a/runtime/interpreter/mterp/arm64/op_sget_char.S b/runtime/interpreter/mterp/arm64/op_sget_char.S
index 8924a34..05d57ac 100644
--- a/runtime/interpreter/mterp/arm64/op_sget_char.S
+++ b/runtime/interpreter/mterp/arm64/op_sget_char.S
@@ -1 +1 @@
-%include "arm64/op_sget.S" {"helper":"artGetCharStaticFromCode", "extend":"uxth w0, w0"}
+%include "arm64/op_sget.S" {"helper":"MterpGetCharStatic", "extend":"uxth w0, w0"}
diff --git a/runtime/interpreter/mterp/arm64/op_sget_object.S b/runtime/interpreter/mterp/arm64/op_sget_object.S
index 620b0ba..1faaf6e 100644
--- a/runtime/interpreter/mterp/arm64/op_sget_object.S
+++ b/runtime/interpreter/mterp/arm64/op_sget_object.S
@@ -1 +1 @@
-%include "arm64/op_sget.S" {"is_object":"1", "helper":"artGetObjStaticFromCode"}
+%include "arm64/op_sget.S" {"is_object":"1", "helper":"MterpGetObjStatic"}
diff --git a/runtime/interpreter/mterp/arm64/op_sget_short.S b/runtime/interpreter/mterp/arm64/op_sget_short.S
index 19dbba6..5900231 100644
--- a/runtime/interpreter/mterp/arm64/op_sget_short.S
+++ b/runtime/interpreter/mterp/arm64/op_sget_short.S
@@ -1 +1 @@
-%include "arm64/op_sget.S" {"helper":"artGetShortStaticFromCode", "extend":"sxth w0, w0"}
+%include "arm64/op_sget.S" {"helper":"MterpGetShortStatic", "extend":"sxth w0, w0"}
diff --git a/runtime/interpreter/mterp/arm64/op_sget_wide.S b/runtime/interpreter/mterp/arm64/op_sget_wide.S
index 287f66d..92f3f7d 100644
--- a/runtime/interpreter/mterp/arm64/op_sget_wide.S
+++ b/runtime/interpreter/mterp/arm64/op_sget_wide.S
@@ -4,12 +4,12 @@
      */
     /* sget-wide vAA, field//BBBB */
 
-    .extern artGet64StaticFromCode
+    .extern MterpGet64Static
     EXPORT_PC
     FETCH w0, 1                         // w0<- field ref BBBB
     ldr   x1, [xFP, #OFF_FP_METHOD]
     mov   x2, xSELF
-    bl    artGet64StaticFromCode
+    bl    MterpGet64Static
     ldr   x3, [xSELF, #THREAD_EXCEPTION_OFFSET]
     lsr   w4, wINST, #8                 // w4<- AA
     cbnz  x3, MterpException            // bail out
diff --git a/runtime/interpreter/mterp/arm64/op_sput.S b/runtime/interpreter/mterp/arm64/op_sput.S
index 75f27ab..e322af0 100644
--- a/runtime/interpreter/mterp/arm64/op_sput.S
+++ b/runtime/interpreter/mterp/arm64/op_sput.S
@@ -1,4 +1,4 @@
-%default { "helper":"artSet32StaticFromCode"}
+%default { "helper":"MterpSet32Static"}
     /*
      * General SPUT handler wrapper.
      *
diff --git a/runtime/interpreter/mterp/arm64/op_sput_boolean.S b/runtime/interpreter/mterp/arm64/op_sput_boolean.S
index 11c55e5..9928f31 100644
--- a/runtime/interpreter/mterp/arm64/op_sput_boolean.S
+++ b/runtime/interpreter/mterp/arm64/op_sput_boolean.S
@@ -1 +1 @@
-%include "arm64/op_sput.S" {"helper":"artSet8StaticFromCode"}
+%include "arm64/op_sput.S" {"helper":"MterpSetBooleanStatic"}
diff --git a/runtime/interpreter/mterp/arm64/op_sput_byte.S b/runtime/interpreter/mterp/arm64/op_sput_byte.S
index 11c55e5..16d6ba9 100644
--- a/runtime/interpreter/mterp/arm64/op_sput_byte.S
+++ b/runtime/interpreter/mterp/arm64/op_sput_byte.S
@@ -1 +1 @@
-%include "arm64/op_sput.S" {"helper":"artSet8StaticFromCode"}
+%include "arm64/op_sput.S" {"helper":"MterpSetByteStatic"}
diff --git a/runtime/interpreter/mterp/arm64/op_sput_char.S b/runtime/interpreter/mterp/arm64/op_sput_char.S
index b4dd5aa..ab5e815 100644
--- a/runtime/interpreter/mterp/arm64/op_sput_char.S
+++ b/runtime/interpreter/mterp/arm64/op_sput_char.S
@@ -1 +1 @@
-%include "arm64/op_sput.S" {"helper":"artSet16StaticFromCode"}
+%include "arm64/op_sput.S" {"helper":"MterpSetCharStatic"}
diff --git a/runtime/interpreter/mterp/arm64/op_sput_short.S b/runtime/interpreter/mterp/arm64/op_sput_short.S
index b4dd5aa..b54f88a 100644
--- a/runtime/interpreter/mterp/arm64/op_sput_short.S
+++ b/runtime/interpreter/mterp/arm64/op_sput_short.S
@@ -1 +1 @@
-%include "arm64/op_sput.S" {"helper":"artSet16StaticFromCode"}
+%include "arm64/op_sput.S" {"helper":"MterpSetShortStatic"}
diff --git a/runtime/interpreter/mterp/arm64/op_sput_wide.S b/runtime/interpreter/mterp/arm64/op_sput_wide.S
index a79b1a6..4aeb8ff 100644
--- a/runtime/interpreter/mterp/arm64/op_sput_wide.S
+++ b/runtime/interpreter/mterp/arm64/op_sput_wide.S
@@ -3,15 +3,15 @@
      *
      */
     /* sput-wide vAA, field//BBBB */
-    .extern artSet64IndirectStaticFromMterp
+    .extern MterpSet64Static
     EXPORT_PC
     FETCH   w0, 1                       // w0<- field ref BBBB
-    ldr     x1, [xFP, #OFF_FP_METHOD]
-    lsr     w2, wINST, #8               // w3<- AA
-    VREG_INDEX_TO_ADDR x2, w2
+    lsr     w1, wINST, #8               // w1<- AA
+    VREG_INDEX_TO_ADDR x1, w1
+    ldr     x2, [xFP, #OFF_FP_METHOD]
     mov     x3, xSELF
     PREFETCH_INST 2                     // Get next inst, but don't advance rPC
-    bl      artSet64IndirectStaticFromMterp
+    bl      MterpSet64Static
     cbnz    w0, MterpException          // 0 on success, -1 on failure
     ADVANCE 2                           // Past exception point - now advance rPC
     GET_INST_OPCODE ip                  // extract opcode from wINST
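
The register shuffle in both sput-wide handlers (the &fp[AA] pointer moves up to the second argument and the method down to the third) implies the new helper's parameter order. A hedged C-level sketch of the signatures this calling convention suggests; these prototypes are inferred from the registers above, not quoted from mterp.cc:

    // Inferred from r0-r3 / w0,x1-x3 above: field index, pointer to the 64-bit
    // vreg pair, referrer method, self. Treat these as assumptions.
    extern "C" int MterpSet64Static(uint32_t field_idx,
                                    uint64_t* new_value,   // &fp[AA], now arg 1
                                    ArtMethod* referrer,   // method, now arg 2
                                    Thread* self);         // 0 on success, -1 on failure

    extern "C" uint64_t MterpGet64Static(uint32_t field_idx,
                                         ArtMethod* referrer,
                                         Thread* self);
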
diff --git a/runtime/interpreter/mterp/config_arm b/runtime/interpreter/mterp/config_arm
index 6d9774c..b19426b 100644
--- a/runtime/interpreter/mterp/config_arm
+++ b/runtime/interpreter/mterp/config_arm
@@ -288,8 +288,8 @@
     # op op_unused_f9 FALLBACK
     op op_invoke_polymorphic FALLBACK
     op op_invoke_polymorphic_range FALLBACK
-    # op op_unused_fc FALLBACK
-    # op op_unused_fd FALLBACK
+    op op_invoke_custom FALLBACK
+    op op_invoke_custom_range FALLBACK
     # op op_unused_fe FALLBACK
     # op op_unused_ff FALLBACK
 op-end
diff --git a/runtime/interpreter/mterp/config_arm64 b/runtime/interpreter/mterp/config_arm64
index 9f32695..0987964 100644
--- a/runtime/interpreter/mterp/config_arm64
+++ b/runtime/interpreter/mterp/config_arm64
@@ -286,8 +286,8 @@
     # op op_unused_f9 FALLBACK
     op op_invoke_polymorphic FALLBACK
     op op_invoke_polymorphic_range FALLBACK
-    # op op_unused_fc FALLBACK
-    # op op_unused_fd FALLBACK
+    op op_invoke_custom FALLBACK
+    op op_invoke_custom_range FALLBACK
     # op op_unused_fe FALLBACK
     # op op_unused_ff FALLBACK
 op-end
diff --git a/runtime/interpreter/mterp/config_mips b/runtime/interpreter/mterp/config_mips
index 708a22b..fe07385 100644
--- a/runtime/interpreter/mterp/config_mips
+++ b/runtime/interpreter/mterp/config_mips
@@ -288,8 +288,8 @@
     # op op_unused_f9 FALLBACK
     op op_invoke_polymorphic FALLBACK
     op op_invoke_polymorphic_range FALLBACK
-    # op op_unused_fc FALLBACK
-    # op op_unused_fd FALLBACK
+    op op_invoke_custom FALLBACK
+    op op_invoke_custom_range FALLBACK
     # op op_unused_fe FALLBACK
     # op op_unused_ff FALLBACK
 op-end
diff --git a/runtime/interpreter/mterp/config_mips64 b/runtime/interpreter/mterp/config_mips64
index 7643a48..d24cf4d 100644
--- a/runtime/interpreter/mterp/config_mips64
+++ b/runtime/interpreter/mterp/config_mips64
@@ -288,8 +288,8 @@
     # op op_unused_f9 FALLBACK
     op op_invoke_polymorphic FALLBACK
     op op_invoke_polymorphic_range FALLBACK
-    # op op_unused_fc FALLBACK
-    # op op_unused_fd FALLBACK
+    op op_invoke_custom FALLBACK
+    op op_invoke_custom_range FALLBACK
     # op op_unused_fe FALLBACK
     # op op_unused_ff FALLBACK
 op-end
diff --git a/runtime/interpreter/mterp/config_x86 b/runtime/interpreter/mterp/config_x86
index f454786..076baf2 100644
--- a/runtime/interpreter/mterp/config_x86
+++ b/runtime/interpreter/mterp/config_x86
@@ -292,8 +292,8 @@
     # op op_unused_f9 FALLBACK
     op op_invoke_polymorphic FALLBACK
     op op_invoke_polymorphic_range FALLBACK
-    # op op_unused_fc FALLBACK
-    # op op_unused_fd FALLBACK
+    op op_invoke_custom FALLBACK
+    op op_invoke_custom_range FALLBACK
     # op op_unused_fe FALLBACK
     # op op_unused_ff FALLBACK
 op-end
diff --git a/runtime/interpreter/mterp/config_x86_64 b/runtime/interpreter/mterp/config_x86_64
index dbfd3d1..44b671a 100644
--- a/runtime/interpreter/mterp/config_x86_64
+++ b/runtime/interpreter/mterp/config_x86_64
@@ -292,8 +292,8 @@
     # op op_unused_f9 FALLBACK
     op op_invoke_polymorphic FALLBACK
     op op_invoke_polymorphic_range FALLBACK
-    # op op_unused_fc FALLBACK
-    # op op_unused_fd FALLBACK
+    op op_invoke_custom FALLBACK
+    op op_invoke_custom_range FALLBACK
     # op op_unused_fe FALLBACK
     # op op_unused_ff FALLBACK
 op-end
diff --git a/runtime/interpreter/mterp/mips/op_sget.S b/runtime/interpreter/mterp/mips/op_sget.S
index 64ece1e..635df8a 100644
--- a/runtime/interpreter/mterp/mips/op_sget.S
+++ b/runtime/interpreter/mterp/mips/op_sget.S
@@ -1,4 +1,4 @@
-%default { "is_object":"0", "helper":"artGet32StaticFromCode" }
+%default { "is_object":"0", "helper":"MterpGet32Static" }
     /*
      * General SGET handler.
      *
diff --git a/runtime/interpreter/mterp/mips/op_sget_boolean.S b/runtime/interpreter/mterp/mips/op_sget_boolean.S
index 45a5a70..7829970 100644
--- a/runtime/interpreter/mterp/mips/op_sget_boolean.S
+++ b/runtime/interpreter/mterp/mips/op_sget_boolean.S
@@ -1 +1 @@
-%include "mips/op_sget.S" {"helper":"artGetBooleanStaticFromCode"}
+%include "mips/op_sget.S" {"helper":"MterpGetBooleanStatic"}
diff --git a/runtime/interpreter/mterp/mips/op_sget_byte.S b/runtime/interpreter/mterp/mips/op_sget_byte.S
index 319122c..ee08342 100644
--- a/runtime/interpreter/mterp/mips/op_sget_byte.S
+++ b/runtime/interpreter/mterp/mips/op_sget_byte.S
@@ -1 +1 @@
-%include "mips/op_sget.S" {"helper":"artGetByteStaticFromCode"}
+%include "mips/op_sget.S" {"helper":"MterpGetByteStatic"}
diff --git a/runtime/interpreter/mterp/mips/op_sget_char.S b/runtime/interpreter/mterp/mips/op_sget_char.S
index 7103847..d8b477a 100644
--- a/runtime/interpreter/mterp/mips/op_sget_char.S
+++ b/runtime/interpreter/mterp/mips/op_sget_char.S
@@ -1 +1 @@
-%include "mips/op_sget.S" {"helper":"artGetCharStaticFromCode"}
+%include "mips/op_sget.S" {"helper":"MterpGetCharStatic"}
diff --git a/runtime/interpreter/mterp/mips/op_sget_object.S b/runtime/interpreter/mterp/mips/op_sget_object.S
index b205f51..2dc00c3 100644
--- a/runtime/interpreter/mterp/mips/op_sget_object.S
+++ b/runtime/interpreter/mterp/mips/op_sget_object.S
@@ -1 +1 @@
-%include "mips/op_sget.S" {"is_object":"1", "helper":"artGetObjStaticFromCode"}
+%include "mips/op_sget.S" {"is_object":"1", "helper":"MterpGetObjStatic"}
diff --git a/runtime/interpreter/mterp/mips/op_sget_short.S b/runtime/interpreter/mterp/mips/op_sget_short.S
index 3301823..ab55d93 100644
--- a/runtime/interpreter/mterp/mips/op_sget_short.S
+++ b/runtime/interpreter/mterp/mips/op_sget_short.S
@@ -1 +1 @@
-%include "mips/op_sget.S" {"helper":"artGetShortStaticFromCode"}
+%include "mips/op_sget.S" {"helper":"MterpGetShortStatic"}
diff --git a/runtime/interpreter/mterp/mips/op_sget_wide.S b/runtime/interpreter/mterp/mips/op_sget_wide.S
index c729250..ec4295a 100644
--- a/runtime/interpreter/mterp/mips/op_sget_wide.S
+++ b/runtime/interpreter/mterp/mips/op_sget_wide.S
@@ -2,12 +2,12 @@
      * 64-bit SGET handler.
      */
     /* sget-wide vAA, field@BBBB */
-    .extern artGet64StaticFromCode
+    .extern MterpGet64Static
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref BBBB
     lw    a1, OFF_FP_METHOD(rFP)           # a1 <- method
     move  a2, rSELF                        # a2 <- self
-    JAL(artGet64StaticFromCode)
+    JAL(MterpGet64Static)
     lw    a3, THREAD_EXCEPTION_OFFSET(rSELF)
     bnez  a3, MterpException
     GET_OPA(a1)                            # a1 <- AA
diff --git a/runtime/interpreter/mterp/mips/op_sput.S b/runtime/interpreter/mterp/mips/op_sput.S
index 7034a0e..37f8687 100644
--- a/runtime/interpreter/mterp/mips/op_sput.S
+++ b/runtime/interpreter/mterp/mips/op_sput.S
@@ -1,4 +1,4 @@
-%default { "helper":"artSet32StaticFromCode"}
+%default { "helper":"MterpSet32Static"}
     /*
      * General SPUT handler.
      *
diff --git a/runtime/interpreter/mterp/mips/op_sput_boolean.S b/runtime/interpreter/mterp/mips/op_sput_boolean.S
index 7909ef5..6426cd4 100644
--- a/runtime/interpreter/mterp/mips/op_sput_boolean.S
+++ b/runtime/interpreter/mterp/mips/op_sput_boolean.S
@@ -1 +1 @@
-%include "mips/op_sput.S" {"helper":"artSet8StaticFromCode"}
+%include "mips/op_sput.S" {"helper":"MterpSetBooleanStatic"}
diff --git a/runtime/interpreter/mterp/mips/op_sput_byte.S b/runtime/interpreter/mterp/mips/op_sput_byte.S
index 7909ef5..c68d18f 100644
--- a/runtime/interpreter/mterp/mips/op_sput_byte.S
+++ b/runtime/interpreter/mterp/mips/op_sput_byte.S
@@ -1 +1 @@
-%include "mips/op_sput.S" {"helper":"artSet8StaticFromCode"}
+%include "mips/op_sput.S" {"helper":"MterpSetByteStatic"}
diff --git a/runtime/interpreter/mterp/mips/op_sput_char.S b/runtime/interpreter/mterp/mips/op_sput_char.S
index 188195c..9b8983e 100644
--- a/runtime/interpreter/mterp/mips/op_sput_char.S
+++ b/runtime/interpreter/mterp/mips/op_sput_char.S
@@ -1 +1 @@
-%include "mips/op_sput.S" {"helper":"artSet16StaticFromCode"}
+%include "mips/op_sput.S" {"helper":"MterpSetCharStatic"}
diff --git a/runtime/interpreter/mterp/mips/op_sput_short.S b/runtime/interpreter/mterp/mips/op_sput_short.S
index 188195c..5a57ed9 100644
--- a/runtime/interpreter/mterp/mips/op_sput_short.S
+++ b/runtime/interpreter/mterp/mips/op_sput_short.S
@@ -1 +1 @@
-%include "mips/op_sput.S" {"helper":"artSet16StaticFromCode"}
+%include "mips/op_sput.S" {"helper":"MterpSetShortStatic"}
diff --git a/runtime/interpreter/mterp/mips/op_sput_wide.S b/runtime/interpreter/mterp/mips/op_sput_wide.S
index 3b347fc..c090007 100644
--- a/runtime/interpreter/mterp/mips/op_sput_wide.S
+++ b/runtime/interpreter/mterp/mips/op_sput_wide.S
@@ -2,15 +2,15 @@
      * 64-bit SPUT handler.
      */
     /* sput-wide vAA, field@BBBB */
-    .extern artSet64IndirectStaticFromMterp
+    .extern MterpSet64Static
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref CCCC
-    lw    a1, OFF_FP_METHOD(rFP)           # a1 <- method
-    GET_OPA(a2)                            # a2 <- AA
-    EAS2(a2, rFP, a2)                      # a2 <- &fp[AA]
+    GET_OPA(a1)                            # a1 <- AA
+    EAS2(a1, rFP, a1)                      # a1 <- &fp[AA]
+    lw    a2, OFF_FP_METHOD(rFP)           # a2 <- method
     move  a3, rSELF                        # a3 <- self
     PREFETCH_INST(2)                       # load rINST
-    JAL(artSet64IndirectStaticFromMterp)
+    JAL(MterpSet64Static)
     bnez  v0, MterpException               # bail out
     ADVANCE(2)                             # advance rPC
     GET_INST_OPCODE(t0)                    # extract opcode from rINST
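Note: the a1/a2 reshuffle in this sput-wide hunk, mirrored in the arm, arm64, and mips64 handlers later in the patch, is not cosmetic. The replacement C helper takes the pointer to the 64-bit vreg pair as its second argument, where the removed entry point took the referrer there. For reference, the two prototypes as they appear in the mterp.cc hunks further down:

    // Old entry point, deleted in mterp.cc below: the value pointer is argument 3.
    extern "C" ssize_t artSet64IndirectStaticFromMterp(uint32_t field_idx,
                                                       ArtMethod* referrer,
                                                       uint64_t* new_value,
                                                       Thread* self);

    // New entry point: &fp[AA] moves up to argument 2, hence a1 <- &fp[AA]
    // and a2 <- method in the hunk above.
    extern "C" int MterpSet64Static(uint32_t field_idx,
                                    int64_t* new_value,
                                    ArtMethod* referrer,
                                    Thread* self);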
diff --git a/runtime/interpreter/mterp/mips64/op_sget.S b/runtime/interpreter/mterp/mips64/op_sget.S
index bd2cfe3..71046db 100644
--- a/runtime/interpreter/mterp/mips64/op_sget.S
+++ b/runtime/interpreter/mterp/mips64/op_sget.S
@@ -1,4 +1,4 @@
-%default { "is_object":"0", "helper":"artGet32StaticFromCode", "extend":"" }
+%default { "is_object":"0", "helper":"MterpGet32Static", "extend":"" }
     /*
      * General SGET handler wrapper.
      *
diff --git a/runtime/interpreter/mterp/mips64/op_sget_boolean.S b/runtime/interpreter/mterp/mips64/op_sget_boolean.S
index e7b1844..ec1ce9e 100644
--- a/runtime/interpreter/mterp/mips64/op_sget_boolean.S
+++ b/runtime/interpreter/mterp/mips64/op_sget_boolean.S
@@ -1 +1 @@
-%include "mips64/op_sget.S" {"helper":"artGetBooleanStaticFromCode", "extend":"and v0, v0, 0xff"}
+%include "mips64/op_sget.S" {"helper":"MterpGetBooleanStatic", "extend":"and v0, v0, 0xff"}
diff --git a/runtime/interpreter/mterp/mips64/op_sget_byte.S b/runtime/interpreter/mterp/mips64/op_sget_byte.S
index 52a2e4a..6a802f6 100644
--- a/runtime/interpreter/mterp/mips64/op_sget_byte.S
+++ b/runtime/interpreter/mterp/mips64/op_sget_byte.S
@@ -1 +1 @@
-%include "mips64/op_sget.S" {"helper":"artGetByteStaticFromCode", "extend":"seb v0, v0"}
+%include "mips64/op_sget.S" {"helper":"MterpGetByteStatic", "extend":"seb v0, v0"}
diff --git a/runtime/interpreter/mterp/mips64/op_sget_char.S b/runtime/interpreter/mterp/mips64/op_sget_char.S
index 873d82a..483d085 100644
--- a/runtime/interpreter/mterp/mips64/op_sget_char.S
+++ b/runtime/interpreter/mterp/mips64/op_sget_char.S
@@ -1 +1 @@
-%include "mips64/op_sget.S" {"helper":"artGetCharStaticFromCode", "extend":"and v0, v0, 0xffff"}
+%include "mips64/op_sget.S" {"helper":"MterpGetCharStatic", "extend":"and v0, v0, 0xffff"}
diff --git a/runtime/interpreter/mterp/mips64/op_sget_object.S b/runtime/interpreter/mterp/mips64/op_sget_object.S
index 3108417..2250696 100644
--- a/runtime/interpreter/mterp/mips64/op_sget_object.S
+++ b/runtime/interpreter/mterp/mips64/op_sget_object.S
@@ -1 +1 @@
-%include "mips64/op_sget.S" {"is_object":"1", "helper":"artGetObjStaticFromCode"}
+%include "mips64/op_sget.S" {"is_object":"1", "helper":"MterpGetObjStatic"}
diff --git a/runtime/interpreter/mterp/mips64/op_sget_short.S b/runtime/interpreter/mterp/mips64/op_sget_short.S
index fed4e76..b257bbb 100644
--- a/runtime/interpreter/mterp/mips64/op_sget_short.S
+++ b/runtime/interpreter/mterp/mips64/op_sget_short.S
@@ -1 +1 @@
-%include "mips64/op_sget.S" {"helper":"artGetShortStaticFromCode", "extend":"seh v0, v0"}
+%include "mips64/op_sget.S" {"helper":"MterpGetShortStatic", "extend":"seh v0, v0"}
diff --git a/runtime/interpreter/mterp/mips64/op_sget_wide.S b/runtime/interpreter/mterp/mips64/op_sget_wide.S
index 77124d1..ace64f8 100644
--- a/runtime/interpreter/mterp/mips64/op_sget_wide.S
+++ b/runtime/interpreter/mterp/mips64/op_sget_wide.S
@@ -3,12 +3,12 @@
      *
      */
     /* sget-wide vAA, field//BBBB */
-    .extern artGet64StaticFromCode
+    .extern MterpGet64Static
     EXPORT_PC
     lhu     a0, 2(rPC)                  # a0 <- field ref BBBB
     ld      a1, OFF_FP_METHOD(rFP)
     move    a2, rSELF
-    jal     artGet64StaticFromCode
+    jal     MterpGet64Static
     ld      a3, THREAD_EXCEPTION_OFFSET(rSELF)
     srl     a4, rINST, 8                # a4 <- AA
     bnez    a3, MterpException          # bail out
diff --git a/runtime/interpreter/mterp/mips64/op_sput.S b/runtime/interpreter/mterp/mips64/op_sput.S
index 142f18f..466f333 100644
--- a/runtime/interpreter/mterp/mips64/op_sput.S
+++ b/runtime/interpreter/mterp/mips64/op_sput.S
@@ -1,4 +1,4 @@
-%default { "helper":"artSet32StaticFromCode" }
+%default { "helper":"MterpSet32Static" }
     /*
      * General SPUT handler wrapper.
      *
diff --git a/runtime/interpreter/mterp/mips64/op_sput_boolean.S b/runtime/interpreter/mterp/mips64/op_sput_boolean.S
index f5b8dbf..eba58f7 100644
--- a/runtime/interpreter/mterp/mips64/op_sput_boolean.S
+++ b/runtime/interpreter/mterp/mips64/op_sput_boolean.S
@@ -1 +1 @@
-%include "mips64/op_sput.S" {"helper":"artSet8StaticFromCode"}
+%include "mips64/op_sput.S" {"helper":"MterpSetBooleanStatic"}
diff --git a/runtime/interpreter/mterp/mips64/op_sput_byte.S b/runtime/interpreter/mterp/mips64/op_sput_byte.S
index f5b8dbf..80a26c0 100644
--- a/runtime/interpreter/mterp/mips64/op_sput_byte.S
+++ b/runtime/interpreter/mterp/mips64/op_sput_byte.S
@@ -1 +1 @@
-%include "mips64/op_sput.S" {"helper":"artSet8StaticFromCode"}
+%include "mips64/op_sput.S" {"helper":"MterpSetByteStatic"}
diff --git a/runtime/interpreter/mterp/mips64/op_sput_char.S b/runtime/interpreter/mterp/mips64/op_sput_char.S
index c4d195c..c0d5bf3 100644
--- a/runtime/interpreter/mterp/mips64/op_sput_char.S
+++ b/runtime/interpreter/mterp/mips64/op_sput_char.S
@@ -1 +1 @@
-%include "mips64/op_sput.S" {"helper":"artSet16StaticFromCode"}
+%include "mips64/op_sput.S" {"helper":"MterpSetCharStatic"}
diff --git a/runtime/interpreter/mterp/mips64/op_sput_short.S b/runtime/interpreter/mterp/mips64/op_sput_short.S
index c4d195c..b001832 100644
--- a/runtime/interpreter/mterp/mips64/op_sput_short.S
+++ b/runtime/interpreter/mterp/mips64/op_sput_short.S
@@ -1 +1 @@
-%include "mips64/op_sput.S" {"helper":"artSet16StaticFromCode"}
+%include "mips64/op_sput.S" {"helper":"MterpSetShortStatic"}
diff --git a/runtime/interpreter/mterp/mips64/op_sput_wide.S b/runtime/interpreter/mterp/mips64/op_sput_wide.S
index 828ddc1..aa3d5b4 100644
--- a/runtime/interpreter/mterp/mips64/op_sput_wide.S
+++ b/runtime/interpreter/mterp/mips64/op_sput_wide.S
@@ -3,15 +3,15 @@
      *
      */
     /* sput-wide vAA, field//BBBB */
-    .extern artSet64IndirectStaticFromMterp
+    .extern MterpSet64Static
     EXPORT_PC
     lhu     a0, 2(rPC)                  # a0 <- field ref BBBB
-    ld      a1, OFF_FP_METHOD(rFP)
-    srl     a2, rINST, 8                # a2 <- AA
-    dlsa    a2, a2, rFP, 2
+    srl     a1, rINST, 8                # a1 <- AA
+    dlsa    a1, a1, rFP, 2
+    ld      a2, OFF_FP_METHOD(rFP)
     move    a3, rSELF
     PREFETCH_INST 2                     # Get next inst, but don't advance rPC
-    jal     artSet64IndirectStaticFromMterp
+    jal     MterpSet64Static
     bnezc   v0, MterpException          # 0 on success, -1 on failure
     ADVANCE 2                           # Past exception point - now advance rPC
     GET_INST_OPCODE v0                  # extract opcode from rINST
diff --git a/runtime/interpreter/mterp/mterp.cc b/runtime/interpreter/mterp/mterp.cc
index 75ab91a..a53040c 100644
--- a/runtime/interpreter/mterp/mterp.cc
+++ b/runtime/interpreter/mterp/mterp.cc
@@ -18,6 +18,7 @@
  * Mterp entry point and support functions.
  */
 #include "interpreter/interpreter_common.h"
+#include "interpreter/interpreter_intrinsics.h"
 #include "entrypoints/entrypoint_utils-inl.h"
 #include "mterp.h"
 #include "debugger.h"
@@ -157,7 +158,7 @@
     REQUIRES_SHARED(Locks::mutator_lock_) {
   JValue* result_register = shadow_frame->GetResultRegister();
   const Instruction* inst = Instruction::At(dex_pc_ptr);
-  return DoInvoke<kVirtual, false, false>(
+  return DoFastInvoke<kVirtual>(
       self, *shadow_frame, inst, inst_data, result_register);
 }
 
@@ -190,7 +191,7 @@
     REQUIRES_SHARED(Locks::mutator_lock_) {
   JValue* result_register = shadow_frame->GetResultRegister();
   const Instruction* inst = Instruction::At(dex_pc_ptr);
-  return DoInvoke<kDirect, false, false>(
+  return DoFastInvoke<kDirect>(
       self, *shadow_frame, inst, inst_data, result_register);
 }
 
@@ -201,7 +202,7 @@
     REQUIRES_SHARED(Locks::mutator_lock_) {
   JValue* result_register = shadow_frame->GetResultRegister();
   const Instruction* inst = Instruction::At(dex_pc_ptr);
-  return DoInvoke<kStatic, false, false>(
+  return DoFastInvoke<kStatic>(
       self, *shadow_frame, inst, inst_data, result_register);
 }
 
@@ -267,6 +268,18 @@
     REQUIRES_SHARED(Locks::mutator_lock_) {
   JValue* result_register = shadow_frame->GetResultRegister();
   const Instruction* inst = Instruction::At(dex_pc_ptr);
+  const uint32_t vregC = inst->VRegC_35c();
+  const uint32_t vtable_idx = inst->VRegB_35c();
+  ObjPtr<mirror::Object> const receiver = shadow_frame->GetVRegReference(vregC);
+  if (receiver != nullptr) {
+    ArtMethod* const called_method = receiver->GetClass()->GetEmbeddedVTableEntry(
+        vtable_idx, kRuntimePointerSize);
+    if ((called_method != nullptr) && called_method->IsIntrinsic()) {
+      if (MterpHandleIntrinsic(shadow_frame, called_method, inst, inst_data, result_register)) {
+        return !self->IsExceptionPending();
+      }
+    }
+  }
   return DoInvokeVirtualQuick<false>(
       self, *shadow_frame, inst, inst_data, result_register);
 }
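Note: the block added above gives invoke-virtual-quick an intrinsic fast path. The callee is resolved directly from the receiver's embedded vtable, and when it is an intrinsic, MterpHandleIntrinsic (declared in the newly included interpreter_intrinsics.h) executes it in place with no invoke frame setup; a true return means the call was handled, so only a pending exception can fail it. A minimal sketch of the shape such a handler takes; the Intrinsics enumerator and the String.length body are illustrative assumptions, not the actual interpreter_intrinsics.h contents:

    // Hypothetical sketch only; the real dispatch lives in interpreter_intrinsics.h.
    // Contract visible in the hunk above: return true iff the call was handled,
    // leaving the result in result_register or an exception pending on self.
    bool MterpHandleIntrinsicSketch(ShadowFrame* shadow_frame,
                                    ArtMethod* called_method,
                                    const Instruction* inst,
                                    uint16_t inst_data,
                                    JValue* result_register) {
      switch (static_cast<Intrinsics>(called_method->GetIntrinsic())) {  // accessor assumed
        case Intrinsics::kStringLength: {  // enumerator name assumed
          uint32_t arg[Instruction::kMaxVarArgRegs] = {};
          inst->GetVarArgs(arg, inst_data);
          // arg[0] is the receiver vreg; the caller already null-checked it.
          ObjPtr<mirror::String> str =
              shadow_frame->GetVRegReference(arg[0])->AsString();
          result_register->SetI(str->GetLength());
          return true;
        }
        default:
          return false;  // Not handled; caller falls through to DoInvokeVirtualQuick.
      }
    }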
@@ -587,27 +600,6 @@
   return MterpShouldSwitchInterpreters();
 }
 
-extern "C" ssize_t artSet64IndirectStaticFromMterp(uint32_t field_idx,
-                                                   ArtMethod* referrer,
-                                                   uint64_t* new_value,
-                                                   Thread* self)
-    REQUIRES_SHARED(Locks::mutator_lock_) {
-  ScopedQuickEntrypointChecks sqec(self);
-  ArtField* field = FindFieldFast(field_idx, referrer, StaticPrimitiveWrite, sizeof(int64_t));
-  if (LIKELY(field != nullptr)) {
-    // Compiled code can't use transactional mode.
-    field->Set64<false>(field->GetDeclaringClass(), *new_value);
-    return 0;  // success
-  }
-  field = FindFieldFromCode<StaticPrimitiveWrite, true>(field_idx, referrer, self, sizeof(int64_t));
-  if (LIKELY(field != nullptr)) {
-    // Compiled code can't use transactional mode.
-    field->Set64<false>(field->GetDeclaringClass(), *new_value);
-    return 0;  // success
-  }
-  return -1;  // failure
-}
-
 extern "C" ssize_t artSet8InstanceFromMterp(uint32_t field_idx,
                                             mirror::Object* obj,
                                             uint8_t new_value,
@@ -689,7 +681,187 @@
   return -1;  // failure
 }
 
-extern "C" mirror::Object* artAGetObjectFromMterp(mirror::Object* arr, int32_t index)
+template <typename return_type, Primitive::Type primitive_type>
+ALWAYS_INLINE return_type MterpGetStatic(uint32_t field_idx,
+                                         ArtMethod* referrer,
+                                         Thread* self,
+                                         return_type (ArtField::*func)(ObjPtr<mirror::Object>))
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  return_type res = 0;  // On exception, the result will be ignored.
+  ArtField* f =
+      FindFieldFromCode<StaticPrimitiveRead, false>(field_idx,
+                                                    referrer,
+                                                    self,
+                                                    Primitive::ComponentSize(primitive_type));
+  if (LIKELY(f != nullptr)) {
+    ObjPtr<mirror::Object> obj = f->GetDeclaringClass();
+    res = (f->*func)(obj);
+  }
+  return res;
+}
+
+extern "C" int32_t MterpGetBooleanStatic(uint32_t field_idx,
+                                         ArtMethod* referrer,
+                                         Thread* self)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  return MterpGetStatic<uint8_t, Primitive::kPrimBoolean>(field_idx,
+                                                          referrer,
+                                                          self,
+                                                          &ArtField::GetBoolean);
+}
+
+extern "C" int32_t MterpGetByteStatic(uint32_t field_idx,
+                                      ArtMethod* referrer,
+                                      Thread* self)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  return MterpGetStatic<int8_t, Primitive::kPrimByte>(field_idx,
+                                                      referrer,
+                                                      self,
+                                                      &ArtField::GetByte);
+}
+
+extern "C" uint32_t MterpGetCharStatic(uint32_t field_idx,
+                                       ArtMethod* referrer,
+                                       Thread* self)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  return MterpGetStatic<uint16_t, Primitive::kPrimChar>(field_idx,
+                                                        referrer,
+                                                        self,
+                                                        &ArtField::GetChar);
+}
+
+extern "C" int32_t MterpGetShortStatic(uint32_t field_idx,
+                                       ArtMethod* referrer,
+                                       Thread* self)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  return MterpGetStatic<int16_t, Primitive::kPrimShort>(field_idx,
+                                                        referrer,
+                                                        self,
+                                                        &ArtField::GetShort);
+}
+
+extern "C" mirror::Object* MterpGetObjStatic(uint32_t field_idx,
+                                             ArtMethod* referrer,
+                                             Thread* self)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  return MterpGetStatic<ObjPtr<mirror::Object>, Primitive::kPrimNot>(field_idx,
+                                                                     referrer,
+                                                                     self,
+                                                                     &ArtField::GetObject).Ptr();
+}
+
+extern "C" int32_t MterpGet32Static(uint32_t field_idx,
+                                    ArtMethod* referrer,
+                                    Thread* self)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  return MterpGetStatic<int32_t, Primitive::kPrimInt>(field_idx,
+                                                      referrer,
+                                                      self,
+                                                      &ArtField::GetInt);
+}
+
+extern "C" int64_t MterpGet64Static(uint32_t field_idx, ArtMethod* referrer, Thread* self)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  return MterpGetStatic<int64_t, Primitive::kPrimLong>(field_idx,
+                                                       referrer,
+                                                       self,
+                                                       &ArtField::GetLong);
+}
+
+
+template <typename field_type, Primitive::Type primitive_type>
+int MterpSetStatic(uint32_t field_idx,
+                   field_type new_value,
+                   ArtMethod* referrer,
+                   Thread* self,
+                   void (ArtField::*func)(ObjPtr<mirror::Object>, field_type val))
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  int res = 0;  // Assume success (following quick_field_entrypoints conventions)
+  ArtField* f = FindFieldFromCode<StaticPrimitiveWrite, false>(
+      field_idx, referrer, self, Primitive::ComponentSize(primitive_type));
+  if (LIKELY(f != nullptr)) {
+    ObjPtr<mirror::Object> obj = f->GetDeclaringClass();
+    (f->*func)(obj, new_value);
+  } else {
+    res = -1;  // Failure
+  }
+  return res;
+}
+
+extern "C" int MterpSetBooleanStatic(uint32_t field_idx,
+                                     uint8_t new_value,
+                                     ArtMethod* referrer,
+                                     Thread* self)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  return MterpSetStatic<uint8_t, Primitive::kPrimBoolean>(field_idx,
+                                                          new_value,
+                                                          referrer,
+                                                          self,
+                                                          &ArtField::SetBoolean<false>);
+}
+
+extern "C" int MterpSetByteStatic(uint32_t field_idx,
+                                  int8_t new_value,
+                                  ArtMethod* referrer,
+                                  Thread* self)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  return MterpSetStatic<int8_t, Primitive::kPrimByte>(field_idx,
+                                                      new_value,
+                                                      referrer,
+                                                      self,
+                                                      &ArtField::SetByte<false>);
+}
+
+extern "C" int MterpSetCharStatic(uint32_t field_idx,
+                                  uint16_t new_value,
+                                  ArtMethod* referrer,
+                                  Thread* self)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  return MterpSetStatic<uint16_t, Primitive::kPrimChar>(field_idx,
+                                                        new_value,
+                                                        referrer,
+                                                        self,
+                                                        &ArtField::SetChar<false>);
+}
+
+extern "C" int MterpSetShortStatic(uint32_t field_idx,
+                                   int16_t new_value,
+                                   ArtMethod* referrer,
+                                   Thread* self)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  return MterpSetStatic<int16_t, Primitive::kPrimShort>(field_idx,
+                                                        new_value,
+                                                        referrer,
+                                                        self,
+                                                        &ArtField::SetShort<false>);
+}
+
+extern "C" int MterpSet32Static(uint32_t field_idx,
+                                int32_t new_value,
+                                ArtMethod* referrer,
+                                Thread* self)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  return MterpSetStatic<int32_t, Primitive::kPrimInt>(field_idx,
+                                                      new_value,
+                                                      referrer,
+                                                      self,
+                                                      &ArtField::SetInt<false>);
+}
+
+extern "C" int MterpSet64Static(uint32_t field_idx,
+                                int64_t* new_value,
+                                ArtMethod* referrer,
+                                Thread* self)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  return MterpSetStatic<int64_t, Primitive::kPrimLong>(field_idx,
+                                                       *new_value,
+                                                       referrer,
+                                                       self,
+                                                       &ArtField::SetLong<false>);
+}
+
+extern "C" mirror::Object* artAGetObjectFromMterp(mirror::Object* arr,
+                                                  int32_t index)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   if (UNLIKELY(arr == nullptr)) {
     ThrowNullPointerExceptionFromInterpreter();
@@ -703,7 +875,8 @@
   }
 }
 
-extern "C" mirror::Object* artIGetObjectFromMterp(mirror::Object* obj, uint32_t field_offset)
+extern "C" mirror::Object* artIGetObjectFromMterp(mirror::Object* obj,
+                                                  uint32_t field_offset)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   if (UNLIKELY(obj == nullptr)) {
     ThrowNullPointerExceptionFromInterpreter();
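Note: taken together, the two templates above collapse roughly a dozen hand-written entry points into one getter and one setter, parameterized on the field width and on which ArtField accessor to invoke; because MterpGetStatic is ALWAYS_INLINE and the member function pointer is a compile-time constant, each extern "C" wrapper should still reduce to a direct accessor call. Extending the pattern to another width is a one-liner pair. A hedged sketch of what a float getter would look like under the same scheme; MterpGetFloatStatic is hypothetical, this patch does not add it:

    // Hypothetical, for illustration only; not part of this patch.
    extern "C" float MterpGetFloatStatic(uint32_t field_idx,
                                         ArtMethod* referrer,
                                         Thread* self)
        REQUIRES_SHARED(Locks::mutator_lock_) {
      return MterpGetStatic<float, Primitive::kPrimFloat>(field_idx,
                                                          referrer,
                                                          self,
                                                          &ArtField::GetFloat);
    }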
diff --git a/runtime/interpreter/mterp/out/mterp_arm.S b/runtime/interpreter/mterp/out/mterp_arm.S
index 8916241..e2b693f 100644
--- a/runtime/interpreter/mterp/out/mterp_arm.S
+++ b/runtime/interpreter/mterp/out/mterp_arm.S
@@ -2631,12 +2631,12 @@
      */
     /* op vAA, field@BBBB */
 
-    .extern artGet32StaticFromCode
+    .extern MterpGet32Static
     EXPORT_PC
     FETCH r0, 1                         @ r0<- field ref BBBB
     ldr   r1, [rFP, #OFF_FP_METHOD]
     mov   r2, rSELF
-    bl    artGet32StaticFromCode
+    bl    MterpGet32Static
     ldr   r3, [rSELF, #THREAD_EXCEPTION_OFFSET]
     mov   r2, rINST, lsr #8             @ r2<- AA
     PREFETCH_INST 2
@@ -2661,12 +2661,12 @@
      */
     /* sget-wide vAA, field@BBBB */
 
-    .extern artGet64StaticFromCode
+    .extern MterpGet64Static
     EXPORT_PC
     FETCH r0, 1                         @ r0<- field ref BBBB
     ldr   r1, [rFP, #OFF_FP_METHOD]
     mov   r2, rSELF
-    bl    artGet64StaticFromCode
+    bl    MterpGet64Static
     ldr   r3, [rSELF, #THREAD_EXCEPTION_OFFSET]
     mov   r9, rINST, lsr #8             @ r9<- AA
     VREG_INDEX_TO_ADDR lr, r9           @ r9<- &fp[AA]
@@ -2690,12 +2690,12 @@
      */
     /* op vAA, field@BBBB */
 
-    .extern artGetObjStaticFromCode
+    .extern MterpGetObjStatic
     EXPORT_PC
     FETCH r0, 1                         @ r0<- field ref BBBB
     ldr   r1, [rFP, #OFF_FP_METHOD]
     mov   r2, rSELF
-    bl    artGetObjStaticFromCode
+    bl    MterpGetObjStatic
     ldr   r3, [rSELF, #THREAD_EXCEPTION_OFFSET]
     mov   r2, rINST, lsr #8             @ r2<- AA
     PREFETCH_INST 2
@@ -2723,12 +2723,12 @@
      */
     /* op vAA, field@BBBB */
 
-    .extern artGetBooleanStaticFromCode
+    .extern MterpGetBooleanStatic
     EXPORT_PC
     FETCH r0, 1                         @ r0<- field ref BBBB
     ldr   r1, [rFP, #OFF_FP_METHOD]
     mov   r2, rSELF
-    bl    artGetBooleanStaticFromCode
+    bl    MterpGetBooleanStatic
     ldr   r3, [rSELF, #THREAD_EXCEPTION_OFFSET]
     mov   r2, rINST, lsr #8             @ r2<- AA
     PREFETCH_INST 2
@@ -2756,12 +2756,12 @@
      */
     /* op vAA, field@BBBB */
 
-    .extern artGetByteStaticFromCode
+    .extern MterpGetByteStatic
     EXPORT_PC
     FETCH r0, 1                         @ r0<- field ref BBBB
     ldr   r1, [rFP, #OFF_FP_METHOD]
     mov   r2, rSELF
-    bl    artGetByteStaticFromCode
+    bl    MterpGetByteStatic
     ldr   r3, [rSELF, #THREAD_EXCEPTION_OFFSET]
     mov   r2, rINST, lsr #8             @ r2<- AA
     PREFETCH_INST 2
@@ -2789,12 +2789,12 @@
      */
     /* op vAA, field@BBBB */
 
-    .extern artGetCharStaticFromCode
+    .extern MterpGetCharStatic
     EXPORT_PC
     FETCH r0, 1                         @ r0<- field ref BBBB
     ldr   r1, [rFP, #OFF_FP_METHOD]
     mov   r2, rSELF
-    bl    artGetCharStaticFromCode
+    bl    MterpGetCharStatic
     ldr   r3, [rSELF, #THREAD_EXCEPTION_OFFSET]
     mov   r2, rINST, lsr #8             @ r2<- AA
     PREFETCH_INST 2
@@ -2822,12 +2822,12 @@
      */
     /* op vAA, field@BBBB */
 
-    .extern artGetShortStaticFromCode
+    .extern MterpGetShortStatic
     EXPORT_PC
     FETCH r0, 1                         @ r0<- field ref BBBB
     ldr   r1, [rFP, #OFF_FP_METHOD]
     mov   r2, rSELF
-    bl    artGetShortStaticFromCode
+    bl    MterpGetShortStatic
     ldr   r3, [rSELF, #THREAD_EXCEPTION_OFFSET]
     mov   r2, rINST, lsr #8             @ r2<- AA
     PREFETCH_INST 2
@@ -2860,7 +2860,7 @@
     ldr     r2, [rFP, #OFF_FP_METHOD]
     mov     r3, rSELF
     PREFETCH_INST 2                     @ Get next inst, but don't advance rPC
-    bl      artSet32StaticFromCode
+    bl      MterpSet32Static
     cmp     r0, #0                      @ 0 on success, -1 on failure
     bne     MterpException
     ADVANCE 2                           @ Past exception point - now advance rPC
@@ -2876,15 +2876,15 @@
      *
      */
     /* sput-wide vAA, field@BBBB */
-    .extern artSet64IndirectStaticFromMterp
+    .extern MterpSet64Static
     EXPORT_PC
     FETCH   r0, 1                       @ r0<- field ref BBBB
-    ldr     r1, [rFP, #OFF_FP_METHOD]
-    mov     r2, rINST, lsr #8           @ r3<- AA
-    VREG_INDEX_TO_ADDR r2, r2
+    mov     r1, rINST, lsr #8           @ r1<- AA
+    VREG_INDEX_TO_ADDR r1, r1
+    ldr     r2, [rFP, #OFF_FP_METHOD]
     mov     r3, rSELF
     PREFETCH_INST 2                     @ Get next inst, but don't advance rPC
-    bl      artSet64IndirectStaticFromMterp
+    bl      MterpSet64Static
     cmp     r0, #0                      @ 0 on success, -1 on failure
     bne     MterpException
     ADVANCE 2                           @ Past exception point - now advance rPC
@@ -2925,7 +2925,7 @@
     ldr     r2, [rFP, #OFF_FP_METHOD]
     mov     r3, rSELF
     PREFETCH_INST 2                     @ Get next inst, but don't advance rPC
-    bl      artSet8StaticFromCode
+    bl      MterpSetBooleanStatic
     cmp     r0, #0                      @ 0 on success, -1 on failure
     bne     MterpException
     ADVANCE 2                           @ Past exception point - now advance rPC
@@ -2951,7 +2951,7 @@
     ldr     r2, [rFP, #OFF_FP_METHOD]
     mov     r3, rSELF
     PREFETCH_INST 2                     @ Get next inst, but don't advance rPC
-    bl      artSet8StaticFromCode
+    bl      MterpSetByteStatic
     cmp     r0, #0                      @ 0 on success, -1 on failure
     bne     MterpException
     ADVANCE 2                           @ Past exception point - now advance rPC
@@ -2977,7 +2977,7 @@
     ldr     r2, [rFP, #OFF_FP_METHOD]
     mov     r3, rSELF
     PREFETCH_INST 2                     @ Get next inst, but don't advance rPC
-    bl      artSet16StaticFromCode
+    bl      MterpSetCharStatic
     cmp     r0, #0                      @ 0 on success, -1 on failure
     bne     MterpException
     ADVANCE 2                           @ Past exception point - now advance rPC
@@ -3003,7 +3003,7 @@
     ldr     r2, [rFP, #OFF_FP_METHOD]
     mov     r3, rSELF
     PREFETCH_INST 2                     @ Get next inst, but don't advance rPC
-    bl      artSet16StaticFromCode
+    bl      MterpSetShortStatic
     cmp     r0, #0                      @ 0 on success, -1 on failure
     bne     MterpException
     ADVANCE 2                           @ Past exception point - now advance rPC
@@ -7347,24 +7347,16 @@
 
 /* ------------------------------ */
     .balign 128
-.L_op_unused_fc: /* 0xfc */
-/* File: arm/op_unused_fc.S */
-/* File: arm/unused.S */
-/*
- * Bail to reference interpreter to throw.
- */
-  b MterpFallback
+.L_op_invoke_custom: /* 0xfc */
+/* Transfer stub to alternate interpreter */
+    b    MterpFallback
 
 
 /* ------------------------------ */
     .balign 128
-.L_op_unused_fd: /* 0xfd */
-/* File: arm/op_unused_fd.S */
-/* File: arm/unused.S */
-/*
- * Bail to reference interpreter to throw.
- */
-  b MterpFallback
+.L_op_invoke_custom_range: /* 0xfd */
+/* Transfer stub to alternate interpreter */
+    b    MterpFallback
 
 
 /* ------------------------------ */
@@ -11763,7 +11755,7 @@
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_unused_fc: /* 0xfc */
+.L_ALT_op_invoke_custom: /* 0xfc */
 /* File: arm/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
@@ -11780,7 +11772,7 @@
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_unused_fd: /* 0xfd */
+.L_ALT_op_invoke_custom_range: /* 0xfd */
 /* File: arm/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
diff --git a/runtime/interpreter/mterp/out/mterp_arm64.S b/runtime/interpreter/mterp/out/mterp_arm64.S
index 7d442c0..ef5a4da 100644
--- a/runtime/interpreter/mterp/out/mterp_arm64.S
+++ b/runtime/interpreter/mterp/out/mterp_arm64.S
@@ -2543,12 +2543,12 @@
      */
     /* op vAA, field//BBBB */
 
-    .extern artGet32StaticFromCode
+    .extern MterpGet32Static
     EXPORT_PC
     FETCH w0, 1                         // w0<- field ref BBBB
     ldr   x1, [xFP, #OFF_FP_METHOD]
     mov   x2, xSELF
-    bl    artGet32StaticFromCode
+    bl    MterpGet32Static
     ldr   x3, [xSELF, #THREAD_EXCEPTION_OFFSET]
     lsr   w2, wINST, #8                 // w2<- AA
     
@@ -2573,12 +2573,12 @@
      */
     /* sget-wide vAA, field//BBBB */
 
-    .extern artGet64StaticFromCode
+    .extern MterpGet64Static
     EXPORT_PC
     FETCH w0, 1                         // w0<- field ref BBBB
     ldr   x1, [xFP, #OFF_FP_METHOD]
     mov   x2, xSELF
-    bl    artGet64StaticFromCode
+    bl    MterpGet64Static
     ldr   x3, [xSELF, #THREAD_EXCEPTION_OFFSET]
     lsr   w4, wINST, #8                 // w4<- AA
     cbnz  x3, MterpException            // bail out
@@ -2599,12 +2599,12 @@
      */
     /* op vAA, field//BBBB */
 
-    .extern artGetObjStaticFromCode
+    .extern MterpGetObjStatic
     EXPORT_PC
     FETCH w0, 1                         // w0<- field ref BBBB
     ldr   x1, [xFP, #OFF_FP_METHOD]
     mov   x2, xSELF
-    bl    artGetObjStaticFromCode
+    bl    MterpGetObjStatic
     ldr   x3, [xSELF, #THREAD_EXCEPTION_OFFSET]
     lsr   w2, wINST, #8                 // w2<- AA
     
@@ -2632,12 +2632,12 @@
      */
     /* op vAA, field//BBBB */
 
-    .extern artGetBooleanStaticFromCode
+    .extern MterpGetBooleanStatic
     EXPORT_PC
     FETCH w0, 1                         // w0<- field ref BBBB
     ldr   x1, [xFP, #OFF_FP_METHOD]
     mov   x2, xSELF
-    bl    artGetBooleanStaticFromCode
+    bl    MterpGetBooleanStatic
     ldr   x3, [xSELF, #THREAD_EXCEPTION_OFFSET]
     lsr   w2, wINST, #8                 // w2<- AA
     uxtb w0, w0
@@ -2665,12 +2665,12 @@
      */
     /* op vAA, field//BBBB */
 
-    .extern artGetByteStaticFromCode
+    .extern MterpGetByteStatic
     EXPORT_PC
     FETCH w0, 1                         // w0<- field ref BBBB
     ldr   x1, [xFP, #OFF_FP_METHOD]
     mov   x2, xSELF
-    bl    artGetByteStaticFromCode
+    bl    MterpGetByteStatic
     ldr   x3, [xSELF, #THREAD_EXCEPTION_OFFSET]
     lsr   w2, wINST, #8                 // w2<- AA
     sxtb w0, w0
@@ -2698,12 +2698,12 @@
      */
     /* op vAA, field//BBBB */
 
-    .extern artGetCharStaticFromCode
+    .extern MterpGetCharStatic
     EXPORT_PC
     FETCH w0, 1                         // w0<- field ref BBBB
     ldr   x1, [xFP, #OFF_FP_METHOD]
     mov   x2, xSELF
-    bl    artGetCharStaticFromCode
+    bl    MterpGetCharStatic
     ldr   x3, [xSELF, #THREAD_EXCEPTION_OFFSET]
     lsr   w2, wINST, #8                 // w2<- AA
     uxth w0, w0
@@ -2731,12 +2731,12 @@
      */
     /* op vAA, field//BBBB */
 
-    .extern artGetShortStaticFromCode
+    .extern MterpGetShortStatic
     EXPORT_PC
     FETCH w0, 1                         // w0<- field ref BBBB
     ldr   x1, [xFP, #OFF_FP_METHOD]
     mov   x2, xSELF
-    bl    artGetShortStaticFromCode
+    bl    MterpGetShortStatic
     ldr   x3, [xSELF, #THREAD_EXCEPTION_OFFSET]
     lsr   w2, wINST, #8                 // w2<- AA
     sxth w0, w0
@@ -2769,7 +2769,7 @@
     ldr     x2, [xFP, #OFF_FP_METHOD]
     mov     x3, xSELF
     PREFETCH_INST 2                     // Get next inst, but don't advance rPC
-    bl      artSet32StaticFromCode
+    bl      MterpSet32Static
     cbnz    w0, MterpException          // 0 on success
     ADVANCE 2                           // Past exception point - now advance rPC
     GET_INST_OPCODE ip                  // extract opcode from rINST
@@ -2784,15 +2784,15 @@
      *
      */
     /* sput-wide vAA, field//BBBB */
-    .extern artSet64IndirectStaticFromMterp
+    .extern MterpSet64Static
     EXPORT_PC
     FETCH   w0, 1                       // w0<- field ref BBBB
-    ldr     x1, [xFP, #OFF_FP_METHOD]
-    lsr     w2, wINST, #8               // w3<- AA
-    VREG_INDEX_TO_ADDR x2, w2
+    lsr     w1, wINST, #8               // w1<- AA
+    VREG_INDEX_TO_ADDR x1, w1
+    ldr     x2, [xFP, #OFF_FP_METHOD]
     mov     x3, xSELF
     PREFETCH_INST 2                     // Get next inst, but don't advance rPC
-    bl      artSet64IndirectStaticFromMterp
+    bl      MterpSet64Static
     cbnz    w0, MterpException          // 0 on success, -1 on failure
     ADVANCE 2                           // Past exception point - now advance rPC
     GET_INST_OPCODE ip                  // extract opcode from wINST
@@ -2831,7 +2831,7 @@
     ldr     x2, [xFP, #OFF_FP_METHOD]
     mov     x3, xSELF
     PREFETCH_INST 2                     // Get next inst, but don't advance rPC
-    bl      artSet8StaticFromCode
+    bl      MterpSetBooleanStatic
     cbnz    w0, MterpException          // 0 on success
     ADVANCE 2                           // Past exception point - now advance rPC
     GET_INST_OPCODE ip                  // extract opcode from rINST
@@ -2856,7 +2856,7 @@
     ldr     x2, [xFP, #OFF_FP_METHOD]
     mov     x3, xSELF
     PREFETCH_INST 2                     // Get next inst, but don't advance rPC
-    bl      artSet8StaticFromCode
+    bl      MterpSetByteStatic
     cbnz    w0, MterpException          // 0 on success
     ADVANCE 2                           // Past exception point - now advance rPC
     GET_INST_OPCODE ip                  // extract opcode from rINST
@@ -2881,7 +2881,7 @@
     ldr     x2, [xFP, #OFF_FP_METHOD]
     mov     x3, xSELF
     PREFETCH_INST 2                     // Get next inst, but don't advance rPC
-    bl      artSet16StaticFromCode
+    bl      MterpSetCharStatic
     cbnz    w0, MterpException          // 0 on success
     ADVANCE 2                           // Past exception point - now advance rPC
     GET_INST_OPCODE ip                  // extract opcode from rINST
@@ -2906,7 +2906,7 @@
     ldr     x2, [xFP, #OFF_FP_METHOD]
     mov     x3, xSELF
     PREFETCH_INST 2                     // Get next inst, but don't advance rPC
-    bl      artSet16StaticFromCode
+    bl      MterpSetShortStatic
     cbnz    w0, MterpException          // 0 on success
     ADVANCE 2                           // Past exception point - now advance rPC
     GET_INST_OPCODE ip                  // extract opcode from rINST
@@ -6914,24 +6914,16 @@
 
 /* ------------------------------ */
     .balign 128
-.L_op_unused_fc: /* 0xfc */
-/* File: arm64/op_unused_fc.S */
-/* File: arm64/unused.S */
-/*
- * Bail to reference interpreter to throw.
- */
-  b MterpFallback
+.L_op_invoke_custom: /* 0xfc */
+/* Transfer stub to alternate interpreter */
+    b    MterpFallback
 
 
 /* ------------------------------ */
     .balign 128
-.L_op_unused_fd: /* 0xfd */
-/* File: arm64/op_unused_fd.S */
-/* File: arm64/unused.S */
-/*
- * Bail to reference interpreter to throw.
- */
-  b MterpFallback
+.L_op_invoke_custom_range: /* 0xfd */
+/* Transfer stub to alternate interpreter */
+    b    MterpFallback
 
 
 /* ------------------------------ */
@@ -11580,7 +11572,7 @@
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_unused_fc: /* 0xfc */
+.L_ALT_op_invoke_custom: /* 0xfc */
 /* File: arm64/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
@@ -11597,7 +11589,7 @@
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_unused_fd: /* 0xfd */
+.L_ALT_op_invoke_custom_range: /* 0xfd */
 /* File: arm64/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
diff --git a/runtime/interpreter/mterp/out/mterp_mips.S b/runtime/interpreter/mterp/out/mterp_mips.S
index e154e6c..579afc2 100644
--- a/runtime/interpreter/mterp/out/mterp_mips.S
+++ b/runtime/interpreter/mterp/out/mterp_mips.S
@@ -3038,12 +3038,12 @@
      * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
      */
     /* op vAA, field@BBBB */
-    .extern artGet32StaticFromCode
+    .extern MterpGet32Static
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref BBBB
     lw    a1, OFF_FP_METHOD(rFP)           # a1 <- method
     move  a2, rSELF                        # a2 <- self
-    JAL(artGet32StaticFromCode)
+    JAL(MterpGet32Static)
     lw    a3, THREAD_EXCEPTION_OFFSET(rSELF)
     GET_OPA(a2)                            # a2 <- AA
     PREFETCH_INST(2)
@@ -3064,12 +3064,12 @@
      * 64-bit SGET handler.
      */
     /* sget-wide vAA, field@BBBB */
-    .extern artGet64StaticFromCode
+    .extern MterpGet64Static
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref BBBB
     lw    a1, OFF_FP_METHOD(rFP)           # a1 <- method
     move  a2, rSELF                        # a2 <- self
-    JAL(artGet64StaticFromCode)
+    JAL(MterpGet64Static)
     lw    a3, THREAD_EXCEPTION_OFFSET(rSELF)
     bnez  a3, MterpException
     GET_OPA(a1)                            # a1 <- AA
@@ -3088,12 +3088,12 @@
      * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
      */
     /* op vAA, field@BBBB */
-    .extern artGetObjStaticFromCode
+    .extern MterpGetObjStatic
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref BBBB
     lw    a1, OFF_FP_METHOD(rFP)           # a1 <- method
     move  a2, rSELF                        # a2 <- self
-    JAL(artGetObjStaticFromCode)
+    JAL(MterpGetObjStatic)
     lw    a3, THREAD_EXCEPTION_OFFSET(rSELF)
     GET_OPA(a2)                            # a2 <- AA
     PREFETCH_INST(2)
@@ -3118,12 +3118,12 @@
      * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
      */
     /* op vAA, field@BBBB */
-    .extern artGetBooleanStaticFromCode
+    .extern MterpGetBooleanStatic
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref BBBB
     lw    a1, OFF_FP_METHOD(rFP)           # a1 <- method
     move  a2, rSELF                        # a2 <- self
-    JAL(artGetBooleanStaticFromCode)
+    JAL(MterpGetBooleanStatic)
     lw    a3, THREAD_EXCEPTION_OFFSET(rSELF)
     GET_OPA(a2)                            # a2 <- AA
     PREFETCH_INST(2)
@@ -3148,12 +3148,12 @@
      * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
      */
     /* op vAA, field@BBBB */
-    .extern artGetByteStaticFromCode
+    .extern MterpGetByteStatic
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref BBBB
     lw    a1, OFF_FP_METHOD(rFP)           # a1 <- method
     move  a2, rSELF                        # a2 <- self
-    JAL(artGetByteStaticFromCode)
+    JAL(MterpGetByteStatic)
     lw    a3, THREAD_EXCEPTION_OFFSET(rSELF)
     GET_OPA(a2)                            # a2 <- AA
     PREFETCH_INST(2)
@@ -3178,12 +3178,12 @@
      * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
      */
     /* op vAA, field@BBBB */
-    .extern artGetCharStaticFromCode
+    .extern MterpGetCharStatic
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref BBBB
     lw    a1, OFF_FP_METHOD(rFP)           # a1 <- method
     move  a2, rSELF                        # a2 <- self
-    JAL(artGetCharStaticFromCode)
+    JAL(MterpGetCharStatic)
     lw    a3, THREAD_EXCEPTION_OFFSET(rSELF)
     GET_OPA(a2)                            # a2 <- AA
     PREFETCH_INST(2)
@@ -3208,12 +3208,12 @@
      * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
      */
     /* op vAA, field@BBBB */
-    .extern artGetShortStaticFromCode
+    .extern MterpGetShortStatic
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref BBBB
     lw    a1, OFF_FP_METHOD(rFP)           # a1 <- method
     move  a2, rSELF                        # a2 <- self
-    JAL(artGetShortStaticFromCode)
+    JAL(MterpGetShortStatic)
     lw    a3, THREAD_EXCEPTION_OFFSET(rSELF)
     GET_OPA(a2)                            # a2 <- AA
     PREFETCH_INST(2)
@@ -3244,7 +3244,7 @@
     lw    a2, OFF_FP_METHOD(rFP)           # a2 <- method
     move  a3, rSELF                        # a3 <- self
     PREFETCH_INST(2)                       # load rINST
-    JAL(artSet32StaticFromCode)
+    JAL(MterpSet32Static)
     bnez  v0, MterpException               # bail out
     ADVANCE(2)                             # advance rPC
     GET_INST_OPCODE(t0)                    # extract opcode from rINST
@@ -3258,15 +3258,15 @@
      * 64-bit SPUT handler.
      */
     /* sput-wide vAA, field@BBBB */
-    .extern artSet64IndirectStaticFromMterp
+    .extern MterpSet64Static
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref CCCC
-    lw    a1, OFF_FP_METHOD(rFP)           # a1 <- method
-    GET_OPA(a2)                            # a2 <- AA
-    EAS2(a2, rFP, a2)                      # a2 <- &fp[AA]
+    GET_OPA(a1)                            # a1 <- AA
+    EAS2(a1, rFP, a1)                      # a1 <- &fp[AA]
+    lw    a2, OFF_FP_METHOD(rFP)           # a2 <- method
     move  a3, rSELF                        # a3 <- self
     PREFETCH_INST(2)                       # load rINST
-    JAL(artSet64IndirectStaticFromMterp)
+    JAL(MterpSet64Static)
     bnez  v0, MterpException               # bail out
     ADVANCE(2)                             # advance rPC
     GET_INST_OPCODE(t0)                    # extract opcode from rINST
@@ -3311,7 +3311,7 @@
     lw    a2, OFF_FP_METHOD(rFP)           # a2 <- method
     move  a3, rSELF                        # a3 <- self
     PREFETCH_INST(2)                       # load rINST
-    JAL(artSet8StaticFromCode)
+    JAL(MterpSetBooleanStatic)
     bnez  v0, MterpException               # bail out
     ADVANCE(2)                             # advance rPC
     GET_INST_OPCODE(t0)                    # extract opcode from rINST
@@ -3336,7 +3336,7 @@
     lw    a2, OFF_FP_METHOD(rFP)           # a2 <- method
     move  a3, rSELF                        # a3 <- self
     PREFETCH_INST(2)                       # load rINST
-    JAL(artSet8StaticFromCode)
+    JAL(MterpSetByteStatic)
     bnez  v0, MterpException               # bail out
     ADVANCE(2)                             # advance rPC
     GET_INST_OPCODE(t0)                    # extract opcode from rINST
@@ -3361,7 +3361,7 @@
     lw    a2, OFF_FP_METHOD(rFP)           # a2 <- method
     move  a3, rSELF                        # a3 <- self
     PREFETCH_INST(2)                       # load rINST
-    JAL(artSet16StaticFromCode)
+    JAL(MterpSetCharStatic)
     bnez  v0, MterpException               # bail out
     ADVANCE(2)                             # advance rPC
     GET_INST_OPCODE(t0)                    # extract opcode from rINST
@@ -3386,7 +3386,7 @@
     lw    a2, OFF_FP_METHOD(rFP)           # a2 <- method
     move  a3, rSELF                        # a3 <- self
     PREFETCH_INST(2)                       # load rINST
-    JAL(artSet16StaticFromCode)
+    JAL(MterpSetShortStatic)
     bnez  v0, MterpException               # bail out
     ADVANCE(2)                             # advance rPC
     GET_INST_OPCODE(t0)                    # extract opcode from rINST
@@ -7761,25 +7761,15 @@
 
 /* ------------------------------ */
     .balign 128
-.L_op_unused_fc: /* 0xfc */
-/* File: mips/op_unused_fc.S */
-/* File: mips/unused.S */
-/*
- * Bail to reference interpreter to throw.
- */
-  b MterpFallback
-
+.L_op_invoke_custom: /* 0xfc */
+/* Transfer stub to alternate interpreter */
+    b    MterpFallback
 
 /* ------------------------------ */
     .balign 128
-.L_op_unused_fd: /* 0xfd */
-/* File: mips/op_unused_fd.S */
-/* File: mips/unused.S */
-/*
- * Bail to reference interpreter to throw.
- */
-  b MterpFallback
-
+.L_op_invoke_custom_range: /* 0xfd */
+/* Transfer stub to alternate interpreter */
+    b    MterpFallback
 
 /* ------------------------------ */
     .balign 128
@@ -12423,7 +12413,7 @@
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_unused_fc: /* 0xfc */
+.L_ALT_op_invoke_custom: /* 0xfc */
 /* File: mips/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
@@ -12441,7 +12431,7 @@
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_unused_fd: /* 0xfd */
+.L_ALT_op_invoke_custom_range: /* 0xfd */
 /* File: mips/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
diff --git a/runtime/interpreter/mterp/out/mterp_mips64.S b/runtime/interpreter/mterp/out/mterp_mips64.S
index 013bb32..3656df9 100644
--- a/runtime/interpreter/mterp/out/mterp_mips64.S
+++ b/runtime/interpreter/mterp/out/mterp_mips64.S
@@ -2585,12 +2585,12 @@
      * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
      */
     /* op vAA, field//BBBB */
-    .extern artGet32StaticFromCode
+    .extern MterpGet32Static
     EXPORT_PC
     lhu     a0, 2(rPC)                  # a0 <- field ref BBBB
     ld      a1, OFF_FP_METHOD(rFP)
     move    a2, rSELF
-    jal     artGet32StaticFromCode
+    jal     MterpGet32Static
     ld      a3, THREAD_EXCEPTION_OFFSET(rSELF)
     srl     a2, rINST, 8                # a2 <- AA
     
@@ -2614,12 +2614,12 @@
      *
      */
     /* sget-wide vAA, field//BBBB */
-    .extern artGet64StaticFromCode
+    .extern MterpGet64Static
     EXPORT_PC
     lhu     a0, 2(rPC)                  # a0 <- field ref BBBB
     ld      a1, OFF_FP_METHOD(rFP)
     move    a2, rSELF
-    jal     artGet64StaticFromCode
+    jal     MterpGet64Static
     ld      a3, THREAD_EXCEPTION_OFFSET(rSELF)
     srl     a4, rINST, 8                # a4 <- AA
     bnez    a3, MterpException          # bail out
@@ -2639,12 +2639,12 @@
      * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
      */
     /* op vAA, field//BBBB */
-    .extern artGetObjStaticFromCode
+    .extern MterpGetObjStatic
     EXPORT_PC
     lhu     a0, 2(rPC)                  # a0 <- field ref BBBB
     ld      a1, OFF_FP_METHOD(rFP)
     move    a2, rSELF
-    jal     artGetObjStaticFromCode
+    jal     MterpGetObjStatic
     ld      a3, THREAD_EXCEPTION_OFFSET(rSELF)
     srl     a2, rINST, 8                # a2 <- AA
     
@@ -2671,12 +2671,12 @@
      * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
      */
     /* op vAA, field//BBBB */
-    .extern artGetBooleanStaticFromCode
+    .extern MterpGetBooleanStatic
     EXPORT_PC
     lhu     a0, 2(rPC)                  # a0 <- field ref BBBB
     ld      a1, OFF_FP_METHOD(rFP)
     move    a2, rSELF
-    jal     artGetBooleanStaticFromCode
+    jal     MterpGetBooleanStatic
     ld      a3, THREAD_EXCEPTION_OFFSET(rSELF)
     srl     a2, rINST, 8                # a2 <- AA
     and v0, v0, 0xff
@@ -2703,12 +2703,12 @@
      * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
      */
     /* op vAA, field//BBBB */
-    .extern artGetByteStaticFromCode
+    .extern MterpGetByteStatic
     EXPORT_PC
     lhu     a0, 2(rPC)                  # a0 <- field ref BBBB
     ld      a1, OFF_FP_METHOD(rFP)
     move    a2, rSELF
-    jal     artGetByteStaticFromCode
+    jal     MterpGetByteStatic
     ld      a3, THREAD_EXCEPTION_OFFSET(rSELF)
     srl     a2, rINST, 8                # a2 <- AA
     seb v0, v0
@@ -2735,12 +2735,12 @@
      * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
      */
     /* op vAA, field//BBBB */
-    .extern artGetCharStaticFromCode
+    .extern MterpGetCharStatic
     EXPORT_PC
     lhu     a0, 2(rPC)                  # a0 <- field ref BBBB
     ld      a1, OFF_FP_METHOD(rFP)
     move    a2, rSELF
-    jal     artGetCharStaticFromCode
+    jal     MterpGetCharStatic
     ld      a3, THREAD_EXCEPTION_OFFSET(rSELF)
     srl     a2, rINST, 8                # a2 <- AA
     and v0, v0, 0xffff
@@ -2767,12 +2767,12 @@
      * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
      */
     /* op vAA, field//BBBB */
-    .extern artGetShortStaticFromCode
+    .extern MterpGetShortStatic
     EXPORT_PC
     lhu     a0, 2(rPC)                  # a0 <- field ref BBBB
     ld      a1, OFF_FP_METHOD(rFP)
     move    a2, rSELF
-    jal     artGetShortStaticFromCode
+    jal     MterpGetShortStatic
     ld      a3, THREAD_EXCEPTION_OFFSET(rSELF)
     srl     a2, rINST, 8                # a2 <- AA
     seh v0, v0
@@ -2798,7 +2798,7 @@
      * for: sput, sput-boolean, sput-byte, sput-char, sput-short
      */
     /* op vAA, field//BBBB */
-    .extern artSet32StaticFromCode
+    .extern MterpSet32Static
     EXPORT_PC
     lhu     a0, 2(rPC)                  # a0 <- field ref BBBB
     srl     a3, rINST, 8                # a3 <- AA
@@ -2806,7 +2806,7 @@
     ld      a2, OFF_FP_METHOD(rFP)
     move    a3, rSELF
     PREFETCH_INST 2                     # Get next inst, but don't advance rPC
-    jal     artSet32StaticFromCode
+    jal     MterpSet32Static
     bnezc   v0, MterpException          # 0 on success
     ADVANCE 2                           # Past exception point - now advance rPC
     GET_INST_OPCODE v0                  # extract opcode from rINST
@@ -2821,15 +2821,15 @@
      *
      */
     /* sput-wide vAA, field//BBBB */
-    .extern artSet64IndirectStaticFromMterp
+    .extern MterpSet64Static
     EXPORT_PC
     lhu     a0, 2(rPC)                  # a0 <- field ref BBBB
-    ld      a1, OFF_FP_METHOD(rFP)
-    srl     a2, rINST, 8                # a2 <- AA
-    dlsa    a2, a2, rFP, 2
+    srl     a1, rINST, 8                # a1 <- AA
+    dlsa    a1, a1, rFP, 2
+    ld      a2, OFF_FP_METHOD(rFP)
     move    a3, rSELF
     PREFETCH_INST 2                     # Get next inst, but don't advance rPC
-    jal     artSet64IndirectStaticFromMterp
+    jal     MterpSet64Static
     bnezc   v0, MterpException          # 0 on success, -1 on failure
     ADVANCE 2                           # Past exception point - now advance rPC
     GET_INST_OPCODE v0                  # extract opcode from rINST
@@ -2862,7 +2862,7 @@
      * for: sput, sput-boolean, sput-byte, sput-char, sput-short
      */
     /* op vAA, field//BBBB */
-    .extern artSet8StaticFromCode
+    .extern MterpSetBooleanStatic
     EXPORT_PC
     lhu     a0, 2(rPC)                  # a0 <- field ref BBBB
     srl     a3, rINST, 8                # a3 <- AA
@@ -2870,7 +2870,7 @@
     ld      a2, OFF_FP_METHOD(rFP)
     move    a3, rSELF
     PREFETCH_INST 2                     # Get next inst, but don't advance rPC
-    jal     artSet8StaticFromCode
+    jal     MterpSetBooleanStatic
     bnezc   v0, MterpException          # 0 on success
     ADVANCE 2                           # Past exception point - now advance rPC
     GET_INST_OPCODE v0                  # extract opcode from rINST
@@ -2888,7 +2888,7 @@
      * for: sput, sput-boolean, sput-byte, sput-char, sput-short
      */
     /* op vAA, field//BBBB */
-    .extern artSet8StaticFromCode
+    .extern MterpSetByteStatic
     EXPORT_PC
     lhu     a0, 2(rPC)                  # a0 <- field ref BBBB
     srl     a3, rINST, 8                # a3 <- AA
@@ -2896,7 +2896,7 @@
     ld      a2, OFF_FP_METHOD(rFP)
     move    a3, rSELF
     PREFETCH_INST 2                     # Get next inst, but don't advance rPC
-    jal     artSet8StaticFromCode
+    jal     MterpSetByteStatic
     bnezc   v0, MterpException          # 0 on success
     ADVANCE 2                           # Past exception point - now advance rPC
     GET_INST_OPCODE v0                  # extract opcode from rINST
@@ -2914,7 +2914,7 @@
      * for: sput, sput-boolean, sput-byte, sput-char, sput-short
      */
     /* op vAA, field//BBBB */
-    .extern artSet16StaticFromCode
+    .extern MterpSetCharStatic
     EXPORT_PC
     lhu     a0, 2(rPC)                  # a0 <- field ref BBBB
     srl     a3, rINST, 8                # a3 <- AA
@@ -2922,7 +2922,7 @@
     ld      a2, OFF_FP_METHOD(rFP)
     move    a3, rSELF
     PREFETCH_INST 2                     # Get next inst, but don't advance rPC
-    jal     artSet16StaticFromCode
+    jal     MterpSetCharStatic
     bnezc   v0, MterpException          # 0 on success
     ADVANCE 2                           # Past exception point - now advance rPC
     GET_INST_OPCODE v0                  # extract opcode from rINST
@@ -2940,7 +2940,7 @@
      * for: sput, sput-boolean, sput-byte, sput-char, sput-short
      */
     /* op vAA, field//BBBB */
-    .extern artSet16StaticFromCode
+    .extern MterpSetShortStatic
     EXPORT_PC
     lhu     a0, 2(rPC)                  # a0 <- field ref BBBB
     srl     a3, rINST, 8                # a3 <- AA
@@ -2948,7 +2948,7 @@
     ld      a2, OFF_FP_METHOD(rFP)
     move    a3, rSELF
     PREFETCH_INST 2                     # Get next inst, but don't advance rPC
-    jal     artSet16StaticFromCode
+    jal     MterpSetShortStatic
     bnezc   v0, MterpException          # 0 on success
     ADVANCE 2                           # Past exception point - now advance rPC
     GET_INST_OPCODE v0                  # extract opcode from rINST
@@ -7084,26 +7084,16 @@
 
 /* ------------------------------ */
     .balign 128
-.L_op_unused_fc: /* 0xfc */
-/* File: mips64/op_unused_fc.S */
-/* File: mips64/unused.S */
-/*
- * Bail to reference interpreter to throw.
- */
+.L_op_invoke_custom: /* 0xfc */
+/* Transfer stub to alternate interpreter */
     b       MterpFallback
 
-
 /* ------------------------------ */
     .balign 128
-.L_op_unused_fd: /* 0xfd */
-/* File: mips64/op_unused_fd.S */
-/* File: mips64/unused.S */
-/*
- * Bail to reference interpreter to throw.
- */
+.L_op_invoke_custom_range: /* 0xfd */
+/* Transfer stub to alternate interpreter */
     b       MterpFallback
 
-
 /* ------------------------------ */
     .balign 128
 .L_op_unused_fe: /* 0xfe */
@@ -11982,7 +11972,7 @@
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_unused_fc: /* 0xfc */
+.L_ALT_op_invoke_custom: /* 0xfc */
 /* File: mips64/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
@@ -12001,7 +11991,7 @@
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_unused_fd: /* 0xfd */
+.L_ALT_op_invoke_custom_range: /* 0xfd */
 /* File: mips64/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
diff --git a/runtime/interpreter/mterp/out/mterp_x86.S b/runtime/interpreter/mterp/out/mterp_x86.S
index 695d1e4..21d9671 100644
--- a/runtime/interpreter/mterp/out/mterp_x86.S
+++ b/runtime/interpreter/mterp/out/mterp_x86.S
@@ -2535,7 +2535,7 @@
  * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
  */
     /* op vAA, field@BBBB */
-    .extern artGet32StaticFromCode
+    .extern MterpGet32Static
     EXPORT_PC
     movzwl  2(rPC), %eax
     movl    %eax, OUT_ARG0(%esp)            # field ref CCCC
@@ -2543,7 +2543,7 @@
     movl    %eax, OUT_ARG1(%esp)            # referrer
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG2(%esp)            # self
-    call    SYMBOL(artGet32StaticFromCode)
+    call    SYMBOL(MterpGet32Static)
     movl    rSELF, %ecx
     RESTORE_IBASE_FROM_SELF %ecx
     cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
@@ -2564,7 +2564,7 @@
  *
  */
     /* sget-wide vAA, field@BBBB */
-    .extern artGet64StaticFromCode
+    .extern MterpGet64Static
     EXPORT_PC
     movzwl  2(rPC), %eax
     movl    %eax, OUT_ARG0(%esp)            # field ref CCCC
@@ -2572,7 +2572,7 @@
     movl    %eax, OUT_ARG1(%esp)            # referrer
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG2(%esp)            # self
-    call    SYMBOL(artGet64StaticFromCode)
+    call    SYMBOL(MterpGet64Static)
     movl    rSELF, %ecx
     cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
     jnz     MterpException
@@ -2592,7 +2592,7 @@
  * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
  */
     /* op vAA, field@BBBB */
-    .extern artGetObjStaticFromCode
+    .extern MterpGetObjStatic
     EXPORT_PC
     movzwl  2(rPC), %eax
     movl    %eax, OUT_ARG0(%esp)            # field ref CCCC
@@ -2600,7 +2600,7 @@
     movl    %eax, OUT_ARG1(%esp)            # referrer
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG2(%esp)            # self
-    call    SYMBOL(artGetObjStaticFromCode)
+    call    SYMBOL(MterpGetObjStatic)
     movl    rSELF, %ecx
     RESTORE_IBASE_FROM_SELF %ecx
     cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
@@ -2624,7 +2624,7 @@
  * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
  */
     /* op vAA, field@BBBB */
-    .extern artGetBooleanStaticFromCode
+    .extern MterpGetBooleanStatic
     EXPORT_PC
     movzwl  2(rPC), %eax
     movl    %eax, OUT_ARG0(%esp)            # field ref CCCC
@@ -2632,7 +2632,7 @@
     movl    %eax, OUT_ARG1(%esp)            # referrer
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG2(%esp)            # self
-    call    SYMBOL(artGetBooleanStaticFromCode)
+    call    SYMBOL(MterpGetBooleanStatic)
     movl    rSELF, %ecx
     RESTORE_IBASE_FROM_SELF %ecx
     cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
@@ -2656,7 +2656,7 @@
  * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
  */
     /* op vAA, field@BBBB */
-    .extern artGetByteStaticFromCode
+    .extern MterpGetByteStatic
     EXPORT_PC
     movzwl  2(rPC), %eax
     movl    %eax, OUT_ARG0(%esp)            # field ref CCCC
@@ -2664,7 +2664,7 @@
     movl    %eax, OUT_ARG1(%esp)            # referrer
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG2(%esp)            # self
-    call    SYMBOL(artGetByteStaticFromCode)
+    call    SYMBOL(MterpGetByteStatic)
     movl    rSELF, %ecx
     RESTORE_IBASE_FROM_SELF %ecx
     cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
@@ -2688,7 +2688,7 @@
  * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
  */
     /* op vAA, field@BBBB */
-    .extern artGetCharStaticFromCode
+    .extern MterpGetCharStatic
     EXPORT_PC
     movzwl  2(rPC), %eax
     movl    %eax, OUT_ARG0(%esp)            # field ref CCCC
@@ -2696,7 +2696,7 @@
     movl    %eax, OUT_ARG1(%esp)            # referrer
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG2(%esp)            # self
-    call    SYMBOL(artGetCharStaticFromCode)
+    call    SYMBOL(MterpGetCharStatic)
     movl    rSELF, %ecx
     RESTORE_IBASE_FROM_SELF %ecx
     cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
@@ -2720,7 +2720,7 @@
  * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
  */
     /* op vAA, field@BBBB */
-    .extern artGetShortStaticFromCode
+    .extern MterpGetShortStatic
     EXPORT_PC
     movzwl  2(rPC), %eax
     movl    %eax, OUT_ARG0(%esp)            # field ref CCCC
@@ -2728,7 +2728,7 @@
     movl    %eax, OUT_ARG1(%esp)            # referrer
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG2(%esp)            # self
-    call    SYMBOL(artGetShortStaticFromCode)
+    call    SYMBOL(MterpGetShortStatic)
     movl    rSELF, %ecx
     RESTORE_IBASE_FROM_SELF %ecx
     cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
@@ -2751,7 +2751,7 @@
  * for: sput, sput-boolean, sput-byte, sput-char, sput-short
  */
     /* op vAA, field@BBBB */
-    .extern artSet32StaticFromCode
+    .extern MterpSet32Static
     EXPORT_PC
     movzwl  2(rPC), %eax
     movl    %eax, OUT_ARG0(%esp)            # field ref BBBB
@@ -2761,7 +2761,7 @@
     movl    %eax, OUT_ARG2(%esp)            # referrer
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG3(%esp)            # self
-    call    SYMBOL(artSet32StaticFromCode)
+    call    SYMBOL(MterpSet32Static)
     testb   %al, %al
     jnz     MterpException
     RESTORE_IBASE
@@ -2776,17 +2776,17 @@
  *
  */
     /* sput-wide vAA, field@BBBB */
-    .extern artSet64IndirectStaticFromMterp
+    .extern MterpSet64Static
     EXPORT_PC
     movzwl  2(rPC), %eax
     movl    %eax, OUT_ARG0(%esp)            # field ref BBBB
-    movl    OFF_FP_METHOD(rFP), %eax
-    movl    %eax, OUT_ARG1(%esp)            # referrer
     leal    VREG_ADDRESS(rINST), %eax
-    movl    %eax, OUT_ARG2(%esp)            # &fp[AA]
+    movl    %eax, OUT_ARG1(%esp)            # &fp[AA]
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG2(%esp)            # referrer
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG3(%esp)            # self
-    call    SYMBOL(artSet64IndirectStaticFromMterp)
+    call    SYMBOL(MterpSet64Static)
     testb   %al, %al
     jnz     MterpException
     RESTORE_IBASE
@@ -2821,7 +2821,7 @@
  * for: sput, sput-boolean, sput-byte, sput-char, sput-short
  */
     /* op vAA, field@BBBB */
-    .extern artSet8StaticFromCode
+    .extern MterpSetBooleanStatic
     EXPORT_PC
     movzwl  2(rPC), %eax
     movl    %eax, OUT_ARG0(%esp)            # field ref BBBB
@@ -2831,7 +2831,7 @@
     movl    %eax, OUT_ARG2(%esp)            # referrer
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG3(%esp)            # self
-    call    SYMBOL(artSet8StaticFromCode)
+    call    SYMBOL(MterpSetBooleanStatic)
     testb   %al, %al
     jnz     MterpException
     RESTORE_IBASE
@@ -2849,7 +2849,7 @@
  * for: sput, sput-boolean, sput-byte, sput-char, sput-short
  */
     /* op vAA, field@BBBB */
-    .extern artSet8StaticFromCode
+    .extern MterpSetByteStatic
     EXPORT_PC
     movzwl  2(rPC), %eax
     movl    %eax, OUT_ARG0(%esp)            # field ref BBBB
@@ -2859,7 +2859,7 @@
     movl    %eax, OUT_ARG2(%esp)            # referrer
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG3(%esp)            # self
-    call    SYMBOL(artSet8StaticFromCode)
+    call    SYMBOL(MterpSetByteStatic)
     testb   %al, %al
     jnz     MterpException
     RESTORE_IBASE
@@ -2877,7 +2877,7 @@
  * for: sput, sput-boolean, sput-byte, sput-char, sput-short
  */
     /* op vAA, field@BBBB */
-    .extern artSet16StaticFromCode
+    .extern MterpSetCharStatic
     EXPORT_PC
     movzwl  2(rPC), %eax
     movl    %eax, OUT_ARG0(%esp)            # field ref BBBB
@@ -2887,7 +2887,7 @@
     movl    %eax, OUT_ARG2(%esp)            # referrer
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG3(%esp)            # self
-    call    SYMBOL(artSet16StaticFromCode)
+    call    SYMBOL(MterpSetCharStatic)
     testb   %al, %al
     jnz     MterpException
     RESTORE_IBASE
@@ -2905,7 +2905,7 @@
  * for: sput, sput-boolean, sput-byte, sput-char, sput-short
  */
     /* op vAA, field@BBBB */
-    .extern artSet16StaticFromCode
+    .extern MterpSetShortStatic
     EXPORT_PC
     movzwl  2(rPC), %eax
     movl    %eax, OUT_ARG0(%esp)            # field ref BBBB
@@ -2915,7 +2915,7 @@
     movl    %eax, OUT_ARG2(%esp)            # referrer
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG3(%esp)            # self
-    call    SYMBOL(artSet16StaticFromCode)
+    call    SYMBOL(MterpSetShortStatic)
     testb   %al, %al
     jnz     MterpException
     RESTORE_IBASE
@@ -6292,23 +6292,15 @@
 
 /* ------------------------------ */
     .balign 128
-.L_op_unused_fc: /* 0xfc */
-/* File: x86/op_unused_fc.S */
-/* File: x86/unused.S */
-/*
- * Bail to reference interpreter to throw.
- */
+.L_op_invoke_custom: /* 0xfc */
+/* Transfer stub to alternate interpreter */
     jmp     MterpFallback
 
 
 /* ------------------------------ */
     .balign 128
-.L_op_unused_fd: /* 0xfd */
-/* File: x86/op_unused_fd.S */
-/* File: x86/unused.S */
-/*
- * Bail to reference interpreter to throw.
- */
+.L_op_invoke_custom_range: /* 0xfd */
+/* Transfer stub to alternate interpreter */
     jmp     MterpFallback
 
 
@@ -12410,7 +12402,7 @@
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_unused_fc: /* 0xfc */
+.L_ALT_op_invoke_custom: /* 0xfc */
 /* File: x86/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
@@ -12434,7 +12426,7 @@
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_unused_fd: /* 0xfd */
+.L_ALT_op_invoke_custom_range: /* 0xfd */
 /* File: x86/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
diff --git a/runtime/interpreter/mterp/out/mterp_x86_64.S b/runtime/interpreter/mterp/out/mterp_x86_64.S
index 2eab58c..b5a5ae5 100644
--- a/runtime/interpreter/mterp/out/mterp_x86_64.S
+++ b/runtime/interpreter/mterp/out/mterp_x86_64.S
@@ -2445,12 +2445,12 @@
  * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short, sget-wide
  */
     /* op vAA, field@BBBB */
-    .extern artGet32StaticFromCode
+    .extern MterpGet32Static
     EXPORT_PC
     movzwq  2(rPC), OUT_ARG0                # field ref CCCC
     movq    OFF_FP_METHOD(rFP), OUT_ARG1    # referrer
     movq    rSELF, OUT_ARG2                 # self
-    call    SYMBOL(artGet32StaticFromCode)
+    call    SYMBOL(MterpGet32Static)
     movq    rSELF, %rcx
     cmpl    $0, THREAD_EXCEPTION_OFFSET(%rcx)
     jnz     MterpException
@@ -2476,12 +2476,12 @@
  * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short, sget-wide
  */
     /* op vAA, field@BBBB */
-    .extern artGet64StaticFromCode
+    .extern MterpGet64Static
     EXPORT_PC
     movzwq  2(rPC), OUT_ARG0                # field ref CCCC
     movq    OFF_FP_METHOD(rFP), OUT_ARG1    # referrer
     movq    rSELF, OUT_ARG2                 # self
-    call    SYMBOL(artGet64StaticFromCode)
+    call    SYMBOL(MterpGet64Static)
     movq    rSELF, %rcx
     cmpl    $0, THREAD_EXCEPTION_OFFSET(%rcx)
     jnz     MterpException
@@ -2508,12 +2508,12 @@
  * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short, sget-wide
  */
     /* op vAA, field@BBBB */
-    .extern artGetObjStaticFromCode
+    .extern MterpGetObjStatic
     EXPORT_PC
     movzwq  2(rPC), OUT_ARG0                # field ref CCCC
     movq    OFF_FP_METHOD(rFP), OUT_ARG1    # referrer
     movq    rSELF, OUT_ARG2                 # self
-    call    SYMBOL(artGetObjStaticFromCode)
+    call    SYMBOL(MterpGetObjStatic)
     movq    rSELF, %rcx
     cmpl    $0, THREAD_EXCEPTION_OFFSET(%rcx)
     jnz     MterpException
@@ -2540,12 +2540,12 @@
  * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short, sget-wide
  */
     /* op vAA, field@BBBB */
-    .extern artGetBooleanStaticFromCode
+    .extern MterpGetBooleanStatic
     EXPORT_PC
     movzwq  2(rPC), OUT_ARG0                # field ref CCCC
     movq    OFF_FP_METHOD(rFP), OUT_ARG1    # referrer
     movq    rSELF, OUT_ARG2                 # self
-    call    SYMBOL(artGetBooleanStaticFromCode)
+    call    SYMBOL(MterpGetBooleanStatic)
     movq    rSELF, %rcx
     cmpl    $0, THREAD_EXCEPTION_OFFSET(%rcx)
     jnz     MterpException
@@ -2572,12 +2572,12 @@
  * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short, sget-wide
  */
     /* op vAA, field@BBBB */
-    .extern artGetByteStaticFromCode
+    .extern MterpGetByteStatic
     EXPORT_PC
     movzwq  2(rPC), OUT_ARG0                # field ref CCCC
     movq    OFF_FP_METHOD(rFP), OUT_ARG1    # referrer
     movq    rSELF, OUT_ARG2                 # self
-    call    SYMBOL(artGetByteStaticFromCode)
+    call    SYMBOL(MterpGetByteStatic)
     movq    rSELF, %rcx
     cmpl    $0, THREAD_EXCEPTION_OFFSET(%rcx)
     jnz     MterpException
@@ -2604,12 +2604,12 @@
  * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short, sget-wide
  */
     /* op vAA, field@BBBB */
-    .extern artGetCharStaticFromCode
+    .extern MterpGetCharStatic
     EXPORT_PC
     movzwq  2(rPC), OUT_ARG0                # field ref CCCC
     movq    OFF_FP_METHOD(rFP), OUT_ARG1    # referrer
     movq    rSELF, OUT_ARG2                 # self
-    call    SYMBOL(artGetCharStaticFromCode)
+    call    SYMBOL(MterpGetCharStatic)
     movq    rSELF, %rcx
     cmpl    $0, THREAD_EXCEPTION_OFFSET(%rcx)
     jnz     MterpException
@@ -2636,12 +2636,12 @@
  * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short, sget-wide
  */
     /* op vAA, field@BBBB */
-    .extern artGetShortStaticFromCode
+    .extern MterpGetShortStatic
     EXPORT_PC
     movzwq  2(rPC), OUT_ARG0                # field ref CCCC
     movq    OFF_FP_METHOD(rFP), OUT_ARG1    # referrer
     movq    rSELF, OUT_ARG2                 # self
-    call    SYMBOL(artGetShortStaticFromCode)
+    call    SYMBOL(MterpGetShortStatic)
     movq    rSELF, %rcx
     cmpl    $0, THREAD_EXCEPTION_OFFSET(%rcx)
     jnz     MterpException
@@ -2667,13 +2667,13 @@
  * for: sput, sput-boolean, sput-byte, sput-char, sput-short
  */
     /* op vAA, field@BBBB */
-    .extern artSet32StaticFromCode
+    .extern MterpSet32Static
     EXPORT_PC
     movzwq  2(rPC), OUT_ARG0                # field ref BBBB
     GET_VREG OUT_32_ARG1, rINSTq            # fp[AA]
     movq    OFF_FP_METHOD(rFP), OUT_ARG2    # referrer
     movq    rSELF, OUT_ARG3                 # self
-    call    SYMBOL(artSet32StaticFromCode)
+    call    SYMBOL(MterpSet32Static)
     testb   %al, %al
     jnz     MterpException
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
@@ -2687,13 +2687,13 @@
  *
  */
     /* sput-wide vAA, field@BBBB */
-    .extern artSet64IndirectStaticFromMterp
+    .extern MterpSet64Static
     EXPORT_PC
     movzwq  2(rPC), OUT_ARG0                # field ref BBBB
-    movq    OFF_FP_METHOD(rFP), OUT_ARG1    # referrer
-    leaq    VREG_ADDRESS(rINSTq), OUT_ARG2  # &fp[AA]
+    leaq    VREG_ADDRESS(rINSTq), OUT_ARG1  # &fp[AA]
+    movq    OFF_FP_METHOD(rFP), OUT_ARG2    # referrer
     movq    rSELF, OUT_ARG3                 # self
-    call    SYMBOL(artSet64IndirectStaticFromMterp)
+    call    SYMBOL(MterpSet64Static)
     testb   %al, %al
     jnz     MterpException
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
@@ -2724,13 +2724,13 @@
  * for: sput, sput-boolean, sput-byte, sput-char, sput-short
  */
     /* op vAA, field@BBBB */
-    .extern artSet8StaticFromCode
+    .extern MterpSetBooleanStatic
     EXPORT_PC
     movzwq  2(rPC), OUT_ARG0                # field ref BBBB
     GET_VREG OUT_32_ARG1, rINSTq            # fp[AA]
     movq    OFF_FP_METHOD(rFP), OUT_ARG2    # referrer
     movq    rSELF, OUT_ARG3                 # self
-    call    SYMBOL(artSet8StaticFromCode)
+    call    SYMBOL(MterpSetBooleanStatic)
     testb   %al, %al
     jnz     MterpException
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
@@ -2747,13 +2747,13 @@
  * for: sput, sput-boolean, sput-byte, sput-char, sput-short
  */
     /* op vAA, field@BBBB */
-    .extern artSet8StaticFromCode
+    .extern MterpSetByteStatic
     EXPORT_PC
     movzwq  2(rPC), OUT_ARG0                # field ref BBBB
     GET_VREG OUT_32_ARG1, rINSTq            # fp[AA]
     movq    OFF_FP_METHOD(rFP), OUT_ARG2    # referrer
     movq    rSELF, OUT_ARG3                 # self
-    call    SYMBOL(artSet8StaticFromCode)
+    call    SYMBOL(MterpSetByteStatic)
     testb   %al, %al
     jnz     MterpException
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
@@ -2770,13 +2770,13 @@
  * for: sput, sput-boolean, sput-byte, sput-char, sput-short
  */
     /* op vAA, field@BBBB */
-    .extern artSet16StaticFromCode
+    .extern MterpSetCharStatic
     EXPORT_PC
     movzwq  2(rPC), OUT_ARG0                # field ref BBBB
     GET_VREG OUT_32_ARG1, rINSTq            # fp[AA]
     movq    OFF_FP_METHOD(rFP), OUT_ARG2    # referrer
     movq    rSELF, OUT_ARG3                 # self
-    call    SYMBOL(artSet16StaticFromCode)
+    call    SYMBOL(MterpSetCharStatic)
     testb   %al, %al
     jnz     MterpException
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
@@ -2793,13 +2793,13 @@
  * for: sput, sput-boolean, sput-byte, sput-char, sput-short
  */
     /* op vAA, field@BBBB */
-    .extern artSet16StaticFromCode
+    .extern MterpSetShortStatic
     EXPORT_PC
     movzwq  2(rPC), OUT_ARG0                # field ref BBBB
     GET_VREG OUT_32_ARG1, rINSTq            # fp[AA]
     movq    OFF_FP_METHOD(rFP), OUT_ARG2    # referrer
     movq    rSELF, OUT_ARG3                 # self
-    call    SYMBOL(artSet16StaticFromCode)
+    call    SYMBOL(MterpSetShortStatic)
     testb   %al, %al
     jnz     MterpException
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
@@ -6057,23 +6057,15 @@
 
 /* ------------------------------ */
     .balign 128
-.L_op_unused_fc: /* 0xfc */
-/* File: x86_64/op_unused_fc.S */
-/* File: x86_64/unused.S */
-/*
- * Bail to reference interpreter to throw.
- */
+.L_op_invoke_custom: /* 0xfc */
+/* Transfer stub to alternate interpreter */
     jmp     MterpFallback
 
 
 /* ------------------------------ */
     .balign 128
-.L_op_unused_fd: /* 0xfd */
-/* File: x86_64/op_unused_fd.S */
-/* File: x86_64/unused.S */
-/*
- * Bail to reference interpreter to throw.
- */
+.L_op_invoke_custom_range: /* 0xfd */
+/* Transfer stub to alternate interpreter */
     jmp     MterpFallback
 
 
@@ -11671,7 +11663,7 @@
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_unused_fc: /* 0xfc */
+.L_ALT_op_invoke_custom: /* 0xfc */
 /* File: x86_64/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
@@ -11693,7 +11685,7 @@
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_unused_fd: /* 0xfd */
+.L_ALT_op_invoke_custom_range: /* 0xfd */
 /* File: x86_64/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
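A note on the 0xfc/0xfd renames in these generated files: mterp does not dispatch through a jump table, it computes each handler's address arithmetically, which is why even handlers that only fall back keep their full ".balign 128" slot. A minimal sketch of that dispatch computation, with assumed names (the real base symbol is defined by the generated assembly):

    #include <cstddef>
    #include <cstdint>

    // Each handler occupies exactly 128 bytes; .balign 128 enforces the slot size.
    constexpr std::size_t kMterpHandlerSize = 128;

    // Illustrative only: resolves an opcode to its handler slot.
    // 0xfc now lands on .L_op_invoke_custom, 0xfd on .L_op_invoke_custom_range.
    const uint8_t* HandlerFor(const uint8_t* handler_base, uint8_t opcode) {
      return handler_base + static_cast<std::size_t>(opcode) * kMterpHandlerSize;
    }
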
diff --git a/runtime/interpreter/mterp/x86/op_sget.S b/runtime/interpreter/mterp/x86/op_sget.S
index 0e9a3d8..6e42d32 100644
--- a/runtime/interpreter/mterp/x86/op_sget.S
+++ b/runtime/interpreter/mterp/x86/op_sget.S
@@ -1,4 +1,4 @@
-%default { "is_object":"0", "helper":"artGet32StaticFromCode" }
+%default { "is_object":"0", "helper":"MterpGet32Static" }
 /*
  * General SGET handler wrapper.
  *
diff --git a/runtime/interpreter/mterp/x86/op_sget_boolean.S b/runtime/interpreter/mterp/x86/op_sget_boolean.S
index f058dd8..5fa2bf0 100644
--- a/runtime/interpreter/mterp/x86/op_sget_boolean.S
+++ b/runtime/interpreter/mterp/x86/op_sget_boolean.S
@@ -1 +1 @@
-%include "x86/op_sget.S" {"helper":"artGetBooleanStaticFromCode"}
+%include "x86/op_sget.S" {"helper":"MterpGetBooleanStatic"}
diff --git a/runtime/interpreter/mterp/x86/op_sget_byte.S b/runtime/interpreter/mterp/x86/op_sget_byte.S
index c952f40..ef812f1 100644
--- a/runtime/interpreter/mterp/x86/op_sget_byte.S
+++ b/runtime/interpreter/mterp/x86/op_sget_byte.S
@@ -1 +1 @@
-%include "x86/op_sget.S" {"helper":"artGetByteStaticFromCode"}
+%include "x86/op_sget.S" {"helper":"MterpGetByteStatic"}
diff --git a/runtime/interpreter/mterp/x86/op_sget_char.S b/runtime/interpreter/mterp/x86/op_sget_char.S
index d7bd410..3bc34ef 100644
--- a/runtime/interpreter/mterp/x86/op_sget_char.S
+++ b/runtime/interpreter/mterp/x86/op_sget_char.S
@@ -1 +1 @@
-%include "x86/op_sget.S" {"helper":"artGetCharStaticFromCode"}
+%include "x86/op_sget.S" {"helper":"MterpGetCharStatic"}
diff --git a/runtime/interpreter/mterp/x86/op_sget_object.S b/runtime/interpreter/mterp/x86/op_sget_object.S
index 1c95f9a..b829e75 100644
--- a/runtime/interpreter/mterp/x86/op_sget_object.S
+++ b/runtime/interpreter/mterp/x86/op_sget_object.S
@@ -1 +1 @@
-%include "x86/op_sget.S" {"is_object":"1", "helper":"artGetObjStaticFromCode"}
+%include "x86/op_sget.S" {"is_object":"1", "helper":"MterpGetObjStatic"}
diff --git a/runtime/interpreter/mterp/x86/op_sget_short.S b/runtime/interpreter/mterp/x86/op_sget_short.S
index 6475306..449cf6f 100644
--- a/runtime/interpreter/mterp/x86/op_sget_short.S
+++ b/runtime/interpreter/mterp/x86/op_sget_short.S
@@ -1 +1 @@
-%include "x86/op_sget.S" {"helper":"artGetShortStaticFromCode"}
+%include "x86/op_sget.S" {"helper":"MterpGetShortStatic"}
diff --git a/runtime/interpreter/mterp/x86/op_sget_wide.S b/runtime/interpreter/mterp/x86/op_sget_wide.S
index 2b60303..a605bcf 100644
--- a/runtime/interpreter/mterp/x86/op_sget_wide.S
+++ b/runtime/interpreter/mterp/x86/op_sget_wide.S
@@ -3,7 +3,7 @@
  *
  */
     /* sget-wide vAA, field@BBBB */
-    .extern artGet64StaticFromCode
+    .extern MterpGet64Static
     EXPORT_PC
     movzwl  2(rPC), %eax
     movl    %eax, OUT_ARG0(%esp)            # field ref CCCC
@@ -11,7 +11,7 @@
     movl    %eax, OUT_ARG1(%esp)            # referrer
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG2(%esp)            # self
-    call    SYMBOL(artGet64StaticFromCode)
+    call    SYMBOL(MterpGet64Static)
     movl    rSELF, %ecx
     cmpl    $$0, THREAD_EXCEPTION_OFFSET(%ecx)
     jnz     MterpException
diff --git a/runtime/interpreter/mterp/x86/op_sput.S b/runtime/interpreter/mterp/x86/op_sput.S
index 0b5de09..99f6088 100644
--- a/runtime/interpreter/mterp/x86/op_sput.S
+++ b/runtime/interpreter/mterp/x86/op_sput.S
@@ -1,4 +1,4 @@
-%default { "helper":"artSet32StaticFromCode"}
+%default { "helper":"MterpSet32Static"}
 /*
  * General SPUT handler wrapper.
  *
diff --git a/runtime/interpreter/mterp/x86/op_sput_boolean.S b/runtime/interpreter/mterp/x86/op_sput_boolean.S
index 63601bd..a7fffda 100644
--- a/runtime/interpreter/mterp/x86/op_sput_boolean.S
+++ b/runtime/interpreter/mterp/x86/op_sput_boolean.S
@@ -1 +1 @@
-%include "x86/op_sput.S" {"helper":"artSet8StaticFromCode"}
+%include "x86/op_sput.S" {"helper":"MterpSetBooleanStatic"}
diff --git a/runtime/interpreter/mterp/x86/op_sput_byte.S b/runtime/interpreter/mterp/x86/op_sput_byte.S
index 63601bd..3a5ff92 100644
--- a/runtime/interpreter/mterp/x86/op_sput_byte.S
+++ b/runtime/interpreter/mterp/x86/op_sput_byte.S
@@ -1 +1 @@
-%include "x86/op_sput.S" {"helper":"artSet8StaticFromCode"}
+%include "x86/op_sput.S" {"helper":"MterpSetByteStatic"}
diff --git a/runtime/interpreter/mterp/x86/op_sput_char.S b/runtime/interpreter/mterp/x86/op_sput_char.S
index 1749f7c..565cc2a 100644
--- a/runtime/interpreter/mterp/x86/op_sput_char.S
+++ b/runtime/interpreter/mterp/x86/op_sput_char.S
@@ -1 +1 @@
-%include "x86/op_sput.S" {"helper":"artSet16StaticFromCode"}
+%include "x86/op_sput.S" {"helper":"MterpSetCharStatic"}
diff --git a/runtime/interpreter/mterp/x86/op_sput_short.S b/runtime/interpreter/mterp/x86/op_sput_short.S
index 1749f7c..85c3441 100644
--- a/runtime/interpreter/mterp/x86/op_sput_short.S
+++ b/runtime/interpreter/mterp/x86/op_sput_short.S
@@ -1 +1 @@
-%include "x86/op_sput.S" {"helper":"artSet16StaticFromCode"}
+%include "x86/op_sput.S" {"helper":"MterpSetShortStatic"}
diff --git a/runtime/interpreter/mterp/x86/op_sput_wide.S b/runtime/interpreter/mterp/x86/op_sput_wide.S
index 19cff0d..8cc7e28 100644
--- a/runtime/interpreter/mterp/x86/op_sput_wide.S
+++ b/runtime/interpreter/mterp/x86/op_sput_wide.S
@@ -3,17 +3,17 @@
  *
  */
     /* sput-wide vAA, field@BBBB */
-    .extern artSet64IndirectStaticFromMterp
+    .extern MterpSet64Static
     EXPORT_PC
     movzwl  2(rPC), %eax
     movl    %eax, OUT_ARG0(%esp)            # field ref BBBB
-    movl    OFF_FP_METHOD(rFP), %eax
-    movl    %eax, OUT_ARG1(%esp)            # referrer
     leal    VREG_ADDRESS(rINST), %eax
-    movl    %eax, OUT_ARG2(%esp)            # &fp[AA]
+    movl    %eax, OUT_ARG1(%esp)            # &fp[AA]
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG2(%esp)            # referrer
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG3(%esp)            # self
-    call    SYMBOL(artSet64IndirectStaticFromMterp)
+    call    SYMBOL(MterpSet64Static)
     testb   %al, %al
     jnz     MterpException
     RESTORE_IBASE
diff --git a/runtime/interpreter/mterp/x86_64/op_sget.S b/runtime/interpreter/mterp/x86_64/op_sget.S
index d39e6c4..e996c77 100644
--- a/runtime/interpreter/mterp/x86_64/op_sget.S
+++ b/runtime/interpreter/mterp/x86_64/op_sget.S
@@ -1,4 +1,4 @@
-%default { "is_object":"0", "helper":"artGet32StaticFromCode", "wide":"0" }
+%default { "is_object":"0", "helper":"MterpGet32Static", "wide":"0" }
 /*
  * General SGET handler wrapper.
  *
diff --git a/runtime/interpreter/mterp/x86_64/op_sget_boolean.S b/runtime/interpreter/mterp/x86_64/op_sget_boolean.S
index 7d358da..ee772ad 100644
--- a/runtime/interpreter/mterp/x86_64/op_sget_boolean.S
+++ b/runtime/interpreter/mterp/x86_64/op_sget_boolean.S
@@ -1 +1 @@
-%include "x86_64/op_sget.S" {"helper":"artGetBooleanStaticFromCode"}
+%include "x86_64/op_sget.S" {"helper":"MterpGetBooleanStatic"}
diff --git a/runtime/interpreter/mterp/x86_64/op_sget_byte.S b/runtime/interpreter/mterp/x86_64/op_sget_byte.S
index 79d9ff4..f65ea49 100644
--- a/runtime/interpreter/mterp/x86_64/op_sget_byte.S
+++ b/runtime/interpreter/mterp/x86_64/op_sget_byte.S
@@ -1 +1 @@
-%include "x86_64/op_sget.S" {"helper":"artGetByteStaticFromCode"}
+%include "x86_64/op_sget.S" {"helper":"MterpGetByteStatic"}
diff --git a/runtime/interpreter/mterp/x86_64/op_sget_char.S b/runtime/interpreter/mterp/x86_64/op_sget_char.S
index 4488610..3972551 100644
--- a/runtime/interpreter/mterp/x86_64/op_sget_char.S
+++ b/runtime/interpreter/mterp/x86_64/op_sget_char.S
@@ -1 +1 @@
-%include "x86_64/op_sget.S" {"helper":"artGetCharStaticFromCode"}
+%include "x86_64/op_sget.S" {"helper":"MterpGetCharStatic"}
diff --git a/runtime/interpreter/mterp/x86_64/op_sget_object.S b/runtime/interpreter/mterp/x86_64/op_sget_object.S
index 09b627e..a0bbfd8 100644
--- a/runtime/interpreter/mterp/x86_64/op_sget_object.S
+++ b/runtime/interpreter/mterp/x86_64/op_sget_object.S
@@ -1 +1 @@
-%include "x86_64/op_sget.S" {"is_object":"1", "helper":"artGetObjStaticFromCode"}
+%include "x86_64/op_sget.S" {"is_object":"1", "helper":"MterpGetObjStatic"}
diff --git a/runtime/interpreter/mterp/x86_64/op_sget_short.S b/runtime/interpreter/mterp/x86_64/op_sget_short.S
index 47ac238..df212dc 100644
--- a/runtime/interpreter/mterp/x86_64/op_sget_short.S
+++ b/runtime/interpreter/mterp/x86_64/op_sget_short.S
@@ -1 +1 @@
-%include "x86_64/op_sget.S" {"helper":"artGetShortStaticFromCode"}
+%include "x86_64/op_sget.S" {"helper":"MterpGetShortStatic"}
diff --git a/runtime/interpreter/mterp/x86_64/op_sget_wide.S b/runtime/interpreter/mterp/x86_64/op_sget_wide.S
index aa22343..1e98e28 100644
--- a/runtime/interpreter/mterp/x86_64/op_sget_wide.S
+++ b/runtime/interpreter/mterp/x86_64/op_sget_wide.S
@@ -1 +1 @@
-%include "x86_64/op_sget.S" {"helper":"artGet64StaticFromCode", "wide":"1"}
+%include "x86_64/op_sget.S" {"helper":"MterpGet64Static", "wide":"1"}
diff --git a/runtime/interpreter/mterp/x86_64/op_sput.S b/runtime/interpreter/mterp/x86_64/op_sput.S
index e92b032..9705619 100644
--- a/runtime/interpreter/mterp/x86_64/op_sput.S
+++ b/runtime/interpreter/mterp/x86_64/op_sput.S
@@ -1,4 +1,4 @@
-%default { "helper":"artSet32StaticFromCode"}
+%default { "helper":"MterpSet32Static"}
 /*
  * General SPUT handler wrapper.
  *
diff --git a/runtime/interpreter/mterp/x86_64/op_sput_boolean.S b/runtime/interpreter/mterp/x86_64/op_sput_boolean.S
index 8718915..8bf4a62 100644
--- a/runtime/interpreter/mterp/x86_64/op_sput_boolean.S
+++ b/runtime/interpreter/mterp/x86_64/op_sput_boolean.S
@@ -1 +1 @@
-%include "x86_64/op_sput.S" {"helper":"artSet8StaticFromCode"}
+%include "x86_64/op_sput.S" {"helper":"MterpSetBooleanStatic"}
diff --git a/runtime/interpreter/mterp/x86_64/op_sput_byte.S b/runtime/interpreter/mterp/x86_64/op_sput_byte.S
index 8718915..5bb26eb 100644
--- a/runtime/interpreter/mterp/x86_64/op_sput_byte.S
+++ b/runtime/interpreter/mterp/x86_64/op_sput_byte.S
@@ -1 +1 @@
-%include "x86_64/op_sput.S" {"helper":"artSet8StaticFromCode"}
+%include "x86_64/op_sput.S" {"helper":"MterpSetByteStatic"}
diff --git a/runtime/interpreter/mterp/x86_64/op_sput_char.S b/runtime/interpreter/mterp/x86_64/op_sput_char.S
index 2fe9d14..42b244e 100644
--- a/runtime/interpreter/mterp/x86_64/op_sput_char.S
+++ b/runtime/interpreter/mterp/x86_64/op_sput_char.S
@@ -1 +1 @@
-%include "x86_64/op_sput.S" {"helper":"artSet16StaticFromCode"}
+%include "x86_64/op_sput.S" {"helper":"MterpSetCharStatic"}
diff --git a/runtime/interpreter/mterp/x86_64/op_sput_short.S b/runtime/interpreter/mterp/x86_64/op_sput_short.S
index 2fe9d14..9670092 100644
--- a/runtime/interpreter/mterp/x86_64/op_sput_short.S
+++ b/runtime/interpreter/mterp/x86_64/op_sput_short.S
@@ -1 +1 @@
-%include "x86_64/op_sput.S" {"helper":"artSet16StaticFromCode"}
+%include "x86_64/op_sput.S" {"helper":"MterpSetShortStatic"}
diff --git a/runtime/interpreter/mterp/x86_64/op_sput_wide.S b/runtime/interpreter/mterp/x86_64/op_sput_wide.S
index c4bc269..a21bcb5 100644
--- a/runtime/interpreter/mterp/x86_64/op_sput_wide.S
+++ b/runtime/interpreter/mterp/x86_64/op_sput_wide.S
@@ -3,13 +3,13 @@
  *
  */
     /* sput-wide vAA, field@BBBB */
-    .extern artSet64IndirectStaticFromMterp
+    .extern MterpSet64Static
     EXPORT_PC
     movzwq  2(rPC), OUT_ARG0                # field ref BBBB
-    movq    OFF_FP_METHOD(rFP), OUT_ARG1    # referrer
-    leaq    VREG_ADDRESS(rINSTq), OUT_ARG2  # &fp[AA]
+    leaq    VREG_ADDRESS(rINSTq), OUT_ARG1  # &fp[AA]
+    movq    OFF_FP_METHOD(rFP), OUT_ARG2    # referrer
     movq    rSELF, OUT_ARG3                 # self
-    call    SYMBOL(artSet64IndirectStaticFromMterp)
+    call    SYMBOL(MterpSet64Static)
     testb   %al, %al
     jnz     MterpException
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
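Note that the sput-wide hunks above (the generated out files and the x86/x86_64 templates) do more than rename the helper: OUT_ARG1 and OUT_ARG2 are swapped because the replacement helper takes the value pointer before the referrer. A hedged sketch of the two signatures, inferred from the register loads; the exact parameter and return types are assumptions:

    #include <cstdint>

    struct ArtMethod;  // opaque stand-ins for this sketch
    class Thread;

    // Old order: referrer in ARG1, &fp[AA] in ARG2.
    extern "C" bool artSet64IndirectStaticFromMterp(uint32_t field_idx,
                                                    ArtMethod* referrer,
                                                    uint64_t* new_value,
                                                    Thread* self);

    // New order: &fp[AA] in ARG1, referrer in ARG2, hence the swapped moves.
    // A non-zero return signals failure (testb %al, %al; jnz MterpException).
    extern "C" bool MterpSet64Static(uint32_t field_idx,
                                     uint64_t* new_value,
                                     ArtMethod* referrer,
                                     Thread* self);
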
diff --git a/runtime/interpreter/unstarted_runtime.cc b/runtime/interpreter/unstarted_runtime.cc
index af0478c..eb0a9d1 100644
--- a/runtime/interpreter/unstarted_runtime.cc
+++ b/runtime/interpreter/unstarted_runtime.cc
@@ -1330,17 +1330,18 @@
   result->SetC(string->CharAt(index));
 }
 
-// This allows setting chars from the new style of String objects during compilation.
-void UnstartedRuntime::UnstartedStringSetCharAt(
-    Thread* self, ShadowFrame* shadow_frame, JValue* result ATTRIBUTE_UNUSED, size_t arg_offset) {
-  jint index = shadow_frame->GetVReg(arg_offset + 1);
-  jchar c = shadow_frame->GetVReg(arg_offset + 2);
-  mirror::String* string = shadow_frame->GetVRegReference(arg_offset)->AsString();
+// This allows creating String objects with replaced characters during compilation.
+// String.doReplace(char, char) is called from String.replace(char, char) when there is a match.
+void UnstartedRuntime::UnstartedStringDoReplace(
+    Thread* self, ShadowFrame* shadow_frame, JValue* result, size_t arg_offset) {
+  jchar old_c = shadow_frame->GetVReg(arg_offset + 1);
+  jchar new_c = shadow_frame->GetVReg(arg_offset + 2);
+  ObjPtr<mirror::String> string = shadow_frame->GetVRegReference(arg_offset)->AsString();
   if (string == nullptr) {
-    AbortTransactionOrFail(self, "String.setCharAt with null object");
+    AbortTransactionOrFail(self, "String.replaceWithMatch with null object");
     return;
   }
-  string->SetCharAt(index, c);
+  result->SetL(string->DoReplace(self, old_c, new_c));
 }
 
 // This allows creating the new style of String objects during compilation.
@@ -1671,6 +1672,12 @@
   }
 }
 
+// System.identityHashCode is null-safe: a null argument hashes to 0.
+void UnstartedRuntime::UnstartedSystemIdentityHashCode(
+    Thread* self ATTRIBUTE_UNUSED, ShadowFrame* shadow_frame, JValue* result, size_t arg_offset)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  mirror::Object* obj = shadow_frame->GetVRegReference(arg_offset);
+  result->SetI((obj != nullptr) ? obj->IdentityHashCode() : 0);
+}
 
 void UnstartedRuntime::UnstartedJNIVMRuntimeNewUnpaddedArray(
     Thread* self, ArtMethod* method ATTRIBUTE_UNUSED, mirror::Object* receiver ATTRIBUTE_UNUSED,
@@ -1835,13 +1842,6 @@
   }
 }
 
-void UnstartedRuntime::UnstartedJNISystemIdentityHashCode(
-    Thread* self ATTRIBUTE_UNUSED, ArtMethod* method ATTRIBUTE_UNUSED,
-    mirror::Object* receiver ATTRIBUTE_UNUSED, uint32_t* args, JValue* result) {
-  mirror::Object* obj = reinterpret_cast<mirror::Object*>(args[0]);
-  result->SetI((obj != nullptr) ? obj->IdentityHashCode() : 0);
-}
-
 void UnstartedRuntime::UnstartedJNIByteOrderIsLittleEndian(
     Thread* self ATTRIBUTE_UNUSED, ArtMethod* method ATTRIBUTE_UNUSED,
     mirror::Object* receiver ATTRIBUTE_UNUSED, uint32_t* args ATTRIBUTE_UNUSED, JValue* result) {
diff --git a/runtime/interpreter/unstarted_runtime_list.h b/runtime/interpreter/unstarted_runtime_list.h
index 6fc7989..2560a92 100644
--- a/runtime/interpreter/unstarted_runtime_list.h
+++ b/runtime/interpreter/unstarted_runtime_list.h
@@ -63,7 +63,7 @@
   V(RuntimeAvailableProcessors, "int java.lang.Runtime.availableProcessors()") \
   V(StringGetCharsNoCheck, "void java.lang.String.getCharsNoCheck(int, int, char[], int)") \
   V(StringCharAt, "char java.lang.String.charAt(int)") \
-  V(StringSetCharAt, "void java.lang.String.setCharAt(int, char)") \
+  V(StringDoReplace, "java.lang.String java.lang.String.doReplace(char, char)") \
   V(StringFactoryNewStringFromChars, "java.lang.String java.lang.StringFactory.newStringFromChars(int, int, char[])") \
   V(StringFactoryNewStringFromString, "java.lang.String java.lang.StringFactory.newStringFromString(java.lang.String)") \
   V(StringFastSubstring, "java.lang.String java.lang.String.fastSubstring(int, int)") \
@@ -76,7 +76,8 @@
   V(UnsafePutObjectVolatile, "void sun.misc.Unsafe.putObjectVolatile(java.lang.Object, long, java.lang.Object)") \
   V(UnsafePutOrderedObject, "void sun.misc.Unsafe.putOrderedObject(java.lang.Object, long, java.lang.Object)") \
   V(IntegerParseInt, "int java.lang.Integer.parseInt(java.lang.String)") \
-  V(LongParseLong, "long java.lang.Long.parseLong(java.lang.String)")
+  V(LongParseLong, "long java.lang.Long.parseLong(java.lang.String)") \
+  V(SystemIdentityHashCode, "int java.lang.System.identityHashCode(java.lang.Object)")
 
 // Methods that are native.
 #define UNSTARTED_RUNTIME_JNI_LIST(V)           \
@@ -98,7 +99,6 @@
   V(ArrayCreateMultiArray, "java.lang.Object java.lang.reflect.Array.createMultiArray(java.lang.Class, int[])") \
   V(ArrayCreateObjectArray, "java.lang.Object java.lang.reflect.Array.createObjectArray(java.lang.Class, int)") \
   V(ThrowableNativeFillInStackTrace, "java.lang.Object java.lang.Throwable.nativeFillInStackTrace()") \
-  V(SystemIdentityHashCode, "int java.lang.System.identityHashCode(java.lang.Object)") \
   V(ByteOrderIsLittleEndian, "boolean java.nio.ByteOrder.isLittleEndian()") \
   V(UnsafeCompareAndSwapInt, "boolean sun.misc.Unsafe.compareAndSwapInt(java.lang.Object, long, int, int)") \
   V(UnsafeGetIntVolatile, "int sun.misc.Unsafe.getIntVolatile(java.lang.Object, long)") \
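For readers new to these headers: the V(...) lines are X-macro entries, and each list is expanded with a different V to generate declarations and dispatch tables. Moving SystemIdentityHashCode from the JNI list to the plain unstarted list is therefore what switches its handler from the UnstartedJNI... signature to the ShadowFrame-based one seen in unstarted_runtime.cc above. A simplified, self-contained sketch of the mechanism (the consumer macro here is a stand-in, not the real one):

    #include <cstddef>

    class Thread;        // stand-ins; the real types live in the ART runtime headers
    class ShadowFrame;
    class JValue;

    // A one-entry stand-in for the list in this header.
    #define UNSTARTED_RUNTIME_DIRECT_LIST(V) \
      V(SystemIdentityHashCode, "int java.lang.System.identityHashCode(java.lang.Object)")

    // Expanding the list declares one Unstarted<Name> handler per entry, which is
    // why moving an entry between lists rewires its dispatcher signature.
    #define UNSTARTED_DIRECT(Name, DescriptorIgnored)                  \
      void Unstarted##Name(Thread* self, ShadowFrame* shadow_frame,    \
                           JValue* result, std::size_t arg_offset);
    UNSTARTED_RUNTIME_DIRECT_LIST(UNSTARTED_DIRECT)
    #undef UNSTARTED_DIRECT
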
diff --git a/runtime/interpreter/unstarted_runtime_test.cc b/runtime/interpreter/unstarted_runtime_test.cc
index db222fa..56e261c 100644
--- a/runtime/interpreter/unstarted_runtime_test.cc
+++ b/runtime/interpreter/unstarted_runtime_test.cc
@@ -1367,5 +1367,26 @@
   ShadowFrame::DeleteDeoptimizedFrame(shadow_frame);
 }
 
+TEST_F(UnstartedRuntimeTest, IdentityHashCode) {
+  Thread* self = Thread::Current();
+  ScopedObjectAccess soa(self);
+  ShadowFrame* tmp = ShadowFrame::CreateDeoptimizedFrame(10, nullptr, nullptr, 0);
+
+  JValue result;
+  UnstartedSystemIdentityHashCode(self, tmp, &result, 0);
+
+  EXPECT_EQ(0, result.GetI());
+  ASSERT_FALSE(self->IsExceptionPending());
+
+  ObjPtr<mirror::String> str = mirror::String::AllocFromModifiedUtf8(self, "abd");
+  tmp->SetVRegReference(0, str.Ptr());
+  UnstartedSystemIdentityHashCode(self, tmp, &result, 0);
+  EXPECT_NE(0, result.GetI());
+  EXPECT_EQ(str->IdentityHashCode(), result.GetI());
+  ASSERT_FALSE(self->IsExceptionPending());
+
+  ShadowFrame::DeleteDeoptimizedFrame(tmp);
+}
+
 }  // namespace interpreter
 }  // namespace art
diff --git a/runtime/jdwp/jdwp_main.cc b/runtime/jdwp/jdwp_main.cc
index 7707ba4..e6c6068 100644
--- a/runtime/jdwp/jdwp_main.cc
+++ b/runtime/jdwp/jdwp_main.cc
@@ -239,6 +239,7 @@
       shutdown_lock_("JDWP shutdown lock", kJdwpShutdownLock),
       shutdown_cond_("JDWP shutdown condition variable", shutdown_lock_),
       processing_request_(false) {
+  Locks::AddToExpectedMutexesOnWeakRefAccess(&event_list_lock_);
 }
 
 /*
@@ -381,6 +382,8 @@
   CHECK(netState == nullptr);
 
   ResetState();
+
+  Locks::RemoveFromExpectedMutexesOnWeakRefAccess(&event_list_lock_);
 }
 
 /*
diff --git a/runtime/jdwp/object_registry.cc b/runtime/jdwp/object_registry.cc
index bd7251b..510f5f0 100644
--- a/runtime/jdwp/object_registry.cc
+++ b/runtime/jdwp/object_registry.cc
@@ -35,6 +35,11 @@
 
 ObjectRegistry::ObjectRegistry()
     : lock_("ObjectRegistry lock", kJdwpObjectRegistryLock), next_id_(1) {
+  Locks::AddToExpectedMutexesOnWeakRefAccess(&lock_);
+}
+
+ObjectRegistry::~ObjectRegistry() {
+  Locks::RemoveFromExpectedMutexesOnWeakRefAccess(&lock_);
 }
 
 JDWP::RefTypeId ObjectRegistry::AddRefType(ObjPtr<mirror::Class> c) {
diff --git a/runtime/jdwp/object_registry.h b/runtime/jdwp/object_registry.h
index 9cacc66..8754631 100644
--- a/runtime/jdwp/object_registry.h
+++ b/runtime/jdwp/object_registry.h
@@ -62,6 +62,7 @@
 class ObjectRegistry {
  public:
   ObjectRegistry();
+  ~ObjectRegistry();
 
   JDWP::ObjectId Add(ObjPtr<mirror::Object> o)
       REQUIRES_SHARED(Locks::mutator_lock_)
diff --git a/runtime/jit/jit.cc b/runtime/jit/jit.cc
index 1ec4749..3631a9d 100644
--- a/runtime/jit/jit.cc
+++ b/runtime/jit/jit.cc
@@ -325,16 +325,12 @@
 }
 
 void Jit::StartProfileSaver(const std::string& filename,
-                            const std::vector<std::string>& code_paths,
-                            const std::string& foreign_dex_profile_path,
-                            const std::string& app_dir) {
+                            const std::vector<std::string>& code_paths) {
   if (profile_saver_options_.IsEnabled()) {
     ProfileSaver::Start(profile_saver_options_,
                         filename,
                         code_cache_.get(),
-                        code_paths,
-                        foreign_dex_profile_path,
-                        app_dir);
+                        code_paths);
   }
 }
 
diff --git a/runtime/jit/jit.h b/runtime/jit/jit.h
index d566799..5da1ea1 100644
--- a/runtime/jit/jit.h
+++ b/runtime/jit/jit.h
@@ -136,14 +136,8 @@
   // Starts the profile saver if the config options allow profile recording.
   // The profile will be stored in the specified `filename` and will contain
   // information collected from the given `code_paths` (a set of dex locations).
-  // The `foreign_dex_profile_path` is the path where the saver will put the
-  // profile markers for loaded dex files which are not owned by the application.
-  // The `app_dir` is the application directory and is used to decide which
-  // dex files belong to the application.
   void StartProfileSaver(const std::string& filename,
-                         const std::vector<std::string>& code_paths,
-                         const std::string& foreign_dex_profile_path,
-                         const std::string& app_dir);
+                         const std::vector<std::string>& code_paths);
   void StopProfileSaver();
 
   void DumpForSigQuit(std::ostream& os) REQUIRES(!lock_);
diff --git a/runtime/jit/jit_code_cache.cc b/runtime/jit/jit_code_cache.cc
index c226a38..e7b23dc 100644
--- a/runtime/jit/jit_code_cache.cc
+++ b/runtime/jit/jit_code_cache.cc
@@ -987,8 +987,11 @@
           const void* entry_point = info->GetMethod()->GetEntryPointFromQuickCompiledCode();
           if (ContainsPc(entry_point)) {
             info->SetSavedEntryPoint(entry_point);
-            Runtime::Current()->GetInstrumentation()->UpdateMethodsCode(
-                info->GetMethod(), GetQuickToInterpreterBridge());
+            // Don't call Instrumentation::UpdateMethodsCode, as it can check the declaring
+            // class of the method. We may be concurrently running a GC, which makes accessing
+            // the class unsafe. We know it is OK to bypass the instrumentation as we have just
+            // checked that the current entry point is JIT-compiled code.
+            info->GetMethod()->SetEntryPointFromQuickCompiledCode(GetQuickToInterpreterBridge());
           }
         }
 
@@ -1259,22 +1262,53 @@
     for (size_t i = 0; i < info->number_of_inline_caches_; ++i) {
       std::vector<ProfileMethodInfo::ProfileClassReference> profile_classes;
       const InlineCache& cache = info->cache_[i];
+      ArtMethod* caller = info->GetMethod();
+      bool is_missing_types = false;
       for (size_t k = 0; k < InlineCache::kIndividualCacheSize; k++) {
         mirror::Class* cls = cache.classes_[k].Read();
         if (cls == nullptr) {
           break;
         }
-        const DexFile& class_dex_file = cls->GetDexFile();
-        dex::TypeIndex type_index = cls->GetDexTypeIndex();
-        if (ContainsElement(dex_base_locations, class_dex_file.GetBaseLocation())) {
+
+        // Check if the receiver is in the boot class path or if it's in the
+        // same class loader as the caller. If not, skip it, as there is not
+        // much we can do during AOT.
+        if (!cls->IsBootStrapClassLoaded() &&
+            caller->GetClassLoader() != cls->GetClassLoader()) {
+          is_missing_types = true;
+          continue;
+        }
+
+        const DexFile* class_dex_file = nullptr;
+        dex::TypeIndex type_index;
+
+        if (cls->GetDexCache() == nullptr) {
+          DCHECK(cls->IsArrayClass()) << cls->PrettyClass();
+          // Make a best effort to find the type index in the method's dex file.
+          // We could search all open dex files, but that might be expensive
+          // and is probably not worth it.
+          class_dex_file = dex_file;
+          type_index = cls->FindTypeIndexInOtherDexFile(*dex_file);
+        } else {
+          class_dex_file = &(cls->GetDexFile());
+          type_index = cls->GetDexTypeIndex();
+        }
+        if (!type_index.IsValid()) {
+          // Could be a proxy class or an array for which we couldn't find the type index.
+          is_missing_types = true;
+          continue;
+        }
+        if (ContainsElement(dex_base_locations, class_dex_file->GetBaseLocation())) {
           // Only consider classes from the same apk (including multidex).
           profile_classes.emplace_back(/*ProfileMethodInfo::ProfileClassReference*/
-              &class_dex_file, type_index);
+              class_dex_file, type_index);
+        } else {
+          is_missing_types = true;
         }
       }
       if (!profile_classes.empty()) {
         inline_caches.emplace_back(/*ProfileMethodInfo::ProfileInlineCache*/
-            cache.dex_pc_, profile_classes);
+            cache.dex_pc_, is_missing_types, profile_classes);
       }
     }
     methods.emplace_back(/*ProfileMethodInfo*/
diff --git a/runtime/jit/jit_code_cache.h b/runtime/jit/jit_code_cache.h
index 33a792f..c970979 100644
--- a/runtime/jit/jit_code_cache.h
+++ b/runtime/jit/jit_code_cache.h
@@ -229,6 +229,12 @@
   void MoveObsoleteMethod(ArtMethod* old_method, ArtMethod* new_method)
       REQUIRES(!lock_) REQUIRES(Locks::mutator_lock_);
 
+  // Dynamically change whether we want to garbage collect code. Should only be used
+  // by tests.
+  void SetGarbageCollectCode(bool value) {
+    garbage_collect_code_ = value;
+  }
+
  private:
   // Take ownership of maps.
   JitCodeCache(MemMap* code_map,
@@ -359,8 +365,8 @@
   // It is atomic to avoid locking when reading it.
   Atomic<uint64_t> last_update_time_ns_;
 
-  // Whether we can do garbage collection.
-  const bool garbage_collect_code_;
+  // Whether we can do garbage collection. Not 'const' as tests may override this.
+  bool garbage_collect_code_;
 
   // The size in bytes of used memory for the data portion of the code cache.
   size_t used_memory_for_data_ GUARDED_BY(lock_);
diff --git a/runtime/jit/profile_compilation_info.cc b/runtime/jit/profile_compilation_info.cc
index 5638ce1..b23a863 100644
--- a/runtime/jit/profile_compilation_info.cc
+++ b/runtime/jit/profile_compilation_info.cc
@@ -37,7 +37,8 @@
 namespace art {
 
 const uint8_t ProfileCompilationInfo::kProfileMagic[] = { 'p', 'r', 'o', '\0' };
-const uint8_t ProfileCompilationInfo::kProfileVersion[] = { '0', '0', '3', '\0' };  // inline caches
+// Last profile version: fix the order of dex files in the profile.
+const uint8_t ProfileCompilationInfo::kProfileVersion[] = { '0', '0', '4', '\0' };
 
 static constexpr uint16_t kMaxDexFileKeyLength = PATH_MAX;
 
@@ -46,16 +47,19 @@
 // using the same test profile.
 static constexpr bool kDebugIgnoreChecksum = false;
 
-static constexpr uint8_t kMegamorphicEncoding = 7;
+static constexpr uint8_t kIsMissingTypesEncoding = 6;
+static constexpr uint8_t kIsMegamorphicEncoding = 7;
 
 static_assert(sizeof(InlineCache::kIndividualCacheSize) == sizeof(uint8_t),
               "InlineCache::kIndividualCacheSize does not have the expect type size");
-static_assert(InlineCache::kIndividualCacheSize < kMegamorphicEncoding,
+static_assert(InlineCache::kIndividualCacheSize < kIsMegamorphicEncoding,
+              "InlineCache::kIndividualCacheSize is larger than expected");
+static_assert(InlineCache::kIndividualCacheSize < kIsMissingTypesEncoding,
               "InlineCache::kIndividualCacheSize is larger than expected");
 
 void ProfileCompilationInfo::DexPcData::AddClass(uint16_t dex_profile_idx,
                                                  const dex::TypeIndex& type_idx) {
-  if (is_megamorphic) {
+  if (is_megamorphic || is_missing_types) {
     return;
   }
   classes.emplace(dex_profile_idx, type_idx);
@@ -206,7 +210,8 @@
  *       Classes are grouped per their dex files and the line
  *       `dex_profile_index,class_id1,class_id2...,dex_profile_index2,...` encodes the
  *       mapping from `dex_profile_index` to the set of classes `class_id1,class_id2...`
- *    M stands for megamorphic and it's encoded as the byte kMegamorphicEncoding.
+ *    M stands for megamorphic or missing types and it's encoded as either
+ *    the byte kIsMegamorphicEncoding or kIsMissingTypesEncoding.
  *    When present, there will be no class ids following.
  **/
 bool ProfileCompilationInfo::Save(int fd) {
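The encoding works because a real class count can never collide with the sentinels: the static_asserts earlier in this file pin InlineCache::kIndividualCacheSize below both kIsMissingTypesEncoding (6) and kIsMegamorphicEncoding (7). A compilable sketch of the reader's side of that count byte, simplified from the load path:

    #include <cstdint>

    constexpr uint8_t kIsMissingTypesEncoding = 6;
    constexpr uint8_t kIsMegamorphicEncoding = 7;

    enum class InlineCacheState { kHasClasses, kMissingTypes, kMegamorphic };

    // A genuine dex_to_classes_map_size is at most kIndividualCacheSize (< 6),
    // so the two sentinel values are unambiguous.
    InlineCacheState Classify(uint8_t dex_to_classes_map_size) {
      if (dex_to_classes_map_size == kIsMissingTypesEncoding) {
        return InlineCacheState::kMissingTypes;
      }
      if (dex_to_classes_map_size == kIsMegamorphicEncoding) {
        return InlineCacheState::kMegamorphic;
      }
      return InlineCacheState::kHasClasses;
    }
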
@@ -222,15 +227,23 @@
   DCHECK_LE(info_.size(), std::numeric_limits<uint8_t>::max());
   AddUintToBuffer(&buffer, static_cast<uint8_t>(info_.size()));
 
+  // Make sure we write the dex files in order of their profile index. This
+  // avoids writing the index in the output file and simplifies the parsing logic.
+  std::vector<const std::string*> ordered_info_location(info_.size());
+  std::vector<const DexFileData*> ordered_info_data(info_.size());
   for (const auto& it : info_) {
+    ordered_info_location[it.second.profile_index] = &(it.first);
+    ordered_info_data[it.second.profile_index] = &(it.second);
+  }
+  for (size_t i = 0; i < info_.size(); i++) {
     if (buffer.size() > kMaxSizeToKeepBeforeWriting) {
       if (!WriteBuffer(fd, buffer.data(), buffer.size())) {
         return false;
       }
       buffer.clear();
     }
-    const std::string& dex_location = it.first;
-    const DexFileData& dex_data = it.second;
+    const std::string& dex_location = *ordered_info_location[i];
+    const DexFileData& dex_data = *ordered_info_data[i];
 
    // Note that we allow dex files without any methods or classes, so that
    // inline caches can refer to valid dex files.
@@ -289,10 +302,19 @@
     // Add the dex pc.
     AddUintToBuffer(buffer, dex_pc);
 
-    if (dex_pc_data.is_megamorphic) {
-      // Add the megamorphic encoding if needed and continue.
-      // If megamorphic, we don't add the rest of the classes.
-      AddUintToBuffer(buffer, kMegamorphicEncoding);
+    // Add the megamorphic/missing_types encoding if needed and continue.
+    // In either case we don't add any classes to the profile, so there's
+    // no point in continuing.
+    // TODO(calin): when types are missing there is still value in adding the
+    // rest of the classes. They can be added without bumping the profile version.
+    if (dex_pc_data.is_missing_types) {
+      DCHECK(!dex_pc_data.is_megamorphic);  // at this point the megamorphic flag should not be set.
+      DCHECK_EQ(classes.size(), 0u);
+      AddUintToBuffer(buffer, kIsMissingTypesEncoding);
+      continue;
+    } else if (dex_pc_data.is_megamorphic) {
+      DCHECK_EQ(classes.size(), 0u);
+      AddUintToBuffer(buffer, kIsMegamorphicEncoding);
       continue;
     }
 
@@ -403,11 +425,21 @@
   for (const auto& pmi_inline_cache_it : pmi.inline_caches) {
     uint16_t pmi_ic_dex_pc = pmi_inline_cache_it.first;
     const DexPcData& pmi_ic_dex_pc_data = pmi_inline_cache_it.second;
-    auto dex_pc_data_it = inline_cache_it->second.FindOrAdd(pmi_ic_dex_pc);
-    if (pmi_ic_dex_pc_data.is_megamorphic) {
-      dex_pc_data_it->second.SetMegamorphic();
+    DexPcData& dex_pc_data = inline_cache_it->second.FindOrAdd(pmi_ic_dex_pc)->second;
+    if (dex_pc_data.is_missing_types || dex_pc_data.is_megamorphic) {
+      // We are already megamorphic or we are missing types; no point in going forward.
       continue;
     }
+
+    if (pmi_ic_dex_pc_data.is_missing_types) {
+      dex_pc_data.SetIsMissingTypes();
+      continue;
+    }
+    if (pmi_ic_dex_pc_data.is_megamorphic) {
+      dex_pc_data.SetIsMegamorphic();
+      continue;
+    }
+
     for (const ClassReference& class_ref : pmi_ic_dex_pc_data.classes) {
       const DexReference& dex_ref = pmi.dex_references[class_ref.dex_profile_index];
       DexFileData* class_dex_data = GetOrAddDexFileData(
@@ -416,7 +448,7 @@
       if (class_dex_data == nullptr) {  // checksum mismatch
         return false;
       }
-      dex_pc_data_it->second.AddClass(class_dex_data->profile_index, class_ref.type_index);
+      dex_pc_data.AddClass(class_dex_data->profile_index, class_ref.type_index);
     }
   }
   return true;
@@ -432,6 +464,11 @@
   auto inline_cache_it = data->method_map.FindOrAdd(pmi.dex_method_index);
 
   for (const ProfileMethodInfo::ProfileInlineCache& cache : pmi.inline_caches) {
+    if (cache.is_missing_types) {
+      auto dex_pc_data_it = inline_cache_it->second.FindOrAdd(cache.dex_pc);
+      dex_pc_data_it->second.SetIsMissingTypes();
+      continue;
+    }
     for (const ProfileMethodInfo::ProfileClassReference& class_ref : cache.classes) {
       DexFileData* class_dex_data = GetOrAddDexFileData(
           GetProfileDexFileKey(class_ref.dex_file->GetLocation()),
@@ -440,6 +477,10 @@
         return false;
       }
       auto dex_pc_data_it = inline_cache_it->second.FindOrAdd(cache.dex_pc);
+      if (dex_pc_data_it->second.is_missing_types) {
+        // Don't bother adding classes if we are missing types.
+        break;
+      }
       dex_pc_data_it->second.AddClass(class_dex_data->profile_index, class_ref.type_index);
     }
   }
@@ -478,8 +519,12 @@
     READ_UINT(uint16_t, buffer, dex_pc, error);
     READ_UINT(uint8_t, buffer, dex_to_classes_map_size, error);
     auto dex_pc_data_it = inline_cache->FindOrAdd(dex_pc);
-    if (dex_to_classes_map_size == kMegamorphicEncoding) {
-      dex_pc_data_it->second.SetMegamorphic();
+    if (dex_to_classes_map_size == kIsMissingTypesEncoding) {
+      dex_pc_data_it->second.SetIsMissingTypes();
+      continue;
+    }
+    if (dex_to_classes_map_size == kIsMegamorphicEncoding) {
+      dex_pc_data_it->second.SetIsMegamorphic();
       continue;
     }
     for (; dex_to_classes_map_size > 0; dex_to_classes_map_size--) {
@@ -797,10 +842,13 @@
   SafeMap<uint8_t, uint8_t> dex_profile_index_remap;
   for (const auto& other_it : other.info_) {
     const std::string& other_dex_location = other_it.first;
+    uint32_t other_checksum = other_it.second.checksum;
     const DexFileData& other_dex_data = other_it.second;
-    auto info_it = info_.FindOrAdd(other_dex_location, DexFileData(other_dex_data.checksum, 0));
-    const DexFileData& dex_data = info_it->second;
-    dex_profile_index_remap.Put(other_dex_data.profile_index, dex_data.profile_index);
+    const DexFileData* dex_data = GetOrAddDexFileData(other_dex_location, other_checksum);
+    if (dex_data == nullptr) {
+      return false;  // Could happen if we exceed the number of allowed dex files.
+    }
+    dex_profile_index_remap.Put(other_dex_data.profile_index, dex_data->profile_index);
   }
 
   // Merge the actual profile data.
@@ -823,9 +871,15 @@
         uint16_t other_dex_pc = other_ic_it.first;
         const ClassSet& other_class_set = other_ic_it.second.classes;
         auto class_set = method_it->second.FindOrAdd(other_dex_pc);
-        for (const auto& class_it : other_class_set) {
-          class_set->second.AddClass(dex_profile_index_remap.Get(
-              class_it.dex_profile_index), class_it.type_index);
+        if (other_ic_it.second.is_missing_types) {
+          class_set->second.SetIsMissingTypes();
+        } else if (other_ic_it.second.is_megamorphic) {
+          class_set->second.SetIsMegamorphic();
+        } else {
+          for (const auto& class_it : other_class_set) {
+            class_set->second.AddClass(dex_profile_index_remap.Get(
+                class_it.dex_profile_index), class_it.type_index);
+          }
         }
       }
     }
@@ -945,10 +999,17 @@
   os << "ProfileInfo:";
 
   const std::string kFirstDexFileKeySubstitute = ":classes.dex";
+  // Write the entries in profile index order.
+  std::vector<const std::string*> ordered_info_location(info_.size());
+  std::vector<const DexFileData*> ordered_info_data(info_.size());
   for (const auto& it : info_) {
+    ordered_info_location[it.second.profile_index] = &(it.first);
+    ordered_info_data[it.second.profile_index] = &(it.second);
+  }
+  for (size_t profile_index = 0; profile_index < info_.size(); profile_index++) {
     os << "\n";
-    const std::string& location = it.first;
-    const DexFileData& dex_data = it.second;
+    const std::string& location = *ordered_info_location[profile_index];
+    const DexFileData& dex_data = *ordered_info_data[profile_index];
     if (print_full_dex_location) {
       os << location;
     } else {
@@ -956,6 +1017,7 @@
       std::string multidex_suffix = DexFile::GetMultiDexSuffix(location);
       os << (multidex_suffix.empty() ? kFirstDexFileKeySubstitute : multidex_suffix);
     }
+    os << " [index=" << static_cast<uint32_t>(dex_data.profile_index) << "]";
     const DexFile* dex_file = nullptr;
     if (dex_files != nullptr) {
       for (size_t i = 0; i < dex_files->size(); i++) {
@@ -975,8 +1037,10 @@
       os << "[";
       for (const auto& inline_cache_it : method_it.second) {
         os << "{" << std::hex << inline_cache_it.first << std::dec << ":";
-        if (inline_cache_it.second.is_megamorphic) {
-          os << "M";
+        if (inline_cache_it.second.is_missing_types) {
+          os << "MT";
+        } else if (inline_cache_it.second.is_megamorphic) {
+          os << "MM";
         } else {
           for (const ClassReference& class_ref : inline_cache_it.second.classes) {
             os << "(" << static_cast<uint32_t>(class_ref.dex_profile_index)
@@ -1018,7 +1082,8 @@
     const DexFile* dex_file = nullptr;
     if (dex_files != nullptr) {
       for (size_t i = 0; i < dex_files->size(); i++) {
-        if (location == (*dex_files)[i]->GetLocation()) {
+        if (location == GetProfileDexFileKey((*dex_files)[i]->GetLocation()) &&
+            dex_data.checksum == (*dex_files)[i]->GetLocationChecksum()) {
           dex_file = (*dex_files)[i];
         }
       }
@@ -1035,15 +1100,22 @@
   return info_.Equals(other.info_);
 }
 
-std::set<DexCacheResolvedClasses> ProfileCompilationInfo::GetResolvedClasses() const {
+std::set<DexCacheResolvedClasses> ProfileCompilationInfo::GetResolvedClasses(
+    const std::unordered_set<std::string>& dex_files_locations) const {
+  std::unordered_map<std::string, std::string> key_to_location_map;
+  for (const std::string& location : dex_files_locations) {
+    key_to_location_map.emplace(GetProfileDexFileKey(location), location);
+  }
   std::set<DexCacheResolvedClasses> ret;
   for (auto&& pair : info_) {
     const std::string& profile_key = pair.first;
-    const DexFileData& data = pair.second;
-    // TODO: Is it OK to use the same location for both base and dex location here?
-    DexCacheResolvedClasses classes(profile_key, profile_key, data.checksum);
-    classes.AddClasses(data.class_set.begin(), data.class_set.end());
-    ret.insert(classes);
+    auto it = key_to_location_map.find(profile_key);
+    if (it != key_to_location_map.end()) {
+      const DexFileData& data = pair.second;
+      DexCacheResolvedClasses classes(it->second, it->second, data.checksum);
+      classes.AddClasses(data.class_set.begin(), data.class_set.end());
+      ret.insert(classes);
+    }
   }
   return ret;
 }
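With this change, callers name the dex locations they care about and get back only the matching entries, keyed by the real location instead of the profile key. A hypothetical call site (the variable names and apk path are made up for illustration):

    #include <set>
    #include <string>
    #include <unordered_set>

    std::unordered_set<std::string> locations = {
        "/data/app/com.example-1/base.apk",  // hypothetical path
    };
    std::set<DexCacheResolvedClasses> resolved =
        profile_info.GetResolvedClasses(locations);
    // Profile entries whose key does not map back to one of 'locations' are skipped.
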
@@ -1104,7 +1176,7 @@
   }
 
   // We can't use a simple equality test because we need to match the dex files
-  // of the inline caches which might have different profile indices.
+  // of the inline caches which might have different profile indexes.
   for (const auto& inline_cache_it : inline_caches) {
     uint16_t dex_pc = inline_cache_it.first;
     const DexPcData dex_pc_data = inline_cache_it.second;
@@ -1113,7 +1185,8 @@
       return false;
     }
     const DexPcData& other_dex_pc_data = other_it->second;
-    if (dex_pc_data.is_megamorphic != other_dex_pc_data.is_megamorphic) {
+    if (dex_pc_data.is_megamorphic != other_dex_pc_data.is_megamorphic ||
+        dex_pc_data.is_missing_types != other_dex_pc_data.is_missing_types) {
       return false;
     }
     for (const ClassReference& class_ref : dex_pc_data.classes) {
diff --git a/runtime/jit/profile_compilation_info.h b/runtime/jit/profile_compilation_info.h
index 4bfbfcd..6ad528c 100644
--- a/runtime/jit/profile_compilation_info.h
+++ b/runtime/jit/profile_compilation_info.h
@@ -36,18 +36,22 @@
  */
 struct ProfileMethodInfo {
   struct ProfileClassReference {
+    ProfileClassReference() : dex_file(nullptr) {}
     ProfileClassReference(const DexFile* dex, const dex::TypeIndex& index)
         : dex_file(dex), type_index(index) {}
 
     const DexFile* dex_file;
-    const dex::TypeIndex type_index;
+    dex::TypeIndex type_index;
   };
 
   struct ProfileInlineCache {
-    ProfileInlineCache(uint32_t pc, const std::vector<ProfileClassReference>& profile_classes)
-        : dex_pc(pc), classes(profile_classes) {}
+    ProfileInlineCache(uint32_t pc,
+                       bool missing_types,
+                       const std::vector<ProfileClassReference>& profile_classes)
+        : dex_pc(pc), is_missing_types(missing_types), classes(profile_classes) {}
 
     const uint32_t dex_pc;
+    const bool is_missing_types;
     const std::vector<ProfileClassReference> classes;
   };
 
@@ -91,6 +95,11 @@
       return dex_checksum == other.dex_checksum && dex_location == other.dex_location;
     }
 
+    bool MatchesDex(const DexFile* dex_file) const {
+      return dex_checksum == dex_file->GetLocationChecksum() &&
+           dex_location == GetProfileDexFileKey(dex_file->GetLocation());
+    }
+
     std::string dex_location;
     uint32_t dex_checksum;
   };
@@ -128,18 +137,30 @@
 
   // Encodes the actual inline cache for a given dex pc (whether or not the receiver is
   // megamorphic and its possible types).
-  // If the receiver is megamorphic the set of classes will be empty.
+  // If the receiver is megamorphic or is missing types, the set of classes will be empty.
   struct DexPcData {
-    DexPcData() : is_megamorphic(false) {}
+    DexPcData() : is_missing_types(false), is_megamorphic(false) {}
     void AddClass(uint16_t dex_profile_idx, const dex::TypeIndex& type_idx);
-    void SetMegamorphic() {
+    void SetIsMegamorphic() {
+      if (is_missing_types) return;
       is_megamorphic = true;
       classes.clear();
     }
+    void SetIsMissingTypes() {
+      is_megamorphic = false;
+      is_missing_types = true;
+      classes.clear();
+    }
     bool operator==(const DexPcData& other) const {
-      return is_megamorphic == other.is_megamorphic && classes == other.classes;
+      return is_megamorphic == other.is_megamorphic &&
+          is_missing_types == other.is_missing_types &&
+          classes == other.classes;
     }
 
+    // Not all runtime types can be encoded in the profile. For example if the receiver
+    // type is in a dex file which is not tracked for profiling its type cannot be
+    // encoded. When types are missing this field will be set to true.
+    bool is_missing_types;
     bool is_megamorphic;
     ClassSet classes;
   };
@@ -218,9 +239,8 @@
   bool Equals(const ProfileCompilationInfo& other);
 
   // Return the class descriptors for all of the classes in the profiles' class sets.
-  // Note the dex location is actually the profile key, the caller needs to call back in to the
-  // profile info stuff to generate a map back to the dex location.
-  std::set<DexCacheResolvedClasses> GetResolvedClasses() const;
+  std::set<DexCacheResolvedClasses> GetResolvedClasses(
+      const std::unordered_set<std::string>& dex_files_locations) const;
 
   // Clear the resolved classes from the current object.
   void ClearResolvedClasses();
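Note the new contract: callers must now name the dex locations they are interested in,
and profile entries whose key does not match any requested location are skipped. A
hypothetical call site (the location string is assumed):

    std::unordered_set<std::string> locations;
    locations.insert("/data/app/com.example/base.apk");
    std::set<DexCacheResolvedClasses> resolved = info.GetResolvedClasses(locations);
    // Each returned entry is keyed by the requested location, not the profile key.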
diff --git a/runtime/jit/profile_compilation_info_test.cc b/runtime/jit/profile_compilation_info_test.cc
index 93b47ac..5cd8e8f 100644
--- a/runtime/jit/profile_compilation_info_test.cc
+++ b/runtime/jit/profile_compilation_info_test.cc
@@ -108,26 +108,31 @@
     for (ArtMethod* method : methods) {
       std::vector<ProfileMethodInfo::ProfileInlineCache> caches;
       // Monomorphic
-      for (uint16_t dex_pc = 0; dex_pc < 1; dex_pc++) {
+      for (uint16_t dex_pc = 0; dex_pc < 11; dex_pc++) {
         std::vector<ProfileMethodInfo::ProfileClassReference> classes;
         classes.emplace_back(method->GetDexFile(), dex::TypeIndex(0));
-        caches.emplace_back(dex_pc, classes);
+        caches.emplace_back(dex_pc, /*is_missing_types*/false, classes);
       }
       // Polymorphic
-      for (uint16_t dex_pc = 1; dex_pc < 2; dex_pc++) {
+      for (uint16_t dex_pc = 11; dex_pc < 22; dex_pc++) {
         std::vector<ProfileMethodInfo::ProfileClassReference> classes;
         for (uint16_t k = 0; k < InlineCache::kIndividualCacheSize / 2; k++) {
           classes.emplace_back(method->GetDexFile(), dex::TypeIndex(k));
         }
-        caches.emplace_back(dex_pc, classes);
+        caches.emplace_back(dex_pc, /*is_missing_types*/false, classes);
       }
       // Megamorphic
-      for (uint16_t dex_pc = 2; dex_pc < 3; dex_pc++) {
+      for (uint16_t dex_pc = 22; dex_pc < 33; dex_pc++) {
         std::vector<ProfileMethodInfo::ProfileClassReference> classes;
         for (uint16_t k = 0; k < 2 * InlineCache::kIndividualCacheSize; k++) {
           classes.emplace_back(method->GetDexFile(), dex::TypeIndex(k));
         }
-        caches.emplace_back(dex_pc, classes);
+        caches.emplace_back(dex_pc, /*is_missing_types*/false, classes);
+      }
+      // Missing types
+      for (uint16_t dex_pc = 33; dex_pc < 44; dex_pc++) {
+        std::vector<ProfileMethodInfo::ProfileClassReference> classes;
+        caches.emplace_back(dex_pc, /*is_missing_types*/true, classes);
       }
       ProfileMethodInfo pmi(method->GetDexFile(), method->GetDexMethodIndex(), caches);
       profile_methods.push_back(pmi);
@@ -148,12 +153,15 @@
     ProfileCompilationInfo::OfflineProfileMethodInfo offline_pmi;
     SafeMap<DexFile*, uint8_t> dex_map;  // dex files to profile index
     for (const auto& inline_cache : pmi.inline_caches) {
+      ProfileCompilationInfo::DexPcData& dex_pc_data =
+          offline_pmi.inline_caches.FindOrAdd(inline_cache.dex_pc)->second;
+      if (inline_cache.is_missing_types) {
+        dex_pc_data.SetIsMissingTypes();
+      }
       for (const auto& class_ref : inline_cache.classes) {
         uint8_t dex_profile_index = dex_map.FindOrAdd(const_cast<DexFile*>(class_ref.dex_file),
                                                       static_cast<uint8_t>(dex_map.size()))->second;
-        offline_pmi.inline_caches
-            .FindOrAdd(inline_cache.dex_pc)->second
-            .AddClass(dex_profile_index, class_ref.type_index);
+        dex_pc_data.AddClass(dex_profile_index, class_ref.type_index);
         if (dex_profile_index >= offline_pmi.dex_references.size()) {
           // This is a new dex.
           const std::string& dex_key = ProfileCompilationInfo::GetProfileDexFileKey(
@@ -170,18 +178,18 @@
   ProfileCompilationInfo::OfflineProfileMethodInfo GetOfflineProfileMethodInfo() {
     ProfileCompilationInfo::OfflineProfileMethodInfo pmi;
 
-    pmi.dex_references.emplace_back("dex_location1", /* checksum */ 1);
-    pmi.dex_references.emplace_back("dex_location2", /* checksum */ 2);
-    pmi.dex_references.emplace_back("dex_location3", /* checksum */ 3);
+    pmi.dex_references.emplace_back("dex_location1", /* checksum */1);
+    pmi.dex_references.emplace_back("dex_location2", /* checksum */2);
+    pmi.dex_references.emplace_back("dex_location3", /* checksum */3);
 
     // Monomorphic
-    for (uint16_t dex_pc = 0; dex_pc < 1; dex_pc++) {
+    for (uint16_t dex_pc = 0; dex_pc < 11; dex_pc++) {
       ProfileCompilationInfo::DexPcData dex_pc_data;
       dex_pc_data.AddClass(0, dex::TypeIndex(0));
       pmi.inline_caches.Put(dex_pc, dex_pc_data);
     }
     // Polymorphic
-    for (uint16_t dex_pc = 1; dex_pc < 2; dex_pc++) {
+    for (uint16_t dex_pc = 11; dex_pc < 22; dex_pc++) {
       ProfileCompilationInfo::DexPcData dex_pc_data;
       dex_pc_data.AddClass(0, dex::TypeIndex(0));
       dex_pc_data.AddClass(1, dex::TypeIndex(1));
@@ -190,9 +198,15 @@
        pmi.inline_caches.Put(dex_pc, dex_pc_data);
     }
     // Megamorphic
-    for (uint16_t dex_pc = 2; dex_pc < 3; dex_pc++) {
+    for (uint16_t dex_pc = 22; dex_pc < 33; dex_pc++) {
       ProfileCompilationInfo::DexPcData dex_pc_data;
-      dex_pc_data.is_megamorphic = true;
+      dex_pc_data.SetIsMegamorphic();
+      pmi.inline_caches.Put(dex_pc, dex_pc_data);
+    }
+    // Missing types
+    for (uint16_t dex_pc = 33; dex_pc < 44; dex_pc++) {
+      ProfileCompilationInfo::DexPcData dex_pc_data;
+      dex_pc_data.SetIsMissingTypes();
       pmi.inline_caches.Put(dex_pc, dex_pc_data);
     }
 
@@ -207,7 +221,13 @@
     }
   }
 
-  // Cannot sizeof the actual arrays so hardcode the values here.
+  void SetIsMissingTypes(/*out*/ProfileCompilationInfo::OfflineProfileMethodInfo* pmi) {
+    for (auto& it : pmi->inline_caches) {
+      it.second.SetIsMissingTypes();
+    }
+  }
+
+  // Cannot use sizeof on the actual arrays, so hard code the values here.
   // They should not change anyway.
   static constexpr int kProfileMagicSize = 4;
   static constexpr int kProfileVersionSize = 4;
@@ -530,6 +550,58 @@
   ASSERT_TRUE(loaded_pmi1 == pmi_extra);
 }
 
+TEST_F(ProfileCompilationInfoTest, MissingTypesInlineCaches) {
+  ScratchFile profile;
+
+  ProfileCompilationInfo saved_info;
+  ProfileCompilationInfo::OfflineProfileMethodInfo pmi = GetOfflineProfileMethodInfo();
+
+  // Add methods with inline caches.
+  for (uint16_t method_idx = 0; method_idx < 10; method_idx++) {
+    ASSERT_TRUE(AddMethod("dex_location1", /* checksum */ 1, method_idx, pmi, &saved_info));
+  }
+
+  ASSERT_TRUE(saved_info.Save(GetFd(profile)));
+  ASSERT_EQ(0, profile.GetFile()->Flush());
+
+  // Make some inline caches megamorphic and add them to the profile again.
+  ProfileCompilationInfo saved_info_extra;
+  ProfileCompilationInfo::OfflineProfileMethodInfo pmi_extra = GetOfflineProfileMethodInfo();
+  MakeMegamorphic(&pmi_extra);
+  for (uint16_t method_idx = 5; method_idx < 10; method_idx++) {
+    ASSERT_TRUE(AddMethod(
+        "dex_location1", /* checksum */ 1, method_idx, pmi_extra, &saved_info_extra));
+  }
+
+  // Mark all inline caches with missing types and add them to the profile again.
+  // This verifies that all inline caches (megamorphic or not) are marked as missing types.
+  ProfileCompilationInfo::OfflineProfileMethodInfo missing_types = GetOfflineProfileMethodInfo();
+  SetIsMissingTypes(&missing_types);
+  for (uint16_t method_idx = 0; method_idx < 10; method_idx++) {
+    ASSERT_TRUE(AddMethod(
+        "dex_location1", /* checksum */ 1, method_idx, missing_types, &saved_info_extra));
+  }
+
+  ASSERT_TRUE(profile.GetFile()->ResetOffset());
+  ASSERT_TRUE(saved_info_extra.Save(GetFd(profile)));
+  ASSERT_EQ(0, profile.GetFile()->Flush());
+
+  // Merge the profiles so that we have the same view as the file.
+  ASSERT_TRUE(saved_info.MergeWith(saved_info_extra));
+
+  // Check that we get back what we saved.
+  ProfileCompilationInfo loaded_info;
+  ASSERT_TRUE(profile.GetFile()->ResetOffset());
+  ASSERT_TRUE(loaded_info.Load(GetFd(profile)));
+
+  ASSERT_TRUE(loaded_info.Equals(saved_info));
+
+  ProfileCompilationInfo::OfflineProfileMethodInfo loaded_pmi1;
+  ASSERT_TRUE(loaded_info.GetMethod("dex_location1",
+                                    /* checksum */ 1,
+                                    /* method_idx */ 3,
+                                    &loaded_pmi1));
+  ASSERT_TRUE(loaded_pmi1 == missing_types);
+}
+
 TEST_F(ProfileCompilationInfoTest, SaveArtMethodsWithInlineCaches) {
   ScratchFile profile;
 
@@ -570,7 +642,7 @@
   }
 }
 
-TEST_F(ProfileCompilationInfoTest, InvalidChecksumInInlineCahce) {
+TEST_F(ProfileCompilationInfoTest, InvalidChecksumInInlineCache) {
   ScratchFile profile;
 
   ProfileCompilationInfo info;
@@ -657,4 +729,62 @@
       /*dex_location*/ "256", /* checksum */ 1, /* method_idx */ 0, &info));
 }
 
+TEST_F(ProfileCompilationInfoTest, MegamorphicInlineCachesMerge) {
+  // Create a megamorphic inline cache.
+  ProfileCompilationInfo::OfflineProfileMethodInfo pmi;
+  pmi.dex_references.emplace_back("dex_location1", /* checksum */ 1);
+  ProfileCompilationInfo::DexPcData dex_pc_data;
+  dex_pc_data.SetIsMegamorphic();
+  pmi.inline_caches.Put(/*dex_pc*/ 0, dex_pc_data);
+
+  ProfileCompilationInfo info_megamorphic;
+  ASSERT_TRUE(AddMethod("dex_location1",
+                        /*checksum*/ 1,
+                        /*method_idx*/ 0,
+                        pmi,
+                        &info_megamorphic));
+
+  // Create a profile with no inline caches (for the same method).
+  ProfileCompilationInfo info_no_inline_cache;
+  ASSERT_TRUE(AddMethod("dex_location1",
+                        /*checksum*/ 1,
+                        /*method_idx*/ 0,
+                        &info_no_inline_cache));
+
+  // Merge the megamorphic cache into the empty one.
+  ASSERT_TRUE(info_no_inline_cache.MergeWith(info_megamorphic));
+  ScratchFile profile;
+  // Saving profile should work without crashing (b/35644850).
+  ASSERT_TRUE(info_no_inline_cache.Save(GetFd(profile)));
+}
+
+TEST_F(ProfileCompilationInfoTest, MissingTypesInlineCachesMerge) {
+  // Create an inline cache with missing types.
+  ProfileCompilationInfo::OfflineProfileMethodInfo pmi;
+  pmi.dex_references.emplace_back("dex_location1", /* checksum */ 1);
+  ProfileCompilationInfo::DexPcData dex_pc_data;
+  dex_pc_data.SetIsMissingTypes();
+  pmi.inline_caches.Put(/*dex_pc*/ 0, dex_pc_data);
+
+  ProfileCompilationInfo info_missing_types;
+  ASSERT_TRUE(AddMethod("dex_location1",
+                        /*checksum*/ 1,
+                        /*method_idx*/ 0,
+                        pmi,
+                        &info_missing_types));
+
+  // Create a profile with no inline caches (for the same method).
+  ProfileCompilationInfo info_no_inline_cache;
+  ASSERT_TRUE(AddMethod("dex_location1",
+                        /*checksum*/ 1,
+                        /*method_idx*/ 0,
+                        &info_no_inline_cache));
+
+  // Merge the missing type cache into the empty one.
+  // Everything should be saved without errors.
+  ASSERT_TRUE(info_no_inline_cache.MergeWith(info_missing_types));
+  ScratchFile profile;
+  ASSERT_TRUE(info_no_inline_cache.Save(GetFd(profile)));
+}
+
 }  // namespace art
diff --git a/runtime/jit/profile_saver.cc b/runtime/jit/profile_saver.cc
index 61e6c41..2724b00 100644
--- a/runtime/jit/profile_saver.cc
+++ b/runtime/jit/profile_saver.cc
@@ -39,14 +39,9 @@
 ProfileSaver::ProfileSaver(const ProfileSaverOptions& options,
                            const std::string& output_filename,
                            jit::JitCodeCache* jit_code_cache,
-                           const std::vector<std::string>& code_paths,
-                           const std::string& foreign_dex_profile_path,
-                           const std::string& app_data_dir)
+                           const std::vector<std::string>& code_paths)
     : jit_code_cache_(jit_code_cache),
-      foreign_dex_profile_path_(foreign_dex_profile_path),
       shutting_down_(false),
-      last_save_number_of_methods_(0),
-      last_save_number_of_classes_(0),
       last_time_ns_saver_woke_up_(0),
       jit_activity_notifications_(0),
       wait_lock_("ProfileSaver wait lock"),
@@ -58,13 +53,12 @@
       total_number_of_failed_writes_(0),
       total_ms_of_sleep_(0),
       total_ns_of_work_(0),
-      total_number_of_foreign_dex_marks_(0),
       max_number_of_profile_entries_cached_(0),
       total_number_of_hot_spikes_(0),
       total_number_of_wake_ups_(0),
       options_(options) {
   DCHECK(options_.IsEnabled());
-  AddTrackedLocations(output_filename, app_data_dir, code_paths);
+  AddTrackedLocations(output_filename, code_paths);
 }
 
 void ProfileSaver::Run() {
@@ -175,10 +169,10 @@
   }
 }
 
-ProfileCompilationInfo* ProfileSaver::GetCachedProfiledInfo(const std::string& filename) {
+ProfileSaver::ProfileInfoCache* ProfileSaver::GetCachedProfiledInfo(const std::string& filename) {
   auto info_it = profile_cache_.find(filename);
   if (info_it == profile_cache_.end()) {
-    info_it = profile_cache_.Put(filename, ProfileCompilationInfo());
+    info_it = profile_cache_.Put(filename, ProfileInfoCache());
   }
   return &info_it->second;
 }
@@ -252,8 +246,9 @@
                        << " (" << classes.GetDexLocation() << ")";
       }
     }
-    ProfileCompilationInfo* info = GetCachedProfiledInfo(filename);
-    info->AddMethodsAndClasses(profile_methods_for_location, resolved_classes_for_location);
+    ProfileInfoCache* cached_info = GetCachedProfiledInfo(filename);
+    cached_info->profile.AddMethodsAndClasses(profile_methods_for_location,
+                                              resolved_classes_for_location);
     total_number_of_profile_entries_cached += resolved_classes_for_location.size();
   }
   max_number_of_profile_entries_cached_ = std::max(
@@ -287,14 +282,15 @@
       total_number_of_code_cache_queries_++;
     }
 
-    ProfileCompilationInfo* cached_info = GetCachedProfiledInfo(filename);
-    cached_info->AddMethodsAndClasses(profile_methods, std::set<DexCacheResolvedClasses>());
+    ProfileInfoCache* cached_info = GetCachedProfiledInfo(filename);
+    ProfileCompilationInfo* cached_profile = &cached_info->profile;
+    cached_profile->AddMethodsAndClasses(profile_methods, std::set<DexCacheResolvedClasses>());
     int64_t delta_number_of_methods =
-        cached_info->GetNumberOfMethods() -
-        static_cast<int64_t>(last_save_number_of_methods_);
+        cached_profile->GetNumberOfMethods() -
+        static_cast<int64_t>(cached_info->last_save_number_of_methods);
     int64_t delta_number_of_classes =
-        cached_info->GetNumberOfResolvedClasses() -
-        static_cast<int64_t>(last_save_number_of_classes_);
+        cached_profile->GetNumberOfResolvedClasses() -
+        static_cast<int64_t>(cached_info->last_save_number_of_classes);
 
     if (delta_number_of_methods < options_.GetMinMethodsToSave() &&
         delta_number_of_classes < options_.GetMinClassesToSave()) {
@@ -308,12 +304,12 @@
     uint64_t bytes_written;
     // Force the save. In case the profile data is corrupted or the profile
     // has the wrong version, this will "fix" the file to the correct format.
-    if (cached_info->MergeAndSave(filename, &bytes_written, /*force*/ true)) {
-      last_save_number_of_methods_ = cached_info->GetNumberOfMethods();
-      last_save_number_of_classes_ = cached_info->GetNumberOfResolvedClasses();
+    if (cached_profile->MergeAndSave(filename, &bytes_written, /*force*/ true)) {
+      cached_info->last_save_number_of_methods = cached_profile->GetNumberOfMethods();
+      cached_info->last_save_number_of_classes = cached_profile->GetNumberOfResolvedClasses();
       // Clear resolved classes. No need to store them around as
       // they don't change after the first write.
-      cached_info->ClearResolvedClasses();
+      cached_profile->ClearResolvedClasses();
       if (bytes_written > 0) {
         total_number_of_writes_++;
         total_bytes_written_ += bytes_written;
@@ -330,8 +326,8 @@
       total_number_of_failed_writes_++;
     }
     total_number_of_profile_entries_cached +=
-        cached_info->GetNumberOfMethods() +
-        cached_info->GetNumberOfResolvedClasses();
+        cached_profile->GetNumberOfMethods() +
+        cached_profile->GetNumberOfResolvedClasses();
   }
   max_number_of_profile_entries_cached_ = std::max(
       max_number_of_profile_entries_cached_,
@@ -382,9 +378,7 @@
 void ProfileSaver::Start(const ProfileSaverOptions& options,
                          const std::string& output_filename,
                          jit::JitCodeCache* jit_code_cache,
-                         const std::vector<std::string>& code_paths,
-                         const std::string& foreign_dex_profile_path,
-                         const std::string& app_data_dir) {
+                         const std::vector<std::string>& code_paths) {
   DCHECK(options.IsEnabled());
   DCHECK(Runtime::Current()->GetJit() != nullptr);
   DCHECK(!output_filename.empty());
@@ -409,7 +403,7 @@
     // apps which share the same runtime).
     DCHECK_EQ(instance_->jit_code_cache_, jit_code_cache);
     // Add the code_paths to the tracked locations.
-    instance_->AddTrackedLocations(output_filename, app_data_dir, code_paths_to_profile);
+    instance_->AddTrackedLocations(output_filename, code_paths_to_profile);
     return;
   }
 
@@ -419,9 +413,7 @@
   instance_ = new ProfileSaver(options,
                                output_filename,
                                jit_code_cache,
-                               code_paths_to_profile,
-                               foreign_dex_profile_path,
-                               app_data_dir);
+                               code_paths_to_profile);
 
   // Create a new thread which does the saving.
   CHECK_PTHREAD_CALL(
@@ -481,154 +473,16 @@
 }
 
 void ProfileSaver::AddTrackedLocations(const std::string& output_filename,
-                                       const std::string& app_data_dir,
                                        const std::vector<std::string>& code_paths) {
   auto it = tracked_dex_base_locations_.find(output_filename);
   if (it == tracked_dex_base_locations_.end()) {
     tracked_dex_base_locations_.Put(output_filename,
                                     std::set<std::string>(code_paths.begin(), code_paths.end()));
-    if (!app_data_dir.empty()) {
-      app_data_dirs_.insert(app_data_dir);
-    }
   } else {
     it->second.insert(code_paths.begin(), code_paths.end());
   }
 }
 
-// TODO(calin): This may lead to several calls to realpath.
-// Consider moving the logic to the saver thread (i.e. when notified,
-// only cache the location, and then wake up the saver thread to do the
-// comparisons with the real file paths and to create the markers).
-void ProfileSaver::NotifyDexUse(const std::string& dex_location) {
-  if (!ShouldProfileLocation(dex_location)) {
-    return;
-  }
-  std::set<std::string> app_code_paths;
-  std::string foreign_dex_profile_path;
-  std::set<std::string> app_data_dirs;
-  {
-    MutexLock mu(Thread::Current(), *Locks::profiler_lock_);
-    if (instance_ == nullptr) {
-      return;
-    }
-    // Make a copy so that we don't hold the lock while doing I/O.
-    for (const auto& it : instance_->tracked_dex_base_locations_) {
-      app_code_paths.insert(it.second.begin(), it.second.end());
-    }
-    foreign_dex_profile_path = instance_->foreign_dex_profile_path_;
-    app_data_dirs.insert(instance_->app_data_dirs_.begin(), instance_->app_data_dirs_.end());
-  }
-
-  bool mark_created = MaybeRecordDexUseInternal(dex_location,
-                                                app_code_paths,
-                                                foreign_dex_profile_path,
-                                                app_data_dirs);
-  if (mark_created) {
-    MutexLock mu(Thread::Current(), *Locks::profiler_lock_);
-    if (instance_ != nullptr) {
-      instance_->total_number_of_foreign_dex_marks_++;
-    }
-  }
-}
-
-static bool CheckContainsWithRealPath(const std::set<std::string>& paths_set,
-                                      const std::string& path_to_check) {
-  for (const auto& path : paths_set) {
-    UniqueCPtr<const char[]> real_path(realpath(path.c_str(), nullptr));
-    if (real_path == nullptr) {
-      PLOG(WARNING) << "Could not get realpath for " << path;
-      continue;
-    }
-    std::string real_path_str(real_path.get());
-    if (real_path_str == path_to_check) {
-      return true;
-    }
-  }
-  return false;
-}
-
-// After the call, dex_location_real_path will contain the marker's name.
-static bool CreateForeignDexMarker(const std::string& foreign_dex_profile_path,
-                                   /*in-out*/ std::string* dex_location_real_path) {
-  // For foreign dex files we record a flag on disk. PackageManager will (potentially) take this
-  // into account when deciding how to optimize the loaded dex file.
-  // The expected flag name is the canonical path of the apk where '/' is substituted to '@'.
-  // (it needs to be kept in sync with
-  // frameworks/base/services/core/java/com/android/server/pm/PackageDexOptimizer.java)
-  std::replace(dex_location_real_path->begin(), dex_location_real_path->end(), '/', '@');
-  std::string flag_path = foreign_dex_profile_path + "/" + *dex_location_real_path;
-  // We use O_RDONLY as the access mode because we must supply some access
-  // mode, and there is no access mode that means 'create but do not read' the
-  // file. We will not not actually read from the file.
-  int fd = TEMP_FAILURE_RETRY(open(flag_path.c_str(),
-        O_CREAT | O_RDONLY | O_EXCL | O_CLOEXEC | O_NOFOLLOW, 0));
-  if (fd != -1) {
-    if (close(fd) != 0) {
-      PLOG(WARNING) << "Could not close file after flagging foreign dex use " << flag_path;
-    }
-    return true;
-  } else {
-    if (errno != EEXIST && errno != EACCES) {
-      // Another app could have already created the file, and selinux may not
-      // allow the read access to the file implied by the call to open.
-      PLOG(WARNING) << "Could not create foreign dex use mark " << flag_path;
-      return false;
-    }
-    return true;
-  }
-}
-
-bool ProfileSaver::MaybeRecordDexUseInternal(
-      const std::string& dex_location,
-      const std::set<std::string>& app_code_paths,
-      const std::string& foreign_dex_profile_path,
-      const std::set<std::string>& app_data_dirs) {
-  if (dex_location.empty()) {
-    LOG(WARNING) << "Asked to record foreign dex use with an empty dex location.";
-    return false;
-  }
-  if (foreign_dex_profile_path.empty()) {
-    LOG(WARNING) << "Asked to record foreign dex use without a valid profile path ";
-    return false;
-  }
-
-  if (app_code_paths.find(dex_location) != app_code_paths.end()) {
-    // The dex location belongs to the application code paths. Nothing to record.
-    return false;
-  }
-
-  if (app_data_dirs.find(dex_location) != app_data_dirs.end()) {
-    // The dex location is under the application folder. Nothing to record.
-    return false;
-  }
-
-  // Do another round of checks with the real paths.
-  // Application directory could be a symlink (e.g. /data/data instead of /data/user/0), and we
-  // don't have control over how the dex files are actually loaded (symlink or canonical path),
-
-  // Note that we could cache all the real locations in the saver (since it's an expensive
-  // operation). However we expect that app_code_paths is small (usually 1 element), and
-  // NotifyDexUse is called just a few times in the app lifetime. So we make the compromise
-  // to save some bytes of memory usage.
-
-  UniqueCPtr<const char[]> dex_location_real_path(realpath(dex_location.c_str(), nullptr));
-  if (dex_location_real_path == nullptr) {
-    PLOG(WARNING) << "Could not get realpath for " << dex_location;
-    return false;
-  }
-  std::string dex_location_real_path_str(dex_location_real_path.get());
-
-  if (CheckContainsWithRealPath(app_code_paths, dex_location_real_path_str)) {
-    return false;
-  }
-
-  if (CheckContainsWithRealPath(app_data_dirs, dex_location_real_path_str)) {
-    return false;
-  }
-
-  return CreateForeignDexMarker(foreign_dex_profile_path, &dex_location_real_path_str);
-}
-
 void ProfileSaver::DumpInstanceInfo(std::ostream& os) {
   MutexLock mu(Thread::Current(), *Locks::profiler_lock_);
   if (instance_ != nullptr) {
@@ -645,8 +499,6 @@
      << "ProfileSaver total_number_of_failed_writes=" << total_number_of_failed_writes_ << '\n'
      << "ProfileSaver total_ms_of_sleep=" << total_ms_of_sleep_ << '\n'
      << "ProfileSaver total_ms_of_work=" << NsToMs(total_ns_of_work_) << '\n'
-     << "ProfileSaver total_number_of_foreign_dex_marks="
-     << total_number_of_foreign_dex_marks_ << '\n'
      << "ProfileSaver max_number_profile_entries_cached="
      << max_number_of_profile_entries_cached_ << '\n'
      << "ProfileSaver total_number_of_hot_spikes=" << total_number_of_hot_spikes_ << '\n'
@@ -674,10 +526,8 @@
                                  uint16_t method_idx) {
   MutexLock mu(Thread::Current(), *Locks::profiler_lock_);
   if (instance_ != nullptr) {
-    ProfileCompilationInfo* info = instance_->GetCachedProfiledInfo(profile);
-    if (info != nullptr) {
-      return info->ContainsMethod(MethodReference(dex_file, method_idx));
-    }
+    const ProfileCompilationInfo& info = instance_->GetCachedProfiledInfo(profile)->profile;
+    return info.ContainsMethod(MethodReference(dex_file, method_idx));
   }
   return false;
 }
diff --git a/runtime/jit/profile_saver.h b/runtime/jit/profile_saver.h
index 9c5e41f..8e0682d 100644
--- a/runtime/jit/profile_saver.h
+++ b/runtime/jit/profile_saver.h
@@ -32,9 +32,7 @@
   static void Start(const ProfileSaverOptions& options,
                     const std::string& output_filename,
                     jit::JitCodeCache* jit_code_cache,
-                    const std::vector<std::string>& code_paths,
-                    const std::string& foreign_dex_profile_path,
-                    const std::string& app_data_dir)
+                    const std::vector<std::string>& code_paths)
       REQUIRES(!Locks::profiler_lock_, !wait_lock_);
 
   // Stops the profile saver thread.
@@ -46,8 +44,6 @@
   // Returns true if the profile saver is started.
   static bool IsStarted() REQUIRES(!Locks::profiler_lock_);
 
-  static void NotifyDexUse(const std::string& dex_location);
-
   // If the profile saver is running, dumps statistics to the `os`. Otherwise it does nothing.
   static void DumpInstanceInfo(std::ostream& os);
 
@@ -63,12 +59,18 @@
                             uint16_t method_idx);
 
  private:
+  // A cache structure which keeps track of the data saved to disk.
+  // It is used to reduce the number of disk read/writes.
+  struct ProfileInfoCache {
+    ProfileCompilationInfo profile;
+    uint32_t last_save_number_of_methods = 0;
+    uint32_t last_save_number_of_classes = 0;
+  };
+
   ProfileSaver(const ProfileSaverOptions& options,
                const std::string& output_filename,
                jit::JitCodeCache* jit_code_cache,
-               const std::vector<std::string>& code_paths,
-               const std::string& foreign_dex_profile_path,
-               const std::string& app_data_dir);
+               const std::vector<std::string>& code_paths);
 
   // NO_THREAD_SAFETY_ANALYSIS for static function calling into member function with excludes lock.
   static void* RunProfileSaverThread(void* arg)
@@ -90,24 +92,17 @@
   bool ShuttingDown(Thread* self) REQUIRES(!Locks::profiler_lock_);
 
   void AddTrackedLocations(const std::string& output_filename,
-                           const std::string& app_data_dir,
                            const std::vector<std::string>& code_paths)
       REQUIRES(Locks::profiler_lock_);
 
   // Retrieves the cached profile compilation info for the given profile file.
   // If no entry exists, a new empty one will be created, added to the cache and
   // then returned.
-  ProfileCompilationInfo* GetCachedProfiledInfo(const std::string& filename);
+  ProfileInfoCache* GetCachedProfiledInfo(const std::string& filename);
   // Fetches the current resolved classes and methods from the ClassLinker and stores them in the
   // profile_cache_ for later save.
   void FetchAndCacheResolvedClassesAndMethods();
 
-  static bool MaybeRecordDexUseInternal(
-      const std::string& dex_location,
-      const std::set<std::string>& tracked_locations,
-      const std::string& foreign_dex_profile_path,
-      const std::set<std::string>& app_data_dirs);
-
   void DumpInfo(std::ostream& os);
 
   // The only instance of the saver.
@@ -121,17 +116,8 @@
   // It maps profile locations to code paths (dex base locations).
   SafeMap<std::string, std::set<std::string>> tracked_dex_base_locations_
       GUARDED_BY(Locks::profiler_lock_);
-  // The directory were the we should store the code paths.
-  std::string foreign_dex_profile_path_;
-
-  // A list of application directories, used to infer if a loaded dex belongs
-  // to the application or not. Multiple application data directories are possible when
-  // different apps share the same runtime.
-  std::set<std::string> app_data_dirs_ GUARDED_BY(Locks::profiler_lock_);
 
   bool shutting_down_ GUARDED_BY(Locks::profiler_lock_);
-  uint32_t last_save_number_of_methods_;
-  uint32_t last_save_number_of_classes_;
   uint64_t last_time_ns_saver_woke_up_ GUARDED_BY(wait_lock_);
   uint32_t jit_activity_notifications_;
 
@@ -139,7 +125,7 @@
   // profile information. The size of this cache is usually very small and tops
   // out at just a few hundred entries in the ProfileCompilationInfo objects.
   // It helps avoiding unnecessary writes to disk.
-  SafeMap<std::string, ProfileCompilationInfo> profile_cache_;
+  SafeMap<std::string, ProfileInfoCache> profile_cache_;
 
   // Save period condition support.
   Mutex wait_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
@@ -152,7 +138,6 @@
   uint64_t total_number_of_failed_writes_;
   uint64_t total_ms_of_sleep_;
   uint64_t total_ns_of_work_;
-  uint64_t total_number_of_foreign_dex_marks_;
   // TODO(calin): replace with an actual size.
   uint64_t max_number_of_profile_entries_cached_;
   uint64_t total_number_of_hot_spikes_;
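With the foreign-dex tracking gone, starting the saver takes only the output profile and
the code paths to profile. A sketch of the trimmed call (paths are hypothetical):

    ProfileSaver::Start(options,
                        "/data/misc/profiles/cur/0/com.example/primary.prof",
                        jit_code_cache,
                        {"/data/app/com.example/base.apk"});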
diff --git a/runtime/jni_internal.cc b/runtime/jni_internal.cc
index 547b5b8..5418d35 100644
--- a/runtime/jni_internal.cc
+++ b/runtime/jni_internal.cc
@@ -2265,7 +2265,18 @@
 
       VLOG(jni) << "[Registering JNI native method " << m->PrettyMethod() << "]";
 
-      is_fast = is_fast || m->IsFastNative();  // Merge with @FastNative state.
+      if (UNLIKELY(is_fast)) {
+        // There are a few reasons to switch:
+        // 1) We don't support !bang JNI anymore; it will become a hard error later.
+        // 2) @FastNative is actually faster (at least 1.5x faster than !bang JNI),
+        //    and switching is easy: remove the '!' in the C code and add the
+        //    annotation in the .java code.
+        // 3) Good chance of hitting DCHECK failures in ScopedFastNativeObjectAccess
+        //    since that checks for presence of @FastNative and not for ! in the descriptor.
+        LOG(WARNING) << "!bang JNI is deprecated. Switch to @FastNative for " << m->PrettyMethod();
+        is_fast = false;
+        // TODO: make this a hard register error in the future.
+      }
+
       m->RegisterNative(fnPtr, is_fast);
     }
     return JNI_OK;
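For context, !bang JNI was requested by prefixing the registered signature with '!'; the
warning above steers users to the annotation instead. A sketch of the native side of the
migration (method and function names are hypothetical):

    static JNINativeMethod gMethods[] = {
      // Before: { "poll", "!()V", reinterpret_cast<void*>(Example_poll) },
      // After: plain signature here; annotate the Java declaration with @FastNative.
      { "poll", "()V", reinterpret_cast<void*>(Example_poll) },
    };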
diff --git a/runtime/jni_internal.h b/runtime/jni_internal.h
index b3837c4..580a42b 100644
--- a/runtime/jni_internal.h
+++ b/runtime/jni_internal.h
@@ -19,20 +19,10 @@
 
 #include <jni.h>
 #include <iosfwd>
+#include "nativehelper/jni_macros.h"
 
 #include "base/macros.h"
 
-#ifndef NATIVE_METHOD
-#define NATIVE_METHOD(className, functionName, signature) \
-  { #functionName, signature, reinterpret_cast<void*>(className ## _ ## functionName) }
-#endif
-
-// TODO: Can we do a better job of supporting overloading ?
-#ifndef OVERLOADED_NATIVE_METHOD
-#define OVERLOADED_NATIVE_METHOD(className, functionName, signature, identifier) \
-    { #functionName, signature, reinterpret_cast<void*>(className ## _ ## identifier) }
-#endif
-
 #define REGISTER_NATIVE_METHODS(jni_class_name) \
   RegisterNativeMethods(env, jni_class_name, gMethods, arraysize(gMethods))
 
diff --git a/runtime/jvalue.h b/runtime/jvalue.h
index 398bfbc..f61a07c 100644
--- a/runtime/jvalue.h
+++ b/runtime/jvalue.h
@@ -39,7 +39,9 @@
   }
 
   uint16_t GetC() const { return c; }
-  void SetC(uint16_t new_c) { c = new_c; }
+  void SetC(uint16_t new_c) {
+    j = static_cast<int64_t>(new_c);  // Zero-extend to 64 bits.
+  }
 
   double GetD() const { return d; }
   void SetD(double new_d) { d = new_d; }
@@ -66,7 +68,9 @@
   }
 
   uint8_t GetZ() const { return z; }
-  void SetZ(uint8_t new_z) { z = new_z; }
+  void SetZ(uint8_t new_z) {
+    j = static_cast<int64_t>(new_z);  // Zero-extend to 64 bits.
+  }
 
   mirror::Object** GetGCRoot() { return &l; }
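The widened stores matter because JValue is a union: a narrow write after a wide one used
to leave stale high bits in j. A minimal illustration of what the new setters guarantee
(little-endian layout assumed for the old behavior):

    JValue v;
    v.SetJ(-1);             // all 64 bits set
    v.SetZ(1u);             // now writes the whole slot, zero-extending the byte
    CHECK_EQ(v.GetJ(), 1);  // the old SetZ would have left 0xFFFFFFFFFFFFFF01 here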
 
diff --git a/runtime/linear_alloc.cc b/runtime/linear_alloc.cc
index f91b0ed..e9db9b8 100644
--- a/runtime/linear_alloc.cc
+++ b/runtime/linear_alloc.cc
@@ -33,6 +33,11 @@
   return allocator_.Alloc(size);
 }
 
+void* LinearAlloc::AllocAlign16(Thread* self, size_t size) {
+  MutexLock mu(self, lock_);
+  return allocator_.AllocAlign16(size);
+}
+
 size_t LinearAlloc::GetUsedMemory() const {
   MutexLock mu(Thread::Current(), lock_);
   return allocator_.BytesUsed();
diff --git a/runtime/linear_alloc.h b/runtime/linear_alloc.h
index df7f17d..384b2e3 100644
--- a/runtime/linear_alloc.h
+++ b/runtime/linear_alloc.h
@@ -29,6 +29,7 @@
   explicit LinearAlloc(ArenaPool* pool);
 
   void* Alloc(Thread* self, size_t size) REQUIRES(!lock_);
+  void* AllocAlign16(Thread* self, size_t size) REQUIRES(!lock_);
 
   // Realloc never frees the input pointer, it is the caller's job to do this if necessary.
   void* Realloc(Thread* self, void* ptr, size_t old_size, size_t new_size) REQUIRES(!lock_);
diff --git a/runtime/lock_word.h b/runtime/lock_word.h
index 2f2565b..edc64f3 100644
--- a/runtime/lock_word.h
+++ b/runtime/lock_word.h
@@ -57,7 +57,8 @@
  *  |10|9|87654321098765432109876543210|
  *  |11|0| ForwardingAddress           |
  *
- * The rb bits store the read barrier state.
+ * The `r` bit stores the read barrier state.
+ * The `m` bit stores the mark state.
  */
 class LockWord {
  public:
diff --git a/runtime/mem_map.cc b/runtime/mem_map.cc
index 93c212b..40309b9 100644
--- a/runtime/mem_map.cc
+++ b/runtime/mem_map.cc
@@ -962,4 +962,52 @@
   }
 }
 
+void MemMap::AlignBy(size_t size) {
+  CHECK_EQ(begin_, base_begin_) << "Unsupported";
+  CHECK_EQ(size_, base_size_) << "Unsupported";
+  CHECK_GT(size, static_cast<size_t>(kPageSize));
+  CHECK_ALIGNED(size, kPageSize);
+  if (IsAlignedParam(reinterpret_cast<uintptr_t>(base_begin_), size) &&
+      IsAlignedParam(base_size_, size)) {
+    // Already aligned.
+    return;
+  }
+  uint8_t* base_begin = reinterpret_cast<uint8_t*>(base_begin_);
+  uint8_t* base_end = base_begin + base_size_;
+  uint8_t* aligned_base_begin = AlignUp(base_begin, size);
+  uint8_t* aligned_base_end = AlignDown(base_end, size);
+  CHECK_LE(base_begin, aligned_base_begin);
+  CHECK_LE(aligned_base_end, base_end);
+  size_t aligned_base_size = aligned_base_end - aligned_base_begin;
+  CHECK_LT(aligned_base_begin, aligned_base_end)
+      << "base_begin = " << reinterpret_cast<void*>(base_begin)
+      << " base_end = " << reinterpret_cast<void*>(base_end);
+  CHECK_GE(aligned_base_size, size);
+  // Unmap the unaligned parts.
+  if (base_begin < aligned_base_begin) {
+    MEMORY_TOOL_MAKE_UNDEFINED(base_begin, aligned_base_begin - base_begin);
+    CHECK_EQ(munmap(base_begin, aligned_base_begin - base_begin), 0)
+        << "base_begin=" << reinterpret_cast<void*>(base_begin)
+        << " aligned_base_begin=" << reinterpret_cast<void*>(aligned_base_begin);
+  }
+  if (aligned_base_end < base_end) {
+    MEMORY_TOOL_MAKE_UNDEFINED(aligned_base_end, base_end - aligned_base_end);
+    CHECK_EQ(munmap(aligned_base_end, base_end - aligned_base_end), 0)
+        << "base_end=" << reinterpret_cast<void*>(base_end)
+        << " aligned_base_end=" << reinterpret_cast<void*>(aligned_base_end);
+  }
+  std::lock_guard<std::mutex> mu(*mem_maps_lock_);
+  base_begin_ = aligned_base_begin;
+  base_size_ = aligned_base_size;
+  begin_ = aligned_base_begin;
+  size_ = aligned_base_size;
+  DCHECK(maps_ != nullptr);
+  if (base_begin < aligned_base_begin) {
+    auto it = maps_->find(base_begin);
+    CHECK(it != maps_->end()) << "MemMap not found";
+    maps_->erase(it);
+    maps_->insert(std::make_pair(base_begin_, this));
+  }
+}
+
 }  // namespace art
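AlignBy only ever shrinks the mapping: it unmaps the unaligned head and tail and updates
the bookkeeping under mem_maps_lock_. A worked example, assuming 4 KiB pages and a map
that starts on an odd page (addresses assumed):

    // base_begin_ = 0x11000, base_size_ = 14 pages, so the map ends at 0x1F000.
    map->AlignBy(2 * kPageSize);
    // The head page [0x11000, 0x12000) and tail page [0x1E000, 0x1F000) are unmapped;
    // the map is now [0x12000, 0x1E000), 12 pages, with both ends 8 KiB-aligned.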
diff --git a/runtime/mem_map.h b/runtime/mem_map.h
index 71db3f7..ceb4c33 100644
--- a/runtime/mem_map.h
+++ b/runtime/mem_map.h
@@ -193,6 +193,9 @@
   // intermittently.
   void TryReadable();
 
+  // Align the map by unmapping the unaligned parts at the lower and the higher ends.
+  void AlignBy(size_t size);
+
  private:
   MemMap(const std::string& name,
          uint8_t* begin,
@@ -222,10 +225,10 @@
                            bool low_4gb);
 
   const std::string name_;
-  uint8_t* const begin_;  // Start of data.
+  uint8_t* begin_;  // Start of data. May be changed by AlignBy.
   size_t size_;  // Length of data.
 
-  void* const base_begin_;  // Page-aligned base address.
+  void* base_begin_;  // Page-aligned base address. May be changed by AlignBy.
   size_t base_size_;  // Length of mapping. May be changed by RemapAtEnd (ie Zygote).
   int prot_;  // Protection of the map.
 
diff --git a/runtime/mem_map_test.cc b/runtime/mem_map_test.cc
index e703b78..aa306ac 100644
--- a/runtime/mem_map_test.cc
+++ b/runtime/mem_map_test.cc
@@ -431,4 +431,108 @@
   ASSERT_FALSE(MemMap::CheckNoGaps(map0.get(), map2.get()));
 }
 
+TEST_F(MemMapTest, AlignBy) {
+  CommonInit();
+  std::string error_msg;
+  // Cast the page size to size_t.
+  const size_t page_size = static_cast<size_t>(kPageSize);
+  // Map a region.
+  std::unique_ptr<MemMap> m0(MemMap::MapAnonymous("MemMapTest_AlignByTest_map0",
+                                                  nullptr,
+                                                  14 * page_size,
+                                                  PROT_READ | PROT_WRITE,
+                                                  false,
+                                                  false,
+                                                  &error_msg));
+  uint8_t* base0 = m0->Begin();
+  ASSERT_TRUE(base0 != nullptr) << error_msg;
+  ASSERT_EQ(m0->Size(), 14 * page_size);
+  ASSERT_EQ(BaseBegin(m0.get()), base0);
+  ASSERT_EQ(BaseSize(m0.get()), m0->Size());
+
+  // Break it into several regions by using RemapAtEnd.
+  std::unique_ptr<MemMap> m1(m0->RemapAtEnd(base0 + 3 * page_size,
+                                            "MemMapTest_AlignByTest_map1",
+                                            PROT_READ | PROT_WRITE,
+                                            &error_msg));
+  uint8_t* base1 = m1->Begin();
+  ASSERT_TRUE(base1 != nullptr) << error_msg;
+  ASSERT_EQ(base1, base0 + 3 * page_size);
+  ASSERT_EQ(m0->Size(), 3 * page_size);
+
+  std::unique_ptr<MemMap> m2(m1->RemapAtEnd(base1 + 4 * page_size,
+                                            "MemMapTest_AlignByTest_map2",
+                                            PROT_READ | PROT_WRITE,
+                                            &error_msg));
+  uint8_t* base2 = m2->Begin();
+  ASSERT_TRUE(base2 != nullptr) << error_msg;
+  ASSERT_EQ(base2, base1 + 4 * page_size);
+  ASSERT_EQ(m1->Size(), 4 * page_size);
+
+  std::unique_ptr<MemMap> m3(m2->RemapAtEnd(base2 + 3 * page_size,
+                                            "MemMapTest_AlignByTest_map1",
+                                            PROT_READ | PROT_WRITE,
+                                            &error_msg));
+  uint8_t* base3 = m3->Begin();
+  ASSERT_TRUE(base3 != nullptr) << error_msg;
+  ASSERT_EQ(base3, base2 + 3 * page_size);
+  ASSERT_EQ(m2->Size(), 3 * page_size);
+  ASSERT_EQ(m3->Size(), 4 * page_size);
+
+  uint8_t* end0 = base0 + m0->Size();
+  uint8_t* end1 = base1 + m1->Size();
+  uint8_t* end2 = base2 + m2->Size();
+  uint8_t* end3 = base3 + m3->Size();
+
+  ASSERT_EQ(static_cast<size_t>(end3 - base0), 14 * page_size);
+
+  if (IsAlignedParam(base0, 2 * page_size)) {
+    ASSERT_FALSE(IsAlignedParam(base1, 2 * page_size));
+    ASSERT_FALSE(IsAlignedParam(base2, 2 * page_size));
+    ASSERT_TRUE(IsAlignedParam(base3, 2 * page_size));
+    ASSERT_TRUE(IsAlignedParam(end3, 2 * page_size));
+  } else {
+    ASSERT_TRUE(IsAlignedParam(base1, 2 * page_size));
+    ASSERT_TRUE(IsAlignedParam(base2, 2 * page_size));
+    ASSERT_FALSE(IsAlignedParam(base3, 2 * page_size));
+    ASSERT_FALSE(IsAlignedParam(end3, 2 * page_size));
+  }
+
+  // Align by 2 * page_size.
+  m0->AlignBy(2 * page_size);
+  m1->AlignBy(2 * page_size);
+  m2->AlignBy(2 * page_size);
+  m3->AlignBy(2 * page_size);
+
+  EXPECT_TRUE(IsAlignedParam(m0->Begin(), 2 * page_size));
+  EXPECT_TRUE(IsAlignedParam(m1->Begin(), 2 * page_size));
+  EXPECT_TRUE(IsAlignedParam(m2->Begin(), 2 * page_size));
+  EXPECT_TRUE(IsAlignedParam(m3->Begin(), 2 * page_size));
+
+  EXPECT_TRUE(IsAlignedParam(m0->Begin() + m0->Size(), 2 * page_size));
+  EXPECT_TRUE(IsAlignedParam(m1->Begin() + m1->Size(), 2 * page_size));
+  EXPECT_TRUE(IsAlignedParam(m2->Begin() + m2->Size(), 2 * page_size));
+  EXPECT_TRUE(IsAlignedParam(m3->Begin() + m3->Size(), 2 * page_size));
+
+  if (IsAlignedParam(base0, 2 * page_size)) {
+    EXPECT_EQ(m0->Begin(), base0);
+    EXPECT_EQ(m0->Begin() + m0->Size(), end0 - page_size);
+    EXPECT_EQ(m1->Begin(), base1 + page_size);
+    EXPECT_EQ(m1->Begin() + m1->Size(), end1 - page_size);
+    EXPECT_EQ(m2->Begin(), base2 + page_size);
+    EXPECT_EQ(m2->Begin() + m2->Size(), end2);
+    EXPECT_EQ(m3->Begin(), base3);
+    EXPECT_EQ(m3->Begin() + m3->Size(), end3);
+  } else {
+    EXPECT_EQ(m0->Begin(), base0 + page_size);
+    EXPECT_EQ(m0->Begin() + m0->Size(), end0);
+    EXPECT_EQ(m1->Begin(), base1);
+    EXPECT_EQ(m1->Begin() + m1->Size(), end1);
+    EXPECT_EQ(m2->Begin(), base2);
+    EXPECT_EQ(m2->Begin() + m2->Size(), end2 - page_size);
+    EXPECT_EQ(m3->Begin(), base3 + page_size);
+    EXPECT_EQ(m3->Begin() + m3->Size(), end3 - page_size);
+  }
+}
+
 }  // namespace art
diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h
index d34f09c..c52b66a 100644
--- a/runtime/mirror/class.h
+++ b/runtime/mirror/class.h
@@ -206,10 +206,10 @@
     return status >= kStatusResolved || status == kStatusErrorResolved;
   }
 
-  // Returns true if the class was compile-time verified.
+  // Returns true if the class should be verified at runtime.
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
-  bool IsCompileTimeVerified() REQUIRES_SHARED(Locks::mutator_lock_) {
-    return GetStatus<kVerifyFlags>() >= kStatusRetryVerificationAtRuntime;
+  bool ShouldVerifyAtRuntime() REQUIRES_SHARED(Locks::mutator_lock_) {
+    return GetStatus<kVerifyFlags>() == kStatusRetryVerificationAtRuntime;
   }
 
   // Returns true if the class has been verified.
@@ -595,7 +595,7 @@
   // The size of java.lang.Class.class.
   static uint32_t ClassClassSize(PointerSize pointer_size) {
     // The number of vtable entries in java.lang.Class.
-    uint32_t vtable_entries = Object::kVTableLength + 73;
+    uint32_t vtable_entries = Object::kVTableLength + 70;
     return ComputeClassSize(true, vtable_entries, 0, 0, 4, 1, 0, pointer_size);
   }
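The verification predicate also changed from >= to ==, so fully verified classes no
longer answer true; only classes that soft-failed compile-time verification re-verify.
A sketch of the distinction the rename captures (caller code hypothetical):

    // Old: IsCompileTimeVerified() was status >= kStatusRetryVerificationAtRuntime,
    // which also matched kStatusVerified and above.
    if (klass->ShouldVerifyAtRuntime()) {
      // Run the verifier again before this class can be initialized.
    }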
 
diff --git a/runtime/mirror/dex_cache-inl.h b/runtime/mirror/dex_cache-inl.h
index 29bf6a0..582ecb2 100644
--- a/runtime/mirror/dex_cache-inl.h
+++ b/runtime/mirror/dex_cache-inl.h
@@ -24,6 +24,7 @@
 #include "base/casts.h"
 #include "base/enums.h"
 #include "base/logging.h"
+#include "dex_file.h"
 #include "gc_root.h"
 #include "mirror/class.h"
 #include "mirror/call_site.h"
@@ -36,6 +37,15 @@
 namespace art {
 namespace mirror {
 
+template <typename T>
+inline void NativeDexCachePair<T>::Initialize(std::atomic<NativeDexCachePair<T>>* dex_cache,
+                                              PointerSize pointer_size) {
+  NativeDexCachePair<T> first_elem;
+  first_elem.object = nullptr;
+  first_elem.index = InvalidIndexForSlot(0);
+  DexCache::SetNativePairPtrSize(dex_cache, 0, first_elem, pointer_size);
+}
+
 inline uint32_t DexCache::ClassSize(PointerSize pointer_size) {
   uint32_t vtable_entries = Object::kVTableLength + 5;
   return Class::ComputeClassSize(true, vtable_entries, 0, 0, 0, 0, 0, pointer_size);
@@ -164,20 +174,36 @@
   }
 }
 
+inline uint32_t DexCache::FieldSlotIndex(uint32_t field_idx) {
+  DCHECK_LT(field_idx, GetDexFile()->NumFieldIds());
+  const uint32_t slot_idx = field_idx % kDexCacheFieldCacheSize;
+  DCHECK_LT(slot_idx, NumResolvedFields());
+  return slot_idx;
+}
+
 inline ArtField* DexCache::GetResolvedField(uint32_t field_idx, PointerSize ptr_size) {
   DCHECK_EQ(Runtime::Current()->GetClassLinker()->GetImagePointerSize(), ptr_size);
-  DCHECK_LT(field_idx, NumResolvedFields());  // NOTE: Unchecked, i.e. not throwing AIOOB.
-  ArtField* field = GetElementPtrSize(GetResolvedFields(), field_idx, ptr_size);
-  if (field == nullptr || field->GetDeclaringClass()->IsErroneous()) {
-    return nullptr;
-  }
-  return field;
+  auto pair = GetNativePairPtrSize(GetResolvedFields(), FieldSlotIndex(field_idx), ptr_size);
+  return pair.GetObjectForIndex(field_idx);
 }
 
 inline void DexCache::SetResolvedField(uint32_t field_idx, ArtField* field, PointerSize ptr_size) {
   DCHECK_EQ(Runtime::Current()->GetClassLinker()->GetImagePointerSize(), ptr_size);
-  DCHECK_LT(field_idx, NumResolvedFields());  // NOTE: Unchecked, i.e. not throwing AIOOB.
-  SetElementPtrSize(GetResolvedFields(), field_idx, field, ptr_size);
+  DCHECK(field != nullptr);
+  FieldDexCachePair pair(field, field_idx);
+  SetNativePairPtrSize(GetResolvedFields(), FieldSlotIndex(field_idx), pair, ptr_size);
+}
+
+inline void DexCache::ClearResolvedField(uint32_t field_idx, PointerSize ptr_size) {
+  DCHECK_EQ(Runtime::Current()->GetClassLinker()->GetImagePointerSize(), ptr_size);
+  uint32_t slot_idx = FieldSlotIndex(field_idx);
+  auto* resolved_fields = GetResolvedFields();
+  // This is racy but should only be called from the single-threaded ImageWriter.
+  DCHECK(Runtime::Current()->IsAotCompiler());
+  if (GetNativePairPtrSize(resolved_fields, slot_idx, ptr_size).index == field_idx) {
+    FieldDexCachePair cleared(nullptr, FieldDexCachePair::InvalidIndexForSlot(slot_idx));
+    SetNativePairPtrSize(resolved_fields, slot_idx, cleared, ptr_size);
+  }
 }
 
 inline ArtMethod* DexCache::GetResolvedMethod(uint32_t method_idx, PointerSize ptr_size) {
@@ -225,6 +251,40 @@
   }
 }
 
+template <typename T>
+NativeDexCachePair<T> DexCache::GetNativePairPtrSize(std::atomic<NativeDexCachePair<T>>* pair_array,
+                                                     size_t idx,
+                                                     PointerSize ptr_size) {
+  if (ptr_size == PointerSize::k64) {
+    auto* array = reinterpret_cast<std::atomic<ConversionPair64>*>(pair_array);
+    ConversionPair64 value = AtomicLoadRelaxed16B(&array[idx]);
+    return NativeDexCachePair<T>(reinterpret_cast64<T*>(value.first),
+                                 dchecked_integral_cast<size_t>(value.second));
+  } else {
+    auto* array = reinterpret_cast<std::atomic<ConversionPair32>*>(pair_array);
+    ConversionPair32 value = array[idx].load(std::memory_order_relaxed);
+    return NativeDexCachePair<T>(reinterpret_cast<T*>(value.first), value.second);
+  }
+}
+
+template <typename T>
+void DexCache::SetNativePairPtrSize(std::atomic<NativeDexCachePair<T>>* pair_array,
+                                    size_t idx,
+                                    NativeDexCachePair<T> pair,
+                                    PointerSize ptr_size) {
+  if (ptr_size == PointerSize::k64) {
+    auto* array = reinterpret_cast<std::atomic<ConversionPair64>*>(pair_array);
+    ConversionPair64 v(reinterpret_cast64<uint64_t>(pair.object), pair.index);
+    AtomicStoreRelease16B(&array[idx], v);
+  } else {
+    auto* array = reinterpret_cast<std::atomic<ConversionPair32>*>(pair_array);
+    ConversionPair32 v(
+        dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(pair.object)),
+        dchecked_integral_cast<uint32_t>(pair.index));
+    array[idx].store(v, std::memory_order_release);
+  }
+}
+
 template <typename T,
           ReadBarrierOption kReadBarrierOption,
           typename Visitor>
diff --git a/runtime/mirror/dex_cache.cc b/runtime/mirror/dex_cache.cc
index 1b8b391..c95d92e 100644
--- a/runtime/mirror/dex_cache.cc
+++ b/runtime/mirror/dex_cache.cc
@@ -52,8 +52,12 @@
              dex_file->NumTypeIds() != 0u ||
              dex_file->NumMethodIds() != 0u ||
              dex_file->NumFieldIds() != 0u) {
+    static_assert(ArenaAllocator::kAlignment == 8, "Expecting arena alignment of 8.");
+    DCHECK(layout.Alignment() == 8u || layout.Alignment() == 16u);
     // Zero-initialized.
-    raw_arrays = reinterpret_cast<uint8_t*>(linear_alloc->Alloc(self, layout.Size()));
+    raw_arrays = (layout.Alignment() == 16u)
+        ? reinterpret_cast<uint8_t*>(linear_alloc->AllocAlign16(self, layout.Size()))
+        : reinterpret_cast<uint8_t*>(linear_alloc->Alloc(self, layout.Size()));
   }
 
   mirror::StringDexCacheType* strings = (dex_file->NumStringIds() == 0u) ? nullptr :
@@ -62,17 +66,21 @@
       reinterpret_cast<mirror::TypeDexCacheType*>(raw_arrays + layout.TypesOffset());
   ArtMethod** methods = (dex_file->NumMethodIds() == 0u) ? nullptr :
       reinterpret_cast<ArtMethod**>(raw_arrays + layout.MethodsOffset());
-  ArtField** fields = (dex_file->NumFieldIds() == 0u) ? nullptr :
-      reinterpret_cast<ArtField**>(raw_arrays + layout.FieldsOffset());
+  mirror::FieldDexCacheType* fields = (dex_file->NumFieldIds() == 0u) ? nullptr :
+      reinterpret_cast<mirror::FieldDexCacheType*>(raw_arrays + layout.FieldsOffset());
 
-  size_t num_strings = mirror::DexCache::kDexCacheStringCacheSize;
+  size_t num_strings = kDexCacheStringCacheSize;
   if (dex_file->NumStringIds() < num_strings) {
     num_strings = dex_file->NumStringIds();
   }
-  size_t num_types = mirror::DexCache::kDexCacheTypeCacheSize;
+  size_t num_types = kDexCacheTypeCacheSize;
   if (dex_file->NumTypeIds() < num_types) {
     num_types = dex_file->NumTypeIds();
   }
+  size_t num_fields = kDexCacheFieldCacheSize;
+  if (dex_file->NumFieldIds() < num_fields) {
+    num_fields = dex_file->NumFieldIds();
+  }
 
   // Note that we allocate the method type dex caches regardless of this flag,
   // and we make sure here that they're not used by the runtime. This is in the
@@ -80,17 +88,17 @@
   //
   // If this needs to be mitigated in a production system running this code,
   // DexCache::kDexCacheMethodTypeCacheSize can be set to zero.
-  mirror::MethodTypeDexCacheType* method_types = nullptr;
+  MethodTypeDexCacheType* method_types = nullptr;
   size_t num_method_types = 0;
 
-  if (dex_file->NumProtoIds() < mirror::DexCache::kDexCacheMethodTypeCacheSize) {
+  if (dex_file->NumProtoIds() < kDexCacheMethodTypeCacheSize) {
     num_method_types = dex_file->NumProtoIds();
   } else {
-    num_method_types = mirror::DexCache::kDexCacheMethodTypeCacheSize;
+    num_method_types = kDexCacheMethodTypeCacheSize;
   }
 
   if (num_method_types > 0) {
-    method_types = reinterpret_cast<mirror::MethodTypeDexCacheType*>(
+    method_types = reinterpret_cast<MethodTypeDexCacheType*>(
         raw_arrays + layout.MethodTypesOffset());
   }
 
@@ -98,13 +106,13 @@
       ? nullptr
       : reinterpret_cast<GcRoot<mirror::CallSite>*>(raw_arrays + layout.CallSitesOffset());
 
-  DCHECK_ALIGNED(raw_arrays, alignof(mirror::StringDexCacheType)) <<
+  DCHECK_ALIGNED(raw_arrays, alignof(StringDexCacheType)) <<
                  "Expected raw_arrays to align to StringDexCacheType.";
-  DCHECK_ALIGNED(layout.StringsOffset(), alignof(mirror::StringDexCacheType)) <<
+  DCHECK_ALIGNED(layout.StringsOffset(), alignof(StringDexCacheType)) <<
                  "Expected StringsOffset() to align to StringDexCacheType.";
-  DCHECK_ALIGNED(strings, alignof(mirror::StringDexCacheType)) <<
+  DCHECK_ALIGNED(strings, alignof(StringDexCacheType)) <<
                  "Expected strings to align to StringDexCacheType.";
-  static_assert(alignof(mirror::StringDexCacheType) == 8u,
+  static_assert(alignof(StringDexCacheType) == 8u,
                 "Expected StringDexCacheType to have align of 8.");
   if (kIsDebugBuild) {
     // Sanity check to make sure all the dex cache arrays are empty. b/28992179
@@ -117,10 +125,11 @@
       CHECK(types[i].load(std::memory_order_relaxed).object.IsNull());
     }
     for (size_t i = 0; i < dex_file->NumMethodIds(); ++i) {
-      CHECK(mirror::DexCache::GetElementPtrSize(methods, i, image_pointer_size) == nullptr);
+      CHECK(GetElementPtrSize(methods, i, image_pointer_size) == nullptr);
     }
-    for (size_t i = 0; i < dex_file->NumFieldIds(); ++i) {
-      CHECK(mirror::DexCache::GetElementPtrSize(fields, i, image_pointer_size) == nullptr);
+    for (size_t i = 0; i < num_fields; ++i) {
+      CHECK_EQ(GetNativePairPtrSize(fields, i, image_pointer_size).index, 0u);
+      CHECK(GetNativePairPtrSize(fields, i, image_pointer_size).object == nullptr);
     }
     for (size_t i = 0; i < num_method_types; ++i) {
       CHECK_EQ(method_types[i].load(std::memory_order_relaxed).index, 0u);
@@ -136,6 +145,9 @@
   if (types != nullptr) {
     mirror::TypeDexCachePair::Initialize(types);
   }
+  if (fields != nullptr) {
+    mirror::FieldDexCachePair::Initialize(fields, image_pointer_size);
+  }
   if (method_types != nullptr) {
     mirror::MethodTypeDexCachePair::Initialize(method_types);
   }
@@ -148,7 +160,7 @@
                   methods,
                   dex_file->NumMethodIds(),
                   fields,
-                  dex_file->NumFieldIds(),
+                  num_fields,
                   method_types,
                   num_method_types,
                   call_sites,
@@ -164,7 +176,7 @@
                     uint32_t num_resolved_types,
                     ArtMethod** resolved_methods,
                     uint32_t num_resolved_methods,
-                    ArtField** resolved_fields,
+                    FieldDexCacheType* resolved_fields,
                     uint32_t num_resolved_fields,
                     MethodTypeDexCacheType* resolved_method_types,
                     uint32_t num_resolved_method_types,
@@ -218,5 +230,23 @@
   SetFieldObject<false>(OFFSET_OF_OBJECT_MEMBER(DexCache, location_), location);
 }
 
+#if !defined(__aarch64__) && !defined(__x86_64__)
+static pthread_mutex_t dex_cache_slow_atomic_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+DexCache::ConversionPair64 DexCache::AtomicLoadRelaxed16B(std::atomic<ConversionPair64>* target) {
+  pthread_mutex_lock(&dex_cache_slow_atomic_mutex);
+  DexCache::ConversionPair64 value = *reinterpret_cast<ConversionPair64*>(target);
+  pthread_mutex_unlock(&dex_cache_slow_atomic_mutex);
+  return value;
+}
+
+void DexCache::AtomicStoreRelease16B(std::atomic<ConversionPair64>* target,
+                                     ConversionPair64 value) {
+  pthread_mutex_lock(&dex_cache_slow_atomic_mutex);
+  *reinterpret_cast<ConversionPair64*>(target) = value;
+  pthread_mutex_unlock(&dex_cache_slow_atomic_mutex);
+}
+#endif
+
 }  // namespace mirror
 }  // namespace art
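
On 32-bit targets, the #else branch above serializes every 16-byte load and store through one process-wide pthread mutex; aarch64 and x86_64 get lock-free versions in dex_cache.h further down. A minimal standalone sketch of the same fallback pattern, with hypothetical names:

    #include <cstdint>
    #include <mutex>

    // Hypothetical stand-in for DexCache::ConversionPair64.
    struct Pair64 {
      uint64_t first;
      uint64_t second;
    };

    // One global mutex guards every 16-byte slot; correctness only requires
    // that all readers and writers of these slots take the same lock.
    static std::mutex g_pair_mutex;

    Pair64 LoadPair(const Pair64* target) {
      std::lock_guard<std::mutex> lock(g_pair_mutex);
      return *target;  // A plain load is safe while the lock is held.
    }

    void StorePair(Pair64* target, Pair64 value) {
      std::lock_guard<std::mutex> lock(g_pair_mutex);
      *target = value;  // A plain store is safe while the lock is held.
    }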
diff --git a/runtime/mirror/dex_cache.h b/runtime/mirror/dex_cache.h
index 0579198..35707ef 100644
--- a/runtime/mirror/dex_cache.h
+++ b/runtime/mirror/dex_cache.h
@@ -91,12 +91,44 @@
   }
 };
 
+template <typename T> struct PACKED(2 * __SIZEOF_POINTER__) NativeDexCachePair {
+  T* object;
+  size_t index;
+  // This is similar to DexCachePair except that we're storing a native pointer
+  // instead of a GC root. See DexCachePair for the details.
+  NativeDexCachePair(T* object, uint32_t index)
+      : object(object),
+        index(index) {}
+  NativeDexCachePair() : object(nullptr), index(0u) { }
+  NativeDexCachePair(const NativeDexCachePair<T>&) = default;
+  NativeDexCachePair& operator=(const NativeDexCachePair<T>&) = default;
+
+  static void Initialize(std::atomic<NativeDexCachePair<T>>* dex_cache, PointerSize pointer_size);
+
+  static uint32_t InvalidIndexForSlot(uint32_t slot) {
+    // Since the cache size is a power of two, 0 will always map to slot 0.
+    // Use 1 for slot 0 and 0 for all other slots.
+    return (slot == 0) ? 1u : 0u;
+  }
+
+  T* GetObjectForIndex(uint32_t idx) REQUIRES_SHARED(Locks::mutator_lock_) {
+    if (idx != index) {
+      return nullptr;
+    }
+    DCHECK(object != nullptr);
+    return object;
+  }
+};
+
 using TypeDexCachePair = DexCachePair<Class>;
 using TypeDexCacheType = std::atomic<TypeDexCachePair>;
 
 using StringDexCachePair = DexCachePair<String>;
 using StringDexCacheType = std::atomic<StringDexCachePair>;
 
+using FieldDexCachePair = NativeDexCachePair<ArtField>;
+using FieldDexCacheType = std::atomic<FieldDexCachePair>;
+
 using MethodTypeDexCachePair = DexCachePair<MethodType>;
 using MethodTypeDexCacheType = std::atomic<MethodTypeDexCachePair>;
 
@@ -116,6 +148,11 @@
   static_assert(IsPowerOfTwo(kDexCacheStringCacheSize),
                 "String dex cache size is not a power of 2.");
 
+  // Size of field dex cache. Needs to be a power of 2 for entrypoint assumptions to hold.
+  static constexpr size_t kDexCacheFieldCacheSize = 1024;
+  static_assert(IsPowerOfTwo(kDexCacheFieldCacheSize),
+                "Field dex cache size is not a power of 2.");
+
   // Size of method type dex cache. Needs to be a power of 2 for entrypoint assumptions
   // to hold.
   static constexpr size_t kDexCacheMethodTypeCacheSize = 1024;
@@ -130,6 +167,10 @@
     return kDexCacheStringCacheSize;
   }
 
+  static constexpr size_t StaticArtFieldSize() {
+    return kDexCacheFieldCacheSize;
+  }
+
   static constexpr size_t StaticMethodTypeSize() {
     return kDexCacheMethodTypeCacheSize;
   }
@@ -255,6 +296,8 @@
   // Pointer sized variant, used for patching.
   ALWAYS_INLINE void SetResolvedField(uint32_t idx, ArtField* field, PointerSize ptr_size)
       REQUIRES_SHARED(Locks::mutator_lock_);
+  ALWAYS_INLINE void ClearResolvedField(uint32_t idx, PointerSize ptr_size)
+      REQUIRES_SHARED(Locks::mutator_lock_);
 
   MethodType* GetResolvedMethodType(uint32_t proto_idx) REQUIRES_SHARED(Locks::mutator_lock_);
 
@@ -299,11 +342,11 @@
     SetFieldPtr<false>(ResolvedMethodsOffset(), resolved_methods);
   }
 
-  ArtField** GetResolvedFields() ALWAYS_INLINE REQUIRES_SHARED(Locks::mutator_lock_) {
-    return GetFieldPtr<ArtField**>(ResolvedFieldsOffset());
+  FieldDexCacheType* GetResolvedFields() ALWAYS_INLINE REQUIRES_SHARED(Locks::mutator_lock_) {
+    return GetFieldPtr<FieldDexCacheType*>(ResolvedFieldsOffset());
   }
 
-  void SetResolvedFields(ArtField** resolved_fields)
+  void SetResolvedFields(FieldDexCacheType* resolved_fields)
       ALWAYS_INLINE
       REQUIRES_SHARED(Locks::mutator_lock_) {
     SetFieldPtr<false>(ResolvedFieldsOffset(), resolved_fields);
@@ -376,6 +419,17 @@
   template <typename PtrType>
   static void SetElementPtrSize(PtrType* ptr_array, size_t idx, PtrType ptr, PointerSize ptr_size);
 
+  template <typename T>
+  static NativeDexCachePair<T> GetNativePairPtrSize(std::atomic<NativeDexCachePair<T>>* pair_array,
+                                                    size_t idx,
+                                                    PointerSize ptr_size);
+
+  template <typename T>
+  static void SetNativePairPtrSize(std::atomic<NativeDexCachePair<T>>* pair_array,
+                                   size_t idx,
+                                   NativeDexCachePair<T> pair,
+                                   PointerSize ptr_size);
+
  private:
   void Init(const DexFile* dex_file,
             ObjPtr<String> location,
@@ -385,7 +439,7 @@
             uint32_t num_resolved_types,
             ArtMethod** resolved_methods,
             uint32_t num_resolved_methods,
-            ArtField** resolved_fields,
+            FieldDexCacheType* resolved_fields,
             uint32_t num_resolved_fields,
             MethodTypeDexCacheType* resolved_method_types,
             uint32_t num_resolved_method_types,
@@ -394,8 +448,22 @@
             PointerSize pointer_size)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
+  // std::pair<> is not trivially copyable and as such it is unsuitable for atomic operations,
+  // so we use a custom pair class for loading and storing the NativeDexCachePair<>.
+  template <typename IntType>
+  struct PACKED(2 * sizeof(IntType)) ConversionPair {
+    ConversionPair(IntType f, IntType s) : first(f), second(s) { }
+    ConversionPair(const ConversionPair&) = default;
+    ConversionPair& operator=(const ConversionPair&) = default;
+    IntType first;
+    IntType second;
+  };
+  using ConversionPair32 = ConversionPair<uint32_t>;
+  using ConversionPair64 = ConversionPair<uint64_t>;
+
   uint32_t StringSlotIndex(dex::StringIndex string_idx) REQUIRES_SHARED(Locks::mutator_lock_);
   uint32_t TypeSlotIndex(dex::TypeIndex type_idx) REQUIRES_SHARED(Locks::mutator_lock_);
+  uint32_t FieldSlotIndex(uint32_t field_idx) REQUIRES_SHARED(Locks::mutator_lock_);
   uint32_t MethodTypeSlotIndex(uint32_t proto_idx) REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Visit instance fields of the dex cache as well as its associated arrays.
@@ -406,12 +474,55 @@
   void VisitReferences(ObjPtr<Class> klass, const Visitor& visitor)
       REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_);
 
+  // Due to lack of 16-byte atomics support, we use hand-crafted routines.
+#if defined(__aarch64__)
+  // 16-byte atomics are supported on aarch64.
+  ALWAYS_INLINE static ConversionPair64 AtomicLoadRelaxed16B(
+      std::atomic<ConversionPair64>* target) {
+    return target->load(std::memory_order_relaxed);
+  }
+
+  ALWAYS_INLINE static void AtomicStoreRelease16B(
+      std::atomic<ConversionPair64>* target, ConversionPair64 value) {
+    target->store(value, std::memory_order_release);
+  }
+#elif defined(__x86_64__)
+  ALWAYS_INLINE static ConversionPair64 AtomicLoadRelaxed16B(
+      std::atomic<ConversionPair64>* target) {
+    uint64_t first, second;
+    __asm__ __volatile__(
+        "lock cmpxchg16b (%2)"
+        : "=&a"(first), "=&d"(second)
+        : "r"(target), "a"(0), "d"(0), "b"(0), "c"(0)
+        : "cc");
+    return ConversionPair64(first, second);
+  }
+
+  ALWAYS_INLINE static void AtomicStoreRelease16B(
+      std::atomic<ConversionPair64>* target, ConversionPair64 value) {
+    uint64_t first, second;
+    __asm__ __volatile__ (
+        "movq (%2), %%rax\n\t"
+        "movq 8(%2), %%rdx\n\t"
+        "1:\n\t"
+        "lock cmpxchg16b (%2)\n\t"
+        "jnz 1b"
+        : "=&a"(first), "=&d"(second)
+        : "r"(target), "b"(value.first), "c"(value.second)
+        : "cc");
+  }
+#else
+  static ConversionPair64 AtomicLoadRelaxed16B(std::atomic<ConversionPair64>* target);
+  static void AtomicStoreRelease16B(std::atomic<ConversionPair64>* target, ConversionPair64 value);
+#endif
+
   HeapReference<Object> dex_;
   HeapReference<String> location_;
   uint64_t dex_file_;               // const DexFile*
   uint64_t resolved_call_sites_;    // GcRoot<CallSite>* array with num_resolved_call_sites_
                                     // elements.
-  uint64_t resolved_fields_;        // ArtField*, array with num_resolved_fields_ elements.
+  uint64_t resolved_fields_;        // std::atomic<FieldDexCachePair>*, array with
+                                    // num_resolved_fields_ elements.
   uint64_t resolved_method_types_;  // std::atomic<MethodTypeDexCachePair>* array with
                                     // num_resolved_method_types_ elements.
   uint64_t resolved_methods_;       // ArtMethod*, array with num_resolved_methods_ elements.
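
Because the new field cache is a power-of-two-sized, index-tagged array, a lookup presumably masks the field index down to a slot and then compares the stored index tag; index 0 always lands in slot 0, which is why InvalidIndexForSlot() reserves index 1 as slot 0's "empty" sentinel. A small sketch of that arithmetic (hypothetical names, assuming the masking scheme used for the other dex caches):

    #include <cassert>
    #include <cstdint>

    constexpr uint32_t kCacheSize = 1024u;  // Must be a power of two.

    constexpr uint32_t SlotIndex(uint32_t idx) {
      return idx & (kCacheSize - 1u);  // Cheap modulo for power-of-two sizes.
    }

    // Mirrors NativeDexCachePair<T>::InvalidIndexForSlot(): index 0 hashes to
    // slot 0, so slot 0 needs a different sentinel. Index 1 hashes to slot 1
    // and therefore can never be a genuine hit in slot 0.
    constexpr uint32_t InvalidIndexForSlot(uint32_t slot) {
      return (slot == 0) ? 1u : 0u;
    }

    int main() {
      assert(SlotIndex(1024) == 0);  // Indices 0 and 1024 collide in slot 0.
      // A sentinel never matches an index that hashes to its own slot:
      assert(SlotIndex(InvalidIndexForSlot(0)) != 0);
      assert(SlotIndex(InvalidIndexForSlot(5)) != 5);
      return 0;
    }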
diff --git a/runtime/mirror/dex_cache_test.cc b/runtime/mirror/dex_cache_test.cc
index ef0aaaa..71a47f6 100644
--- a/runtime/mirror/dex_cache_test.cc
+++ b/runtime/mirror/dex_cache_test.cc
@@ -54,7 +54,8 @@
   EXPECT_TRUE(dex_cache->StaticTypeSize() == dex_cache->NumResolvedTypes()
       || java_lang_dex_file_->NumTypeIds() == dex_cache->NumResolvedTypes());
   EXPECT_EQ(java_lang_dex_file_->NumMethodIds(), dex_cache->NumResolvedMethods());
-  EXPECT_EQ(java_lang_dex_file_->NumFieldIds(),  dex_cache->NumResolvedFields());
+  EXPECT_TRUE(dex_cache->StaticArtFieldSize() == dex_cache->NumResolvedFields()
+      || java_lang_dex_file_->NumFieldIds() == dex_cache->NumResolvedFields());
   EXPECT_TRUE(dex_cache->StaticMethodTypeSize() == dex_cache->NumResolvedMethodTypes()
       || java_lang_dex_file_->NumProtoIds() == dex_cache->NumResolvedMethodTypes());
 }
diff --git a/runtime/mirror/field.cc b/runtime/mirror/field.cc
index f6b6489..54034c2 100644
--- a/runtime/mirror/field.cc
+++ b/runtime/mirror/field.cc
@@ -68,8 +68,16 @@
     }
   }
   mirror::DexCache* const dex_cache = declaring_class->GetDexCache();
-  ArtField* const art_field = dex_cache->GetResolvedField(GetDexFieldIndex(), kRuntimePointerSize);
-  CHECK(art_field != nullptr);
+  ArtField* art_field = dex_cache->GetResolvedField(GetDexFieldIndex(), kRuntimePointerSize);
+  if (UNLIKELY(art_field == nullptr)) {
+    if (IsStatic()) {
+      art_field = declaring_class->FindDeclaredStaticField(dex_cache, GetDexFieldIndex());
+    } else {
+      art_field = declaring_class->FindInstanceField(dex_cache, GetDexFieldIndex());
+    }
+    CHECK(art_field != nullptr);
+    dex_cache->SetResolvedField(GetDexFieldIndex(), art_field, kRuntimePointerSize);
+  }
   CHECK_EQ(declaring_class, art_field->GetDeclaringClass());
   return art_field;
 }
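
With a fixed-size hash-based array, a previously resolved field can be evicted by a colliding entry, so GetArtField() above can no longer assume the cache still holds it: it re-resolves on a miss and writes the result back. The general miss-and-refill shape, as a toy sketch with illustrative names:

    #include <cassert>
    #include <cstdint>
    #include <unordered_map>

    struct Field { uint32_t idx; };

    static Field g_fields[] = {{0u}, {1u}};

    // Assumed authoritative lookup (ART searches the declaring class).
    Field* SlowResolve(uint32_t idx) { return &g_fields[idx % 2]; }

    Field* GetField(std::unordered_map<uint32_t, Field*>& cache, uint32_t idx) {
      auto it = cache.find(idx);
      Field* field = (it != cache.end()) ? it->second : nullptr;
      if (field == nullptr) {      // Miss: evicted by a collision, or never set.
        field = SlowResolve(idx);  // Recompute from the authoritative source.
        assert(field != nullptr);  // Mirrors the CHECK in GetArtField().
        cache[idx] = field;        // Refill so the next lookup hits.
      }
      return field;
    }

    int main() {
      std::unordered_map<uint32_t, Field*> cache;
      assert(GetField(cache, 1)->idx == 1u);  // First call misses and refills.
      assert(cache.count(1) == 1);            // A second call would hit.
      return 0;
    }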
diff --git a/runtime/mirror/object_test.cc b/runtime/mirror/object_test.cc
index e761e4d..d306f9c 100644
--- a/runtime/mirror/object_test.cc
+++ b/runtime/mirror/object_test.cc
@@ -726,57 +726,60 @@
   ScopedObjectAccess soa(Thread::Current());
   jobject jclass_loader = LoadDex("XandY");
   StackHandleScope<2> hs(soa.Self());
-  ObjPtr<mirror::Object, /*kPoison*/ true> null_ptr;
-  EXPECT_TRUE(null_ptr.IsNull());
-  EXPECT_TRUE(null_ptr.IsValid());
-  EXPECT_TRUE(null_ptr.Ptr() == nullptr);
-  EXPECT_TRUE(null_ptr == nullptr);
-  EXPECT_TRUE(null_ptr == null_ptr);
-  EXPECT_FALSE(null_ptr != null_ptr);
-  EXPECT_FALSE(null_ptr != nullptr);
-  null_ptr.AssertValid();
   Handle<ClassLoader> class_loader(hs.NewHandle(soa.Decode<ClassLoader>(jclass_loader)));
   Handle<mirror::Class> h_X(
       hs.NewHandle(class_linker_->FindClass(soa.Self(), "LX;", class_loader)));
-  ObjPtr<Class, /*kPoison*/ true> X(h_X.Get());
-  EXPECT_TRUE(!X.IsNull());
-  EXPECT_TRUE(X.IsValid());
-  EXPECT_TRUE(X.Ptr() != nullptr);
-  EXPECT_OBJ_PTR_EQ(h_X.Get(), X);
-  // FindClass may cause thread suspension, it should invalidate X.
-  ObjPtr<Class, /*kPoison*/ true> Y(class_linker_->FindClass(soa.Self(), "LY;", class_loader));
-  EXPECT_TRUE(!Y.IsNull());
-  EXPECT_TRUE(Y.IsValid());
-  EXPECT_TRUE(Y.Ptr() != nullptr);
 
-  // Should IsNull be safe to call on null ObjPtr? I'll allow it for now.
-  EXPECT_TRUE(!X.IsNull());
-  EXPECT_TRUE(!X.IsValid());
-  // Make X valid again by copying out of handle.
-  X.Assign(h_X.Get());
-  EXPECT_TRUE(!X.IsNull());
-  EXPECT_TRUE(X.IsValid());
-  EXPECT_OBJ_PTR_EQ(h_X.Get(), X);
+  if (kObjPtrPoisoning) {
+    ObjPtr<mirror::Object> null_ptr;
+    EXPECT_TRUE(null_ptr.IsNull());
+    EXPECT_TRUE(null_ptr.IsValid());
+    EXPECT_TRUE(null_ptr.Ptr() == nullptr);
+    EXPECT_TRUE(null_ptr == nullptr);
+    EXPECT_TRUE(null_ptr == null_ptr);
+    EXPECT_FALSE(null_ptr != null_ptr);
+    EXPECT_FALSE(null_ptr != nullptr);
+    null_ptr.AssertValid();
+    ObjPtr<Class> X(h_X.Get());
+    EXPECT_TRUE(!X.IsNull());
+    EXPECT_TRUE(X.IsValid());
+    EXPECT_TRUE(X.Ptr() != nullptr);
+    EXPECT_OBJ_PTR_EQ(h_X.Get(), X);
+    // FindClass may cause thread suspension, it should invalidate X.
+    ObjPtr<Class> Y(class_linker_->FindClass(soa.Self(), "LY;", class_loader));
+    EXPECT_TRUE(!Y.IsNull());
+    EXPECT_TRUE(Y.IsValid());
+    EXPECT_TRUE(Y.Ptr() != nullptr);
 
-  // Allow thread suspension to invalidate Y.
-  soa.Self()->AllowThreadSuspension();
-  EXPECT_TRUE(!Y.IsNull());
-  EXPECT_TRUE(!Y.IsValid());
+    // Should IsNull be safe to call on null ObjPtr? I'll allow it for now.
+    EXPECT_TRUE(!X.IsNull());
+    EXPECT_TRUE(!X.IsValid());
+    // Make X valid again by copying out of handle.
+    X.Assign(h_X.Get());
+    EXPECT_TRUE(!X.IsNull());
+    EXPECT_TRUE(X.IsValid());
+    EXPECT_OBJ_PTR_EQ(h_X.Get(), X);
 
-  // Test unpoisoned.
-  ObjPtr<mirror::Object, /*kPoison*/ false> unpoisoned;
-  EXPECT_TRUE(unpoisoned.IsNull());
-  EXPECT_TRUE(unpoisoned.IsValid());
-  EXPECT_TRUE(unpoisoned.Ptr() == nullptr);
-  EXPECT_TRUE(unpoisoned == nullptr);
-  EXPECT_TRUE(unpoisoned == unpoisoned);
-  EXPECT_FALSE(unpoisoned != unpoisoned);
-  EXPECT_FALSE(unpoisoned != nullptr);
+    // Allow thread suspension to invalidate Y.
+    soa.Self()->AllowThreadSuspension();
+    EXPECT_TRUE(!Y.IsNull());
+    EXPECT_TRUE(!Y.IsValid());
+  } else {
+    // Test unpoisoned.
+    ObjPtr<mirror::Object> unpoisoned;
+    EXPECT_TRUE(unpoisoned.IsNull());
+    EXPECT_TRUE(unpoisoned.IsValid());
+    EXPECT_TRUE(unpoisoned.Ptr() == nullptr);
+    EXPECT_TRUE(unpoisoned == nullptr);
+    EXPECT_TRUE(unpoisoned == unpoisoned);
+    EXPECT_FALSE(unpoisoned != unpoisoned);
+    EXPECT_FALSE(unpoisoned != nullptr);
 
-  unpoisoned = h_X.Get();
-  EXPECT_FALSE(unpoisoned.IsNull());
-  EXPECT_TRUE(unpoisoned == h_X.Get());
-  EXPECT_OBJ_PTR_EQ(unpoisoned, h_X.Get());
+    unpoisoned = h_X.Get();
+    EXPECT_FALSE(unpoisoned.IsNull());
+    EXPECT_TRUE(unpoisoned == h_X.Get());
+    EXPECT_OBJ_PTR_EQ(unpoisoned, h_X.Get());
+  }
 }
 
 }  // namespace mirror
diff --git a/runtime/mirror/string-inl.h b/runtime/mirror/string-inl.h
index c2407d7..57b20a1 100644
--- a/runtime/mirror/string-inl.h
+++ b/runtime/mirror/string-inl.h
@@ -36,7 +36,7 @@
 namespace mirror {
 
 inline uint32_t String::ClassSize(PointerSize pointer_size) {
-  uint32_t vtable_entries = Object::kVTableLength + 57;
+  uint32_t vtable_entries = Object::kVTableLength + 56;
   return Class::ComputeClassSize(true, vtable_entries, 0, 0, 0, 1, 2, pointer_size);
 }
 
@@ -311,9 +311,7 @@
 inline bool String::AllASCII(const MemoryType* chars, const int length) {
   static_assert(std::is_unsigned<MemoryType>::value, "Expecting unsigned MemoryType");
   for (int i = 0; i < length; ++i) {
-    // Valid ASCII characters are in range 1..0x7f. Zero is not considered ASCII
-    // because it would complicate the detection of ASCII strings in Modified-UTF8.
-    if ((chars[i] - 1u) >= 0x7fu) {
+    if (!IsASCII(chars[i])) {
       return false;
     }
   }
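
The rewritten loop delegates to String::IsASCII() (added to string.h below), which keeps the old trick of folding 1 <= c && c <= 0x7f into a single unsigned comparison: subtracting 1 wraps c == 0 around to a huge value, so zero is rejected together with everything above 0x7f. A quick standalone check of the identity:

    #include <cassert>
    #include <cstdint>

    constexpr bool IsASCII(uint16_t c) {
      // (c - 1u) < 0x7fu  <=>  1 <= c && c <= 0x7f, via unsigned wraparound.
      return (c - 1u) < 0x7fu;
    }

    int main() {
      assert(!IsASCII(0));     // 0 - 1u wraps to 0xffffffff: rejected.
      assert(IsASCII(1));
      assert(IsASCII(0x7f));
      assert(!IsASCII(0x80));
      for (uint32_t c = 0; c <= 0xffff; ++c) {  // Exhaustive over uint16_t.
        assert(IsASCII(static_cast<uint16_t>(c)) == (c >= 1 && c <= 0x7f));
      }
      return 0;
    }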
diff --git a/runtime/mirror/string.cc b/runtime/mirror/string.cc
index 0ab0bd6..884b88a 100644
--- a/runtime/mirror/string.cc
+++ b/runtime/mirror/string.cc
@@ -79,14 +79,55 @@
   }
 }
 
-void String::SetCharAt(int32_t index, uint16_t c) {
-  DCHECK((index >= 0) && (index < GetLength()));
-  if (IsCompressed()) {
-    // TODO: Handle the case where String is compressed and c is non-ASCII
-    GetValueCompressed()[index] = static_cast<uint8_t>(c);
-  } else {
-    GetValue()[index] = c;
+inline bool String::AllASCIIExcept(const uint16_t* chars, int32_t length, uint16_t non_ascii) {
+  DCHECK(!IsASCII(non_ascii));
+  for (int32_t i = 0; i < length; ++i) {
+    if (!IsASCII(chars[i]) && chars[i] != non_ascii) {
+      return false;
+    }
   }
+  return true;
+}
+
+ObjPtr<String> String::DoReplace(Thread* self, uint16_t old_c, uint16_t new_c) {
+  DCHECK(IsCompressed() ? ContainsElement(ArrayRef<uint8_t>(value_compressed_, GetLength()), old_c)
+                        : ContainsElement(ArrayRef<uint16_t>(value_, GetLength()), old_c));
+  int32_t length = GetLength();
+  bool compressible =
+      kUseStringCompression &&
+      IsASCII(new_c) &&
+      (IsCompressed() || (!IsASCII(old_c) && AllASCIIExcept(value_, length, old_c)));
+  gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator();
+  const int32_t length_with_flag = String::GetFlaggedCount(GetLength(), compressible);
+  SetStringCountVisitor visitor(length_with_flag);
+  ObjPtr<String> string = Alloc<true>(self, length_with_flag, allocator_type, visitor);
+  if (UNLIKELY(string == nullptr)) {
+    return nullptr;
+  }
+  if (compressible) {
+    auto replace = [old_c, new_c](uint16_t c) {
+      return dchecked_integral_cast<uint8_t>((old_c != c) ? c : new_c);
+    };
+    uint8_t* out = string->value_compressed_;
+    if (LIKELY(IsCompressed())) {  // LIKELY(compressible == IsCompressed())
+      std::transform(value_compressed_, value_compressed_ + length, out, replace);
+    } else {
+      std::transform(value_, value_ + length, out, replace);
+    }
+    DCHECK(kUseStringCompression && AllASCII(out, length));
+  } else {
+    auto replace = [old_c, new_c](uint16_t c) {
+      return (old_c != c) ? c : new_c;
+    };
+    uint16_t* out = string->value_;
+    if (UNLIKELY(IsCompressed())) {  // LIKELY(compressible == IsCompressed())
+      std::transform(value_compressed_, value_compressed_ + length, out, replace);
+    } else {
+      std::transform(value_, value_ + length, out, replace);
+    }
+    DCHECK(!kUseStringCompression || !AllASCII(out, length));
+  }
+  return string;
 }
 
 String* String::AllocFromStrings(Thread* self, Handle<String> string, Handle<String> string2) {
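
The `compressible` predicate in DoReplace() covers two cases: an already-compressed source stays ASCII as long as new_c is ASCII, and an uncompressed source compresses only when old_c was its sole non-ASCII code unit (if old_c were ASCII, whatever kept the source wide would survive the replacement). A toy recomputation of the uncompressed-source branch, with illustrative names:

    #include <cassert>
    #include <cstdint>
    #include <vector>

    constexpr bool IsASCII(uint16_t c) { return (c - 1u) < 0x7fu; }

    bool AllASCIIExcept(const std::vector<uint16_t>& chars, uint16_t non_ascii) {
      for (uint16_t c : chars) {
        if (!IsASCII(c) && c != non_ascii) return false;
      }
      return true;
    }

    // True iff replacing old_c with new_c leaves only ASCII code units.
    bool ResultCompressible(const std::vector<uint16_t>& chars,
                            uint16_t old_c, uint16_t new_c) {
      return IsASCII(new_c) && !IsASCII(old_c) && AllASCIIExcept(chars, old_c);
    }

    int main() {
      std::vector<uint16_t> s = {'h', 0x00e9, 'l', 'l', 'o'};  // "héllo"
      assert(ResultCompressible(s, 0x00e9, 'e'));  // -> "hello", all ASCII.
      assert(!ResultCompressible(s, 'l', 'L'));    // The é survives: still wide.
      return 0;
    }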
diff --git a/runtime/mirror/string.h b/runtime/mirror/string.h
index 38f6dd4..dbb5a4c 100644
--- a/runtime/mirror/string.h
+++ b/runtime/mirror/string.h
@@ -32,7 +32,7 @@
 namespace mirror {
 
 // String Compression
-static constexpr bool kUseStringCompression = false;
+static constexpr bool kUseStringCompression = true;
 enum class StringCompressionFlag : uint32_t {
     kCompressed = 0u,
     kUncompressed = 1u
@@ -94,7 +94,10 @@
 
   uint16_t CharAt(int32_t index) REQUIRES_SHARED(Locks::mutator_lock_);
 
-  void SetCharAt(int32_t index, uint16_t c) REQUIRES_SHARED(Locks::mutator_lock_);
+  // Create a new string where all occurrences of `old_c` are replaced with `new_c`.
+  // String.doReplace(char, char) is called from String.replace(char, char) when there is a match.
+  ObjPtr<String> DoReplace(Thread* self, uint16_t old_c, uint16_t new_c)
+      REQUIRES_SHARED(Locks::mutator_lock_);
 
   ObjPtr<String> Intern() REQUIRES_SHARED(Locks::mutator_lock_);
 
@@ -229,6 +232,14 @@
       REQUIRES_SHARED(Locks::mutator_lock_);
 
  private:
+  static constexpr bool IsASCII(uint16_t c) {
+    // Valid ASCII characters are in range 1..0x7f. Zero is not considered ASCII
+    // because it would complicate the detection of ASCII strings in Modified-UTF8.
+    return (c - 1u) < 0x7fu;
+  }
+
+  static bool AllASCIIExcept(const uint16_t* chars, int32_t length, uint16_t non_ascii);
+
   void SetHashCode(int32_t new_hash_code) REQUIRES_SHARED(Locks::mutator_lock_) {
     // Hash code is invariant so use non-transactional mode. Also disable check as we may run inside
     // a transaction.
diff --git a/runtime/modifiers.h b/runtime/modifiers.h
index ae6b31d..461f870 100644
--- a/runtime/modifiers.h
+++ b/runtime/modifiers.h
@@ -45,6 +45,9 @@
 static constexpr uint32_t kAccConstructor =           0x00010000;  // method (dex only) <(cl)init>
 static constexpr uint32_t kAccDeclaredSynchronized =  0x00020000;  // method (dex only)
 static constexpr uint32_t kAccClassIsProxy =          0x00040000;  // class  (dex only)
+// Set to indicate that the ArtMethod is obsolete and has a different DexCache + DexFile from its
+// declaring class. This flag may only be applied to methods.
+static constexpr uint32_t kAccObsoleteMethod =        0x00040000;  // method (runtime)
 // Used by a method to denote that its execution does not need to go through slow path interpreter.
 static constexpr uint32_t kAccSkipAccessChecks =      0x00080000;  // method (dex only)
 // Used by a class to denote that the verifier has attempted to check it at least once.
@@ -67,10 +70,6 @@
 
 // Set by the verifier for a method that could not be verified to follow structured locking.
 static constexpr uint32_t kAccMustCountLocks =        0x02000000;  // method (runtime)
-// Set to indicate that the ArtMethod is obsolete and has a different DexCache from its declaring
-// class.
-// TODO Might want to re-arrange some of these so that we can have obsolete + intrinsic methods.
-static constexpr uint32_t kAccObsoleteMethod =        0x04000000;  // method (runtime)
 
 // Set by the class linker for a method that has only one implementation for a
 // virtual call.
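
Moving kAccObsoleteMethod down to 0x00040000 makes it share a bit with kAccClassIsProxy, which is safe only because one flag is ever set on classes and the other only on methods: the carrier disambiguates the bit. A sketch of that convention with hypothetical types:

    #include <cassert>
    #include <cstdint>

    // The same bit value, interpreted by what carries it.
    constexpr uint32_t kAccClassIsProxy   = 0x00040000;  // class  (dex only)
    constexpr uint32_t kAccObsoleteMethod = 0x00040000;  // method (runtime)

    struct Class  { uint32_t access_flags; };
    struct Method { uint32_t access_flags; };

    bool IsProxyClass(const Class& k) {
      return (k.access_flags & kAccClassIsProxy) != 0;
    }
    bool IsObsoleteMethod(const Method& m) {
      return (m.access_flags & kAccObsoleteMethod) != 0;
    }

    int main() {
      assert(IsProxyClass(Class{kAccClassIsProxy}));
      assert(IsObsoleteMethod(Method{kAccObsoleteMethod}));
      return 0;
    }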
diff --git a/runtime/native/dalvik_system_VMDebug.cc b/runtime/native/dalvik_system_VMDebug.cc
index 0d24587..f6a73a8 100644
--- a/runtime/native/dalvik_system_VMDebug.cc
+++ b/runtime/native/dalvik_system_VMDebug.cc
@@ -537,14 +537,14 @@
   NATIVE_METHOD(VMDebug, getAllocCount, "(I)I"),
   NATIVE_METHOD(VMDebug, getHeapSpaceStats, "([J)V"),
   NATIVE_METHOD(VMDebug, getInstructionCount, "([I)V"),
-  NATIVE_METHOD(VMDebug, getLoadedClassCount, "!()I"),
+  FAST_NATIVE_METHOD(VMDebug, getLoadedClassCount, "()I"),
   NATIVE_METHOD(VMDebug, getVmFeatureList, "()[Ljava/lang/String;"),
   NATIVE_METHOD(VMDebug, infopoint, "(I)V"),
-  NATIVE_METHOD(VMDebug, isDebuggerConnected, "!()Z"),
-  NATIVE_METHOD(VMDebug, isDebuggingEnabled, "!()Z"),
+  FAST_NATIVE_METHOD(VMDebug, isDebuggerConnected, "()Z"),
+  FAST_NATIVE_METHOD(VMDebug, isDebuggingEnabled, "()Z"),
   NATIVE_METHOD(VMDebug, getMethodTracingMode, "()I"),
-  NATIVE_METHOD(VMDebug, lastDebuggerActivity, "!()J"),
-  NATIVE_METHOD(VMDebug, printLoadedClasses, "!(I)V"),
+  FAST_NATIVE_METHOD(VMDebug, lastDebuggerActivity, "()J"),
+  FAST_NATIVE_METHOD(VMDebug, printLoadedClasses, "(I)V"),
   NATIVE_METHOD(VMDebug, resetAllocCount, "(I)V"),
   NATIVE_METHOD(VMDebug, resetInstructionCount, "()V"),
   NATIVE_METHOD(VMDebug, startAllocCounting, "()V"),
@@ -557,7 +557,7 @@
   NATIVE_METHOD(VMDebug, stopEmulatorTracing, "()V"),
   NATIVE_METHOD(VMDebug, stopInstructionCounting, "()V"),
   NATIVE_METHOD(VMDebug, stopMethodTracing, "()V"),
-  NATIVE_METHOD(VMDebug, threadCpuTimeNanos, "!()J"),
+  FAST_NATIVE_METHOD(VMDebug, threadCpuTimeNanos, "()J"),
   NATIVE_METHOD(VMDebug, getRuntimeStatInternal, "(I)Ljava/lang/String;"),
   NATIVE_METHOD(VMDebug, getRuntimeStatsInternal, "()[Ljava/lang/String;"),
   NATIVE_METHOD(VMDebug, attachAgent, "(Ljava/lang/String;)V"),
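
The registration tables in this and the following files replace the old convention of a '!' prefix inside the JNI signature string with an explicit FAST_NATIVE_METHOD macro. The real definitions live in ART's jni_internal.h; the following is only a plausible reconstruction of the pair, shown to illustrate the mechanics and not guaranteed to match the actual macro bodies:

    #include <jni.h>

    // Hypothetical sketch: the fast-native variant centralizes the "!"
    // marker instead of scattering it through every signature literal.
    #define NATIVE_METHOD(klass, fn, sig) \
      { #fn, sig, reinterpret_cast<void*>(klass ## _ ## fn) }
    #define FAST_NATIVE_METHOD(klass, fn, sig) \
      { #fn, "!" sig, reinterpret_cast<void*>(klass ## _ ## fn) }

    static jboolean Example_isReady(JNIEnv*, jclass) { return JNI_TRUE; }

    static const JNINativeMethod gExampleMethods[] = {
      FAST_NATIVE_METHOD(Example, isReady, "()Z"),  // Registers as "!()Z".
    };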
diff --git a/runtime/native/dalvik_system_VMRuntime.cc b/runtime/native/dalvik_system_VMRuntime.cc
index 6bfccdc..d81c13d 100644
--- a/runtime/native/dalvik_system_VMRuntime.cc
+++ b/runtime/native/dalvik_system_VMRuntime.cc
@@ -444,6 +444,7 @@
   if (!kPreloadDexCachesCollectStats) {
     return;
   }
+  // TODO: Update for hash-based DexCache arrays.
   ClassLinker* const class_linker = Runtime::Current()->GetClassLinker();
   Thread* const self = Thread::Current();
   for (const DexFile* dex_file : class_linker->GetBootClassPath()) {
@@ -463,7 +464,7 @@
       }
     }
     for (size_t j = 0; j < dex_cache->NumResolvedFields(); j++) {
-      ArtField* field = class_linker->GetResolvedField(j, dex_cache);
+      ArtField* field = dex_cache->GetResolvedField(j, class_linker->GetImagePointerSize());
       if (field != nullptr) {
         filled->num_fields++;
       }
@@ -580,9 +581,7 @@
 static void VMRuntime_registerAppInfo(JNIEnv* env,
                                       jclass clazz ATTRIBUTE_UNUSED,
                                       jstring profile_file,
-                                      jstring app_dir,
-                                      jobjectArray code_paths,
-                                      jstring foreign_dex_profile_path) {
+                                      jobjectArray code_paths) {
   std::vector<std::string> code_paths_vec;
   int code_paths_length = env->GetArrayLength(code_paths);
   for (int i = 0; i < code_paths_length; i++) {
@@ -596,22 +595,7 @@
   std::string profile_file_str(raw_profile_file);
   env->ReleaseStringUTFChars(profile_file, raw_profile_file);
 
-  std::string foreign_dex_profile_path_str = "";
-  if (foreign_dex_profile_path != nullptr) {
-    const char* raw_foreign_dex_profile_path =
-        env->GetStringUTFChars(foreign_dex_profile_path, nullptr);
-    foreign_dex_profile_path_str.assign(raw_foreign_dex_profile_path);
-    env->ReleaseStringUTFChars(foreign_dex_profile_path, raw_foreign_dex_profile_path);
-  }
-
-  const char* raw_app_dir = env->GetStringUTFChars(app_dir, nullptr);
-  std::string app_dir_str(raw_app_dir);
-  env->ReleaseStringUTFChars(app_dir, raw_app_dir);
-
-  Runtime::Current()->RegisterAppInfo(code_paths_vec,
-                                      profile_file_str,
-                                      foreign_dex_profile_path_str,
-                                      app_dir_str);
+  Runtime::Current()->RegisterAppInfo(code_paths_vec, profile_file_str);
 }
 
 static jboolean VMRuntime_isBootClassPathOnDisk(JNIEnv* env, jclass, jstring java_instruction_set) {
@@ -642,7 +626,7 @@
 }
 
 static JNINativeMethod gMethods[] = {
-  NATIVE_METHOD(VMRuntime, addressOf, "!(Ljava/lang/Object;)J"),
+  FAST_NATIVE_METHOD(VMRuntime, addressOf, "(Ljava/lang/Object;)J"),
   NATIVE_METHOD(VMRuntime, bootClassPath, "()Ljava/lang/String;"),
   NATIVE_METHOD(VMRuntime, clampGrowthLimit, "()V"),
   NATIVE_METHOD(VMRuntime, classPath, "()Ljava/lang/String;"),
@@ -650,11 +634,11 @@
   NATIVE_METHOD(VMRuntime, concurrentGC, "()V"),
   NATIVE_METHOD(VMRuntime, disableJitCompilation, "()V"),
   NATIVE_METHOD(VMRuntime, getTargetHeapUtilization, "()F"),
-  NATIVE_METHOD(VMRuntime, isDebuggerActive, "!()Z"),
-  NATIVE_METHOD(VMRuntime, isNativeDebuggable, "!()Z"),
+  FAST_NATIVE_METHOD(VMRuntime, isDebuggerActive, "()Z"),
+  FAST_NATIVE_METHOD(VMRuntime, isNativeDebuggable, "()Z"),
   NATIVE_METHOD(VMRuntime, nativeSetTargetHeapUtilization, "(F)V"),
-  NATIVE_METHOD(VMRuntime, newNonMovableArray, "!(Ljava/lang/Class;I)Ljava/lang/Object;"),
-  NATIVE_METHOD(VMRuntime, newUnpaddedArray, "!(Ljava/lang/Class;I)Ljava/lang/Object;"),
+  FAST_NATIVE_METHOD(VMRuntime, newNonMovableArray, "(Ljava/lang/Class;I)Ljava/lang/Object;"),
+  FAST_NATIVE_METHOD(VMRuntime, newUnpaddedArray, "(Ljava/lang/Class;I)Ljava/lang/Object;"),
   NATIVE_METHOD(VMRuntime, properties, "()[Ljava/lang/String;"),
   NATIVE_METHOD(VMRuntime, setTargetSdkVersionNative, "(I)V"),
   NATIVE_METHOD(VMRuntime, registerNativeAllocation, "(I)V"),
@@ -671,11 +655,10 @@
   NATIVE_METHOD(VMRuntime, vmVersion, "()Ljava/lang/String;"),
   NATIVE_METHOD(VMRuntime, vmLibrary, "()Ljava/lang/String;"),
   NATIVE_METHOD(VMRuntime, vmInstructionSet, "()Ljava/lang/String;"),
-  NATIVE_METHOD(VMRuntime, is64Bit, "!()Z"),
-  NATIVE_METHOD(VMRuntime, isCheckJniEnabled, "!()Z"),
+  FAST_NATIVE_METHOD(VMRuntime, is64Bit, "()Z"),
+  FAST_NATIVE_METHOD(VMRuntime, isCheckJniEnabled, "()Z"),
   NATIVE_METHOD(VMRuntime, preloadDexCaches, "()V"),
-  NATIVE_METHOD(VMRuntime, registerAppInfo,
-                "(Ljava/lang/String;Ljava/lang/String;[Ljava/lang/String;Ljava/lang/String;)V"),
+  NATIVE_METHOD(VMRuntime, registerAppInfo, "(Ljava/lang/String;[Ljava/lang/String;)V"),
   NATIVE_METHOD(VMRuntime, isBootClassPathOnDisk, "(Ljava/lang/String;)Z"),
   NATIVE_METHOD(VMRuntime, getCurrentInstructionSet, "()Ljava/lang/String;"),
   NATIVE_METHOD(VMRuntime, didPruneDalvikCache, "()Z"),
diff --git a/runtime/native/dalvik_system_VMStack.cc b/runtime/native/dalvik_system_VMStack.cc
index be6f7f2..0dfafa4 100644
--- a/runtime/native/dalvik_system_VMStack.cc
+++ b/runtime/native/dalvik_system_VMStack.cc
@@ -139,11 +139,11 @@
 }
 
 static JNINativeMethod gMethods[] = {
-  NATIVE_METHOD(VMStack, fillStackTraceElements, "!(Ljava/lang/Thread;[Ljava/lang/StackTraceElement;)I"),
-  NATIVE_METHOD(VMStack, getCallingClassLoader, "!()Ljava/lang/ClassLoader;"),
-  NATIVE_METHOD(VMStack, getClosestUserClassLoader, "!()Ljava/lang/ClassLoader;"),
-  NATIVE_METHOD(VMStack, getStackClass2, "!()Ljava/lang/Class;"),
-  NATIVE_METHOD(VMStack, getThreadStackTrace, "!(Ljava/lang/Thread;)[Ljava/lang/StackTraceElement;"),
+  FAST_NATIVE_METHOD(VMStack, fillStackTraceElements, "(Ljava/lang/Thread;[Ljava/lang/StackTraceElement;)I"),
+  FAST_NATIVE_METHOD(VMStack, getCallingClassLoader, "()Ljava/lang/ClassLoader;"),
+  FAST_NATIVE_METHOD(VMStack, getClosestUserClassLoader, "()Ljava/lang/ClassLoader;"),
+  FAST_NATIVE_METHOD(VMStack, getStackClass2, "()Ljava/lang/Class;"),
+  FAST_NATIVE_METHOD(VMStack, getThreadStackTrace, "(Ljava/lang/Thread;)[Ljava/lang/StackTraceElement;"),
 };
 
 void register_dalvik_system_VMStack(JNIEnv* env) {
diff --git a/runtime/native/dalvik_system_ZygoteHooks.cc b/runtime/native/dalvik_system_ZygoteHooks.cc
index fd22d9e..836ba81 100644
--- a/runtime/native/dalvik_system_ZygoteHooks.cc
+++ b/runtime/native/dalvik_system_ZygoteHooks.cc
@@ -26,9 +26,11 @@
 #include "jit/jit.h"
 #include "jni_internal.h"
 #include "JNIHelp.h"
+#include "non_debuggable_classes.h"
 #include "scoped_thread_state_change-inl.h"
 #include "ScopedUtfChars.h"
 #include "thread-inl.h"
+#include "thread_list.h"
 #include "trace.h"
 
 #if defined(__linux__)
@@ -39,6 +41,10 @@
 
 namespace art {
 
+// Set to true to always determine the non-debuggable classes even if we would not allow a debugger
+// to actually attach.
+static constexpr bool kAlwaysCollectNonDebuggableClasses = kIsDebugBuild;
+
 using android::base::StringPrintf;
 
 static void EnableDebugger() {
@@ -68,6 +74,82 @@
   }
 }
 
+class ClassSet {
+ public:
+  // The number of classes we reasonably expect to have to look at. Realistically the number is
+  // more like ~10 but there is little harm in having some extra.
+  static constexpr int kClassSetCapacity = 100;
+
+  explicit ClassSet(Thread* const self) : self_(self) {
+    self_->GetJniEnv()->PushFrame(kClassSetCapacity);
+  }
+
+  ~ClassSet() {
+    self_->GetJniEnv()->PopFrame();
+  }
+
+  void AddClass(ObjPtr<mirror::Class> klass) REQUIRES(Locks::mutator_lock_) {
+    class_set_.insert(self_->GetJniEnv()->AddLocalReference<jclass>(klass.Ptr()));
+  }
+
+  const std::unordered_set<jclass>& GetClasses() const {
+    return class_set_;
+  }
+
+ private:
+  Thread* const self_;
+  std::unordered_set<jclass> class_set_;
+};
+
+static void DoCollectNonDebuggableCallback(Thread* thread, void* data)
+    REQUIRES(Locks::mutator_lock_) {
+  class NonDebuggableStacksVisitor : public StackVisitor {
+   public:
+    NonDebuggableStacksVisitor(Thread* t, ClassSet* class_set)
+        : StackVisitor(t, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFrames),
+          class_set_(class_set) {}
+
+    ~NonDebuggableStacksVisitor() OVERRIDE {}
+
+    bool VisitFrame() OVERRIDE REQUIRES(Locks::mutator_lock_) {
+      if (GetMethod()->IsRuntimeMethod()) {
+        return true;
+      }
+      class_set_->AddClass(GetMethod()->GetDeclaringClass());
+      if (kIsDebugBuild) {
+        LOG(INFO) << GetMethod()->GetDeclaringClass()->PrettyClass()
+                  << " might not be fully debuggable/deoptimizable due to "
+                  << GetMethod()->PrettyMethod() << " appearing on the stack during zygote fork.";
+      }
+      return true;
+    }
+
+   private:
+    ClassSet* class_set_;
+  };
+  NonDebuggableStacksVisitor visitor(thread, reinterpret_cast<ClassSet*>(data));
+  visitor.WalkStack();
+}
+
+static void CollectNonDebuggableClasses() REQUIRES(!Locks::mutator_lock_) {
+  Runtime* const runtime = Runtime::Current();
+  Thread* const self = Thread::Current();
+  // Get the mutator lock.
+  ScopedObjectAccess soa(self);
+  ClassSet classes(self);
+  {
+    // Drop the shared mutator lock.
+    ScopedThreadSuspension sts(self, art::ThreadState::kNative);
+    // Get exclusive mutator lock with suspend all.
+    ScopedSuspendAll suspend("Checking stacks for non-obsoletable methods!", /*long_suspend*/false);
+    MutexLock mu(Thread::Current(), *Locks::thread_list_lock_);
+    runtime->GetThreadList()->ForEach(DoCollectNonDebuggableCallback, &classes);
+  }
+  for (jclass klass : classes.GetClasses()) {
+    NonDebuggableClasses::AddNonDebuggableClass(klass);
+  }
+}
+
 static void EnableDebugFeatures(uint32_t debug_flags) {
   // Must match values in com.android.internal.os.Zygote.
   enum {
@@ -131,12 +213,17 @@
     debug_flags &= ~DEBUG_ALWAYS_JIT;
   }
 
+  bool needs_non_debuggable_classes = false;
   if ((debug_flags & DEBUG_JAVA_DEBUGGABLE) != 0) {
     runtime->AddCompilerOption("--debuggable");
     runtime->SetJavaDebuggable(true);
     // Deoptimize the boot image as it may be non-debuggable.
     runtime->DeoptimizeBootImage();
     debug_flags &= ~DEBUG_JAVA_DEBUGGABLE;
+    needs_non_debuggable_classes = true;
+  }
+  if (needs_non_debuggable_classes || kAlwaysCollectNonDebuggableClasses) {
+    CollectNonDebuggableClasses();
   }
 
   if ((debug_flags & DEBUG_NATIVE_DEBUGGABLE) != 0) {
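
ClassSet above scopes its collected jclass references to a pushed JNI frame so they are all released in the destructor. In portable JNI the same RAII pattern is built on PushLocalFrame/PopLocalFrame; a hedged sketch:

    #include <jni.h>

    // RAII over a JNI local-reference frame: every local reference created
    // while the guard is alive is released when the frame is popped, which is
    // the lifetime discipline ClassSet's ctor/dtor pair enforces.
    class LocalFrameGuard {
     public:
      LocalFrameGuard(JNIEnv* env, jint capacity) : env_(env) {
        // PushLocalFrame returns a negative value on failure; a production
        // caller should check it.
        env_->PushLocalFrame(capacity);
      }
      ~LocalFrameGuard() { env_->PopLocalFrame(nullptr); }

      LocalFrameGuard(const LocalFrameGuard&) = delete;
      LocalFrameGuard& operator=(const LocalFrameGuard&) = delete;

     private:
      JNIEnv* const env_;
    };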
diff --git a/runtime/native/java_lang_Class.cc b/runtime/native/java_lang_Class.cc
index 256787b..c8431c0 100644
--- a/runtime/native/java_lang_Class.cc
+++ b/runtime/native/java_lang_Class.cc
@@ -713,36 +713,36 @@
 }
 
 static JNINativeMethod gMethods[] = {
-  NATIVE_METHOD(Class, classForName,
-                "!(Ljava/lang/String;ZLjava/lang/ClassLoader;)Ljava/lang/Class;"),
-  NATIVE_METHOD(Class, getDeclaredAnnotation,
-                "!(Ljava/lang/Class;)Ljava/lang/annotation/Annotation;"),
-  NATIVE_METHOD(Class, getDeclaredAnnotations, "!()[Ljava/lang/annotation/Annotation;"),
-  NATIVE_METHOD(Class, getDeclaredClasses, "!()[Ljava/lang/Class;"),
-  NATIVE_METHOD(Class, getDeclaredConstructorInternal,
-                "!([Ljava/lang/Class;)Ljava/lang/reflect/Constructor;"),
-  NATIVE_METHOD(Class, getDeclaredConstructorsInternal, "!(Z)[Ljava/lang/reflect/Constructor;"),
-  NATIVE_METHOD(Class, getDeclaredField, "!(Ljava/lang/String;)Ljava/lang/reflect/Field;"),
-  NATIVE_METHOD(Class, getPublicFieldRecursive, "!(Ljava/lang/String;)Ljava/lang/reflect/Field;"),
-  NATIVE_METHOD(Class, getDeclaredFields, "!()[Ljava/lang/reflect/Field;"),
-  NATIVE_METHOD(Class, getDeclaredFieldsUnchecked, "!(Z)[Ljava/lang/reflect/Field;"),
-  NATIVE_METHOD(Class, getDeclaredMethodInternal,
-                "!(Ljava/lang/String;[Ljava/lang/Class;)Ljava/lang/reflect/Method;"),
-  NATIVE_METHOD(Class, getDeclaredMethodsUnchecked,
-                "!(Z)[Ljava/lang/reflect/Method;"),
-  NATIVE_METHOD(Class, getDeclaringClass, "!()Ljava/lang/Class;"),
-  NATIVE_METHOD(Class, getEnclosingClass, "!()Ljava/lang/Class;"),
-  NATIVE_METHOD(Class, getEnclosingConstructorNative, "!()Ljava/lang/reflect/Constructor;"),
-  NATIVE_METHOD(Class, getEnclosingMethodNative, "!()Ljava/lang/reflect/Method;"),
-  NATIVE_METHOD(Class, getInnerClassFlags, "!(I)I"),
-  NATIVE_METHOD(Class, getInnerClassName, "!()Ljava/lang/String;"),
-  NATIVE_METHOD(Class, getNameNative, "!()Ljava/lang/String;"),
-  NATIVE_METHOD(Class, getProxyInterfaces, "!()[Ljava/lang/Class;"),
-  NATIVE_METHOD(Class, getPublicDeclaredFields, "!()[Ljava/lang/reflect/Field;"),
-  NATIVE_METHOD(Class, getSignatureAnnotation, "!()[Ljava/lang/String;"),
-  NATIVE_METHOD(Class, isAnonymousClass, "!()Z"),
-  NATIVE_METHOD(Class, isDeclaredAnnotationPresent, "!(Ljava/lang/Class;)Z"),
-  NATIVE_METHOD(Class, newInstance, "!()Ljava/lang/Object;"),
+  FAST_NATIVE_METHOD(Class, classForName,
+                "(Ljava/lang/String;ZLjava/lang/ClassLoader;)Ljava/lang/Class;"),
+  FAST_NATIVE_METHOD(Class, getDeclaredAnnotation,
+                "(Ljava/lang/Class;)Ljava/lang/annotation/Annotation;"),
+  FAST_NATIVE_METHOD(Class, getDeclaredAnnotations, "()[Ljava/lang/annotation/Annotation;"),
+  FAST_NATIVE_METHOD(Class, getDeclaredClasses, "()[Ljava/lang/Class;"),
+  FAST_NATIVE_METHOD(Class, getDeclaredConstructorInternal,
+                "([Ljava/lang/Class;)Ljava/lang/reflect/Constructor;"),
+  FAST_NATIVE_METHOD(Class, getDeclaredConstructorsInternal, "(Z)[Ljava/lang/reflect/Constructor;"),
+  FAST_NATIVE_METHOD(Class, getDeclaredField, "(Ljava/lang/String;)Ljava/lang/reflect/Field;"),
+  FAST_NATIVE_METHOD(Class, getPublicFieldRecursive, "(Ljava/lang/String;)Ljava/lang/reflect/Field;"),
+  FAST_NATIVE_METHOD(Class, getDeclaredFields, "()[Ljava/lang/reflect/Field;"),
+  FAST_NATIVE_METHOD(Class, getDeclaredFieldsUnchecked, "(Z)[Ljava/lang/reflect/Field;"),
+  FAST_NATIVE_METHOD(Class, getDeclaredMethodInternal,
+                "(Ljava/lang/String;[Ljava/lang/Class;)Ljava/lang/reflect/Method;"),
+  FAST_NATIVE_METHOD(Class, getDeclaredMethodsUnchecked,
+                "(Z)[Ljava/lang/reflect/Method;"),
+  FAST_NATIVE_METHOD(Class, getDeclaringClass, "()Ljava/lang/Class;"),
+  FAST_NATIVE_METHOD(Class, getEnclosingClass, "()Ljava/lang/Class;"),
+  FAST_NATIVE_METHOD(Class, getEnclosingConstructorNative, "()Ljava/lang/reflect/Constructor;"),
+  FAST_NATIVE_METHOD(Class, getEnclosingMethodNative, "()Ljava/lang/reflect/Method;"),
+  FAST_NATIVE_METHOD(Class, getInnerClassFlags, "(I)I"),
+  FAST_NATIVE_METHOD(Class, getInnerClassName, "()Ljava/lang/String;"),
+  FAST_NATIVE_METHOD(Class, getNameNative, "()Ljava/lang/String;"),
+  FAST_NATIVE_METHOD(Class, getProxyInterfaces, "()[Ljava/lang/Class;"),
+  FAST_NATIVE_METHOD(Class, getPublicDeclaredFields, "()[Ljava/lang/reflect/Field;"),
+  FAST_NATIVE_METHOD(Class, getSignatureAnnotation, "()[Ljava/lang/String;"),
+  FAST_NATIVE_METHOD(Class, isAnonymousClass, "()Z"),
+  FAST_NATIVE_METHOD(Class, isDeclaredAnnotationPresent, "(Ljava/lang/Class;)Z"),
+  FAST_NATIVE_METHOD(Class, newInstance, "()Ljava/lang/Object;"),
 };
 
 void register_java_lang_Class(JNIEnv* env) {
diff --git a/runtime/native/java_lang_DexCache.cc b/runtime/native/java_lang_DexCache.cc
index ee6dda5..8fda4df 100644
--- a/runtime/native/java_lang_DexCache.cc
+++ b/runtime/native/java_lang_DexCache.cc
@@ -95,11 +95,11 @@
 }
 
 static JNINativeMethod gMethods[] = {
-  NATIVE_METHOD(DexCache, getDexNative, "!()Lcom/android/dex/Dex;"),
-  NATIVE_METHOD(DexCache, getResolvedType, "!(I)Ljava/lang/Class;"),
-  NATIVE_METHOD(DexCache, getResolvedString, "!(I)Ljava/lang/String;"),
-  NATIVE_METHOD(DexCache, setResolvedType, "!(ILjava/lang/Class;)V"),
-  NATIVE_METHOD(DexCache, setResolvedString, "!(ILjava/lang/String;)V"),
+  FAST_NATIVE_METHOD(DexCache, getDexNative, "()Lcom/android/dex/Dex;"),
+  FAST_NATIVE_METHOD(DexCache, getResolvedType, "(I)Ljava/lang/Class;"),
+  FAST_NATIVE_METHOD(DexCache, getResolvedString, "(I)Ljava/lang/String;"),
+  FAST_NATIVE_METHOD(DexCache, setResolvedType, "(ILjava/lang/Class;)V"),
+  FAST_NATIVE_METHOD(DexCache, setResolvedString, "(ILjava/lang/String;)V"),
 };
 
 void register_java_lang_DexCache(JNIEnv* env) {
diff --git a/runtime/native/java_lang_Object.cc b/runtime/native/java_lang_Object.cc
index 6493865..fb4f99a 100644
--- a/runtime/native/java_lang_Object.cc
+++ b/runtime/native/java_lang_Object.cc
@@ -20,7 +20,6 @@
 #include "mirror/object-inl.h"
 #include "scoped_fast_native_object_access-inl.h"
 
-
 namespace art {
 
 static jobject Object_internalClone(JNIEnv* env, jobject java_this) {
@@ -49,12 +48,19 @@
   soa.Decode<mirror::Object>(java_this)->Wait(soa.Self(), ms, ns);
 }
 
+static jint Object_identityHashCodeNative(JNIEnv* env, jclass, jobject javaObject) {
+  ScopedFastNativeObjectAccess soa(env);
+  ObjPtr<mirror::Object> o = soa.Decode<mirror::Object>(javaObject);
+  return static_cast<jint>(o->IdentityHashCode());
+}
+
 static JNINativeMethod gMethods[] = {
-  NATIVE_METHOD(Object, internalClone, "!()Ljava/lang/Object;"),
-  NATIVE_METHOD(Object, notify, "!()V"),
-  NATIVE_METHOD(Object, notifyAll, "!()V"),
-  OVERLOADED_NATIVE_METHOD(Object, wait, "!()V", wait),
-  OVERLOADED_NATIVE_METHOD(Object, wait, "!(JI)V", waitJI),
+  FAST_NATIVE_METHOD(Object, internalClone, "()Ljava/lang/Object;"),
+  FAST_NATIVE_METHOD(Object, notify, "()V"),
+  FAST_NATIVE_METHOD(Object, notifyAll, "()V"),
+  OVERLOADED_FAST_NATIVE_METHOD(Object, wait, "()V", wait),
+  OVERLOADED_FAST_NATIVE_METHOD(Object, wait, "(JI)V", waitJI),
+  FAST_NATIVE_METHOD(Object, identityHashCodeNative, "(Ljava/lang/Object;)I"),
 };
 
 void register_java_lang_Object(JNIEnv* env) {
diff --git a/runtime/native/java_lang_String.cc b/runtime/native/java_lang_String.cc
index f1d6ff5..2e561ff 100644
--- a/runtime/native/java_lang_String.cc
+++ b/runtime/native/java_lang_String.cc
@@ -99,9 +99,11 @@
   return soa.AddLocalReference<jstring>(result);
 }
 
-static void String_setCharAt(JNIEnv* env, jobject java_this, jint index, jchar c) {
+static jstring String_doReplace(JNIEnv* env, jobject java_this, jchar old_c, jchar new_c) {
   ScopedFastNativeObjectAccess soa(env);
-  soa.Decode<mirror::String>(java_this)->SetCharAt(index, c);
+  ObjPtr<mirror::String> result =
+      soa.Decode<mirror::String>(java_this)->DoReplace(soa.Self(), old_c, new_c);
+  return soa.AddLocalReference<jstring>(result);
 }
 
 static jcharArray String_toCharArray(JNIEnv* env, jobject java_this) {
@@ -111,15 +113,15 @@
 }
 
 static JNINativeMethod gMethods[] = {
-  NATIVE_METHOD(String, charAt, "!(I)C"),
-  NATIVE_METHOD(String, compareTo, "!(Ljava/lang/String;)I"),
-  NATIVE_METHOD(String, concat, "!(Ljava/lang/String;)Ljava/lang/String;"),
-  NATIVE_METHOD(String, fastIndexOf, "!(II)I"),
-  NATIVE_METHOD(String, fastSubstring, "!(II)Ljava/lang/String;"),
-  NATIVE_METHOD(String, getCharsNoCheck, "!(II[CI)V"),
-  NATIVE_METHOD(String, intern, "!()Ljava/lang/String;"),
-  NATIVE_METHOD(String, setCharAt, "!(IC)V"),
-  NATIVE_METHOD(String, toCharArray, "!()[C"),
+  FAST_NATIVE_METHOD(String, charAt, "(I)C"),
+  FAST_NATIVE_METHOD(String, compareTo, "(Ljava/lang/String;)I"),
+  FAST_NATIVE_METHOD(String, concat, "(Ljava/lang/String;)Ljava/lang/String;"),
+  FAST_NATIVE_METHOD(String, doReplace, "(CC)Ljava/lang/String;"),
+  FAST_NATIVE_METHOD(String, fastIndexOf, "(II)I"),
+  FAST_NATIVE_METHOD(String, fastSubstring, "(II)Ljava/lang/String;"),
+  FAST_NATIVE_METHOD(String, getCharsNoCheck, "(II[CI)V"),
+  FAST_NATIVE_METHOD(String, intern, "()Ljava/lang/String;"),
+  FAST_NATIVE_METHOD(String, toCharArray, "()[C"),
 };
 
 void register_java_lang_String(JNIEnv* env) {
diff --git a/runtime/native/java_lang_StringFactory.cc b/runtime/native/java_lang_StringFactory.cc
index e0738a4..ec3c7c2 100644
--- a/runtime/native/java_lang_StringFactory.cc
+++ b/runtime/native/java_lang_StringFactory.cc
@@ -87,9 +87,9 @@
 }
 
 static JNINativeMethod gMethods[] = {
-  NATIVE_METHOD(StringFactory, newStringFromBytes, "!([BIII)Ljava/lang/String;"),
-  NATIVE_METHOD(StringFactory, newStringFromChars, "!(II[C)Ljava/lang/String;"),
-  NATIVE_METHOD(StringFactory, newStringFromString, "!(Ljava/lang/String;)Ljava/lang/String;"),
+  FAST_NATIVE_METHOD(StringFactory, newStringFromBytes, "([BIII)Ljava/lang/String;"),
+  FAST_NATIVE_METHOD(StringFactory, newStringFromChars, "(II[C)Ljava/lang/String;"),
+  FAST_NATIVE_METHOD(StringFactory, newStringFromString, "(Ljava/lang/String;)Ljava/lang/String;"),
 };
 
 void register_java_lang_StringFactory(JNIEnv* env) {
diff --git a/runtime/native/java_lang_System.cc b/runtime/native/java_lang_System.cc
index 7f8da80..2cabce8 100644
--- a/runtime/native/java_lang_System.cc
+++ b/runtime/native/java_lang_System.cc
@@ -227,26 +227,16 @@
       javaDst, dstPos, count);
 }
 
-static jint System_identityHashCode(JNIEnv* env, jclass, jobject javaObject) {
-  if (UNLIKELY(javaObject == nullptr)) {
-    return 0;
-  }
-  ScopedFastNativeObjectAccess soa(env);
-  ObjPtr<mirror::Object> o = soa.Decode<mirror::Object>(javaObject);
-  return static_cast<jint>(o->IdentityHashCode());
-}
-
 static JNINativeMethod gMethods[] = {
-  NATIVE_METHOD(System, arraycopy, "!(Ljava/lang/Object;ILjava/lang/Object;II)V"),
-  NATIVE_METHOD(System, arraycopyCharUnchecked, "!([CI[CII)V"),
-  NATIVE_METHOD(System, arraycopyByteUnchecked, "!([BI[BII)V"),
-  NATIVE_METHOD(System, arraycopyShortUnchecked, "!([SI[SII)V"),
-  NATIVE_METHOD(System, arraycopyIntUnchecked, "!([II[III)V"),
-  NATIVE_METHOD(System, arraycopyLongUnchecked, "!([JI[JII)V"),
-  NATIVE_METHOD(System, arraycopyFloatUnchecked, "!([FI[FII)V"),
-  NATIVE_METHOD(System, arraycopyDoubleUnchecked, "!([DI[DII)V"),
-  NATIVE_METHOD(System, arraycopyBooleanUnchecked, "!([ZI[ZII)V"),
-  NATIVE_METHOD(System, identityHashCode, "!(Ljava/lang/Object;)I"),
+  FAST_NATIVE_METHOD(System, arraycopy, "(Ljava/lang/Object;ILjava/lang/Object;II)V"),
+  FAST_NATIVE_METHOD(System, arraycopyCharUnchecked, "([CI[CII)V"),
+  FAST_NATIVE_METHOD(System, arraycopyByteUnchecked, "([BI[BII)V"),
+  FAST_NATIVE_METHOD(System, arraycopyShortUnchecked, "([SI[SII)V"),
+  FAST_NATIVE_METHOD(System, arraycopyIntUnchecked, "([II[III)V"),
+  FAST_NATIVE_METHOD(System, arraycopyLongUnchecked, "([JI[JII)V"),
+  FAST_NATIVE_METHOD(System, arraycopyFloatUnchecked, "([FI[FII)V"),
+  FAST_NATIVE_METHOD(System, arraycopyDoubleUnchecked, "([DI[DII)V"),
+  FAST_NATIVE_METHOD(System, arraycopyBooleanUnchecked, "([ZI[ZII)V"),
 };
 
 void register_java_lang_System(JNIEnv* env) {
diff --git a/runtime/native/java_lang_Thread.cc b/runtime/native/java_lang_Thread.cc
index 195091f..346bd30 100644
--- a/runtime/native/java_lang_Thread.cc
+++ b/runtime/native/java_lang_Thread.cc
@@ -187,16 +187,16 @@
 }
 
 static JNINativeMethod gMethods[] = {
-  NATIVE_METHOD(Thread, currentThread, "!()Ljava/lang/Thread;"),
-  NATIVE_METHOD(Thread, interrupted, "!()Z"),
-  NATIVE_METHOD(Thread, isInterrupted, "!()Z"),
+  FAST_NATIVE_METHOD(Thread, currentThread, "()Ljava/lang/Thread;"),
+  FAST_NATIVE_METHOD(Thread, interrupted, "()Z"),
+  FAST_NATIVE_METHOD(Thread, isInterrupted, "()Z"),
   NATIVE_METHOD(Thread, nativeCreate, "(Ljava/lang/Thread;JZ)V"),
   NATIVE_METHOD(Thread, nativeGetStatus, "(Z)I"),
   NATIVE_METHOD(Thread, nativeHoldsLock, "(Ljava/lang/Object;)Z"),
-  NATIVE_METHOD(Thread, nativeInterrupt, "!()V"),
+  FAST_NATIVE_METHOD(Thread, nativeInterrupt, "()V"),
   NATIVE_METHOD(Thread, nativeSetName, "(Ljava/lang/String;)V"),
   NATIVE_METHOD(Thread, nativeSetPriority, "(I)V"),
-  NATIVE_METHOD(Thread, sleep, "!(Ljava/lang/Object;JI)V"),
+  FAST_NATIVE_METHOD(Thread, sleep, "(Ljava/lang/Object;JI)V"),
   NATIVE_METHOD(Thread, yield, "()V"),
 };
 
diff --git a/runtime/native/java_lang_Throwable.cc b/runtime/native/java_lang_Throwable.cc
index ff3e044..654b8a8 100644
--- a/runtime/native/java_lang_Throwable.cc
+++ b/runtime/native/java_lang_Throwable.cc
@@ -36,8 +36,8 @@
 }
 
 static JNINativeMethod gMethods[] = {
-  NATIVE_METHOD(Throwable, nativeFillInStackTrace, "!()Ljava/lang/Object;"),
-  NATIVE_METHOD(Throwable, nativeGetStackTrace, "!(Ljava/lang/Object;)[Ljava/lang/StackTraceElement;"),
+  FAST_NATIVE_METHOD(Throwable, nativeFillInStackTrace, "()Ljava/lang/Object;"),
+  FAST_NATIVE_METHOD(Throwable, nativeGetStackTrace, "(Ljava/lang/Object;)[Ljava/lang/StackTraceElement;"),
 };
 
 void register_java_lang_Throwable(JNIEnv* env) {
diff --git a/runtime/native/java_lang_VMClassLoader.cc b/runtime/native/java_lang_VMClassLoader.cc
index a8fa7db..54ab861 100644
--- a/runtime/native/java_lang_VMClassLoader.cc
+++ b/runtime/native/java_lang_VMClassLoader.cc
@@ -136,7 +136,7 @@
 }
 
 static JNINativeMethod gMethods[] = {
-  NATIVE_METHOD(VMClassLoader, findLoadedClass, "!(Ljava/lang/ClassLoader;Ljava/lang/String;)Ljava/lang/Class;"),
+  FAST_NATIVE_METHOD(VMClassLoader, findLoadedClass, "(Ljava/lang/ClassLoader;Ljava/lang/String;)Ljava/lang/Class;"),
   NATIVE_METHOD(VMClassLoader, getBootClassPathEntries, "()[Ljava/lang/String;"),
 };
 
diff --git a/runtime/native/java_lang_Void.cc b/runtime/native/java_lang_Void.cc
new file mode 100644
index 0000000..96bfd1b
--- /dev/null
+++ b/runtime/native/java_lang_Void.cc
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "java_lang_Void.h"
+
+#include "class_linker.h"
+#include "jni_internal.h"
+#include "runtime.h"
+#include "scoped_fast_native_object_access-inl.h"
+
+namespace art {
+
+static jclass Void_lookupType(JNIEnv* env, jclass) {
+  ScopedFastNativeObjectAccess soa(env);
+  return soa.AddLocalReference<jclass>(
+      Runtime::Current()->GetClassLinker()->GetClassRoot(ClassLinker::kPrimitiveVoid));
+}
+
+static JNINativeMethod gMethods[] = {
+  FAST_NATIVE_METHOD(Void, lookupType, "()Ljava/lang/Class;"),
+};
+
+void register_java_lang_Void(JNIEnv* env) {
+  REGISTER_NATIVE_METHODS("java/lang/Void");
+}
+
+}  // namespace art
diff --git a/test/577-profile-foreign-dex/src-ex/OtherDex.java b/runtime/native/java_lang_Void.h
similarity index 65%
copy from test/577-profile-foreign-dex/src-ex/OtherDex.java
copy to runtime/native/java_lang_Void.h
index cba73b3..8777d80 100644
--- a/test/577-profile-foreign-dex/src-ex/OtherDex.java
+++ b/runtime/native/java_lang_Void.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2016 The Android Open Source Project
+ * Copyright (C) 2017 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -13,5 +13,16 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-public class OtherDex {
-}
+
+#ifndef ART_RUNTIME_NATIVE_JAVA_LANG_VOID_H_
+#define ART_RUNTIME_NATIVE_JAVA_LANG_VOID_H_
+
+#include <jni.h>
+
+namespace art {
+
+void register_java_lang_Void(JNIEnv* env);
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_NATIVE_JAVA_LANG_VOID_H_
diff --git a/runtime/native/java_lang_ref_FinalizerReference.cc b/runtime/native/java_lang_ref_FinalizerReference.cc
index ecafd0e..afedc5e 100644
--- a/runtime/native/java_lang_ref_FinalizerReference.cc
+++ b/runtime/native/java_lang_ref_FinalizerReference.cc
@@ -40,8 +40,8 @@
 }
 
 static JNINativeMethod gMethods[] = {
-  NATIVE_METHOD(FinalizerReference, makeCircularListIfUnenqueued, "!()Z"),
-  NATIVE_METHOD(FinalizerReference, getReferent, "!()Ljava/lang/Object;"),
+  FAST_NATIVE_METHOD(FinalizerReference, makeCircularListIfUnenqueued, "()Z"),
+  FAST_NATIVE_METHOD(FinalizerReference, getReferent, "()Ljava/lang/Object;"),
 };
 
 void register_java_lang_ref_FinalizerReference(JNIEnv* env) {
diff --git a/runtime/native/java_lang_ref_Reference.cc b/runtime/native/java_lang_ref_Reference.cc
index c778068..b1cb2f2 100644
--- a/runtime/native/java_lang_ref_Reference.cc
+++ b/runtime/native/java_lang_ref_Reference.cc
@@ -40,8 +40,8 @@
 }
 
 static JNINativeMethod gMethods[] = {
-  NATIVE_METHOD(Reference, getReferent, "!()Ljava/lang/Object;"),
-  NATIVE_METHOD(Reference, clearReferent, "!()V"),
+  FAST_NATIVE_METHOD(Reference, getReferent, "()Ljava/lang/Object;"),
+  FAST_NATIVE_METHOD(Reference, clearReferent, "()V"),
 };
 
 void register_java_lang_ref_Reference(JNIEnv* env) {
diff --git a/runtime/native/java_lang_reflect_Array.cc b/runtime/native/java_lang_reflect_Array.cc
index d827f81..54c2109 100644
--- a/runtime/native/java_lang_reflect_Array.cc
+++ b/runtime/native/java_lang_reflect_Array.cc
@@ -72,8 +72,8 @@
 }
 
 static JNINativeMethod gMethods[] = {
-  NATIVE_METHOD(Array, createMultiArray, "!(Ljava/lang/Class;[I)Ljava/lang/Object;"),
-  NATIVE_METHOD(Array, createObjectArray, "!(Ljava/lang/Class;I)Ljava/lang/Object;"),
+  FAST_NATIVE_METHOD(Array, createMultiArray, "(Ljava/lang/Class;[I)Ljava/lang/Object;"),
+  FAST_NATIVE_METHOD(Array, createObjectArray, "(Ljava/lang/Class;I)Ljava/lang/Object;"),
 };
 
 void register_java_lang_reflect_Array(JNIEnv* env) {
diff --git a/runtime/native/java_lang_reflect_Constructor.cc b/runtime/native/java_lang_reflect_Constructor.cc
index 66a5359..fb78046 100644
--- a/runtime/native/java_lang_reflect_Constructor.cc
+++ b/runtime/native/java_lang_reflect_Constructor.cc
@@ -124,9 +124,9 @@
 }
 
 static JNINativeMethod gMethods[] = {
-  NATIVE_METHOD(Constructor, getExceptionTypes, "!()[Ljava/lang/Class;"),
-  NATIVE_METHOD(Constructor, newInstance0, "!([Ljava/lang/Object;)Ljava/lang/Object;"),
-  NATIVE_METHOD(Constructor, newInstanceFromSerialization, "!(Ljava/lang/Class;Ljava/lang/Class;)Ljava/lang/Object;"),
+  FAST_NATIVE_METHOD(Constructor, getExceptionTypes, "()[Ljava/lang/Class;"),
+  FAST_NATIVE_METHOD(Constructor, newInstance0, "([Ljava/lang/Object;)Ljava/lang/Object;"),
+  FAST_NATIVE_METHOD(Constructor, newInstanceFromSerialization, "(Ljava/lang/Class;Ljava/lang/Class;)Ljava/lang/Object;"),
 };
 
 void register_java_lang_reflect_Constructor(JNIEnv* env) {
diff --git a/runtime/native/java_lang_reflect_Executable.cc b/runtime/native/java_lang_reflect_Executable.cc
index 2a39428..bc23bed 100644
--- a/runtime/native/java_lang_reflect_Executable.cc
+++ b/runtime/native/java_lang_reflect_Executable.cc
@@ -195,14 +195,14 @@
 }
 
 static JNINativeMethod gMethods[] = {
-  NATIVE_METHOD(Executable, getAnnotationNative,
-                "!(Ljava/lang/Class;)Ljava/lang/annotation/Annotation;"),
-  NATIVE_METHOD(Executable, getDeclaredAnnotationsNative, "!()[Ljava/lang/annotation/Annotation;"),
-  NATIVE_METHOD(Executable, getParameterAnnotationsNative,
-                "!()[[Ljava/lang/annotation/Annotation;"),
-  NATIVE_METHOD(Executable, getParameters0, "!()[Ljava/lang/reflect/Parameter;"),
-  NATIVE_METHOD(Executable, getSignatureAnnotation, "!()[Ljava/lang/String;"),
-  NATIVE_METHOD(Executable, isAnnotationPresentNative, "!(Ljava/lang/Class;)Z"),
+  FAST_NATIVE_METHOD(Executable, getAnnotationNative,
+                "(Ljava/lang/Class;)Ljava/lang/annotation/Annotation;"),
+  FAST_NATIVE_METHOD(Executable, getDeclaredAnnotationsNative, "()[Ljava/lang/annotation/Annotation;"),
+  FAST_NATIVE_METHOD(Executable, getParameterAnnotationsNative,
+                "()[[Ljava/lang/annotation/Annotation;"),
+  FAST_NATIVE_METHOD(Executable, getParameters0, "()[Ljava/lang/reflect/Parameter;"),
+  FAST_NATIVE_METHOD(Executable, getSignatureAnnotation, "()[Ljava/lang/String;"),
+  FAST_NATIVE_METHOD(Executable, isAnnotationPresentNative, "(Ljava/lang/Class;)Z"),
 };
 
 void register_java_lang_reflect_Executable(JNIEnv* env) {
diff --git a/runtime/native/java_lang_reflect_Field.cc b/runtime/native/java_lang_reflect_Field.cc
index 374eeb5..9cf80a5 100644
--- a/runtime/native/java_lang_reflect_Field.cc
+++ b/runtime/native/java_lang_reflect_Field.cc
@@ -493,30 +493,30 @@
 }
 
 static JNINativeMethod gMethods[] = {
-  NATIVE_METHOD(Field, get,        "!(Ljava/lang/Object;)Ljava/lang/Object;"),
-  NATIVE_METHOD(Field, getBoolean, "!(Ljava/lang/Object;)Z"),
-  NATIVE_METHOD(Field, getByte,    "!(Ljava/lang/Object;)B"),
-  NATIVE_METHOD(Field, getChar,    "!(Ljava/lang/Object;)C"),
-  NATIVE_METHOD(Field, getAnnotationNative,
-                "!(Ljava/lang/Class;)Ljava/lang/annotation/Annotation;"),
-  NATIVE_METHOD(Field, getArtField, "!()J"),
-  NATIVE_METHOD(Field, getDeclaredAnnotations, "!()[Ljava/lang/annotation/Annotation;"),
-  NATIVE_METHOD(Field, getSignatureAnnotation, "!()[Ljava/lang/String;"),
-  NATIVE_METHOD(Field, getDouble,  "!(Ljava/lang/Object;)D"),
-  NATIVE_METHOD(Field, getFloat,   "!(Ljava/lang/Object;)F"),
-  NATIVE_METHOD(Field, getInt,     "!(Ljava/lang/Object;)I"),
-  NATIVE_METHOD(Field, getLong,    "!(Ljava/lang/Object;)J"),
-  NATIVE_METHOD(Field, getShort,   "!(Ljava/lang/Object;)S"),
-  NATIVE_METHOD(Field, isAnnotationPresentNative, "!(Ljava/lang/Class;)Z"),
-  NATIVE_METHOD(Field, set,        "!(Ljava/lang/Object;Ljava/lang/Object;)V"),
-  NATIVE_METHOD(Field, setBoolean, "!(Ljava/lang/Object;Z)V"),
-  NATIVE_METHOD(Field, setByte,    "!(Ljava/lang/Object;B)V"),
-  NATIVE_METHOD(Field, setChar,    "!(Ljava/lang/Object;C)V"),
-  NATIVE_METHOD(Field, setDouble,  "!(Ljava/lang/Object;D)V"),
-  NATIVE_METHOD(Field, setFloat,   "!(Ljava/lang/Object;F)V"),
-  NATIVE_METHOD(Field, setInt,     "!(Ljava/lang/Object;I)V"),
-  NATIVE_METHOD(Field, setLong,    "!(Ljava/lang/Object;J)V"),
-  NATIVE_METHOD(Field, setShort,   "!(Ljava/lang/Object;S)V"),
+  FAST_NATIVE_METHOD(Field, get,        "(Ljava/lang/Object;)Ljava/lang/Object;"),
+  FAST_NATIVE_METHOD(Field, getBoolean, "(Ljava/lang/Object;)Z"),
+  FAST_NATIVE_METHOD(Field, getByte,    "(Ljava/lang/Object;)B"),
+  FAST_NATIVE_METHOD(Field, getChar,    "(Ljava/lang/Object;)C"),
+  FAST_NATIVE_METHOD(Field, getAnnotationNative,
+                "(Ljava/lang/Class;)Ljava/lang/annotation/Annotation;"),
+  FAST_NATIVE_METHOD(Field, getArtField, "()J"),
+  FAST_NATIVE_METHOD(Field, getDeclaredAnnotations, "()[Ljava/lang/annotation/Annotation;"),
+  FAST_NATIVE_METHOD(Field, getSignatureAnnotation, "()[Ljava/lang/String;"),
+  FAST_NATIVE_METHOD(Field, getDouble,  "(Ljava/lang/Object;)D"),
+  FAST_NATIVE_METHOD(Field, getFloat,   "(Ljava/lang/Object;)F"),
+  FAST_NATIVE_METHOD(Field, getInt,     "(Ljava/lang/Object;)I"),
+  FAST_NATIVE_METHOD(Field, getLong,    "(Ljava/lang/Object;)J"),
+  FAST_NATIVE_METHOD(Field, getShort,   "(Ljava/lang/Object;)S"),
+  FAST_NATIVE_METHOD(Field, isAnnotationPresentNative, "(Ljava/lang/Class;)Z"),
+  FAST_NATIVE_METHOD(Field, set,        "(Ljava/lang/Object;Ljava/lang/Object;)V"),
+  FAST_NATIVE_METHOD(Field, setBoolean, "(Ljava/lang/Object;Z)V"),
+  FAST_NATIVE_METHOD(Field, setByte,    "(Ljava/lang/Object;B)V"),
+  FAST_NATIVE_METHOD(Field, setChar,    "(Ljava/lang/Object;C)V"),
+  FAST_NATIVE_METHOD(Field, setDouble,  "(Ljava/lang/Object;D)V"),
+  FAST_NATIVE_METHOD(Field, setFloat,   "(Ljava/lang/Object;F)V"),
+  FAST_NATIVE_METHOD(Field, setInt,     "(Ljava/lang/Object;I)V"),
+  FAST_NATIVE_METHOD(Field, setLong,    "(Ljava/lang/Object;J)V"),
+  FAST_NATIVE_METHOD(Field, setShort,   "(Ljava/lang/Object;S)V"),
 };
 
 void register_java_lang_reflect_Field(JNIEnv* env) {
diff --git a/runtime/native/java_lang_reflect_Method.cc b/runtime/native/java_lang_reflect_Method.cc
index a6589bc..6e5e3d9 100644
--- a/runtime/native/java_lang_reflect_Method.cc
+++ b/runtime/native/java_lang_reflect_Method.cc
@@ -84,9 +84,9 @@
 }
 
 static JNINativeMethod gMethods[] = {
-  NATIVE_METHOD(Method, getDefaultValue, "!()Ljava/lang/Object;"),
-  NATIVE_METHOD(Method, getExceptionTypes, "!()[Ljava/lang/Class;"),
-  NATIVE_METHOD(Method, invoke, "!(Ljava/lang/Object;[Ljava/lang/Object;)Ljava/lang/Object;"),
+  FAST_NATIVE_METHOD(Method, getDefaultValue, "()Ljava/lang/Object;"),
+  FAST_NATIVE_METHOD(Method, getExceptionTypes, "()[Ljava/lang/Class;"),
+  FAST_NATIVE_METHOD(Method, invoke, "(Ljava/lang/Object;[Ljava/lang/Object;)Ljava/lang/Object;"),
 };
 
 void register_java_lang_reflect_Method(JNIEnv* env) {
diff --git a/runtime/native/java_lang_reflect_Parameter.cc b/runtime/native/java_lang_reflect_Parameter.cc
index 0bb9e38..37aa16c 100644
--- a/runtime/native/java_lang_reflect_Parameter.cc
+++ b/runtime/native/java_lang_reflect_Parameter.cc
@@ -63,9 +63,9 @@
 }
 
 static JNINativeMethod gMethods[] = {
-  NATIVE_METHOD(Parameter,
+  FAST_NATIVE_METHOD(Parameter,
                 getAnnotationNative,
-                "!(Ljava/lang/reflect/Executable;ILjava/lang/Class;)Ljava/lang/annotation/Annotation;"),
+                "(Ljava/lang/reflect/Executable;ILjava/lang/Class;)Ljava/lang/annotation/Annotation;"),
 };
 
 void register_java_lang_reflect_Parameter(JNIEnv* env) {
diff --git a/runtime/native/java_lang_reflect_Proxy.cc b/runtime/native/java_lang_reflect_Proxy.cc
index 70cd6aa..0279b5f 100644
--- a/runtime/native/java_lang_reflect_Proxy.cc
+++ b/runtime/native/java_lang_reflect_Proxy.cc
@@ -35,7 +35,7 @@
 }
 
 static JNINativeMethod gMethods[] = {
-  NATIVE_METHOD(Proxy, generateProxy, "!(Ljava/lang/String;[Ljava/lang/Class;Ljava/lang/ClassLoader;[Ljava/lang/reflect/Method;[[Ljava/lang/Class;)Ljava/lang/Class;"),
+  FAST_NATIVE_METHOD(Proxy, generateProxy, "(Ljava/lang/String;[Ljava/lang/Class;Ljava/lang/ClassLoader;[Ljava/lang/reflect/Method;[[Ljava/lang/Class;)Ljava/lang/Class;"),
 };
 
 void register_java_lang_reflect_Proxy(JNIEnv* env) {
diff --git a/runtime/native/libcore_util_CharsetUtils.cc b/runtime/native/libcore_util_CharsetUtils.cc
index e51b6d2..4138ccc 100644
--- a/runtime/native/libcore_util_CharsetUtils.cc
+++ b/runtime/native/libcore_util_CharsetUtils.cc
@@ -249,11 +249,11 @@
 }
 
 static JNINativeMethod gMethods[] = {
-  NATIVE_METHOD(CharsetUtils, asciiBytesToChars, "!([BII[C)V"),
-  NATIVE_METHOD(CharsetUtils, isoLatin1BytesToChars, "!([BII[C)V"),
-  NATIVE_METHOD(CharsetUtils, toAsciiBytes, "!(Ljava/lang/String;II)[B"),
-  NATIVE_METHOD(CharsetUtils, toIsoLatin1Bytes, "!(Ljava/lang/String;II)[B"),
-  NATIVE_METHOD(CharsetUtils, toUtf8Bytes, "!(Ljava/lang/String;II)[B"),
+  FAST_NATIVE_METHOD(CharsetUtils, asciiBytesToChars, "([BII[C)V"),
+  FAST_NATIVE_METHOD(CharsetUtils, isoLatin1BytesToChars, "([BII[C)V"),
+  FAST_NATIVE_METHOD(CharsetUtils, toAsciiBytes, "(Ljava/lang/String;II)[B"),
+  FAST_NATIVE_METHOD(CharsetUtils, toIsoLatin1Bytes, "(Ljava/lang/String;II)[B"),
+  FAST_NATIVE_METHOD(CharsetUtils, toUtf8Bytes, "(Ljava/lang/String;II)[B"),
 };
 
 void register_libcore_util_CharsetUtils(JNIEnv* env) {
diff --git a/runtime/native/org_apache_harmony_dalvik_ddmc_DdmServer.cc b/runtime/native/org_apache_harmony_dalvik_ddmc_DdmServer.cc
index 5356498..5809708 100644
--- a/runtime/native/org_apache_harmony_dalvik_ddmc_DdmServer.cc
+++ b/runtime/native/org_apache_harmony_dalvik_ddmc_DdmServer.cc
@@ -33,7 +33,7 @@
 }
 
 static JNINativeMethod gMethods[] = {
-  NATIVE_METHOD(DdmServer, nativeSendChunk, "!(I[BII)V"),
+  FAST_NATIVE_METHOD(DdmServer, nativeSendChunk, "(I[BII)V"),
 };
 
 void register_org_apache_harmony_dalvik_ddmc_DdmServer(JNIEnv* env) {
diff --git a/runtime/native/org_apache_harmony_dalvik_ddmc_DdmVmInternal.cc b/runtime/native/org_apache_harmony_dalvik_ddmc_DdmVmInternal.cc
index ca17c26..69ef59e 100644
--- a/runtime/native/org_apache_harmony_dalvik_ddmc_DdmVmInternal.cc
+++ b/runtime/native/org_apache_harmony_dalvik_ddmc_DdmVmInternal.cc
@@ -165,11 +165,11 @@
 
 static JNINativeMethod gMethods[] = {
   NATIVE_METHOD(DdmVmInternal, enableRecentAllocations, "(Z)V"),
-  NATIVE_METHOD(DdmVmInternal, getRecentAllocations, "!()[B"),
-  NATIVE_METHOD(DdmVmInternal, getRecentAllocationStatus, "!()Z"),
+  FAST_NATIVE_METHOD(DdmVmInternal, getRecentAllocations, "()[B"),
+  FAST_NATIVE_METHOD(DdmVmInternal, getRecentAllocationStatus, "()Z"),
   NATIVE_METHOD(DdmVmInternal, getStackTraceById, "(I)[Ljava/lang/StackTraceElement;"),
   NATIVE_METHOD(DdmVmInternal, getThreadStats, "()[B"),
-  NATIVE_METHOD(DdmVmInternal, heapInfoNotify, "!(I)Z"),
+  FAST_NATIVE_METHOD(DdmVmInternal, heapInfoNotify, "(I)Z"),
   NATIVE_METHOD(DdmVmInternal, heapSegmentNotify, "(IIZ)Z"),
   NATIVE_METHOD(DdmVmInternal, threadNotify, "(Z)V"),
 };
diff --git a/runtime/native/scoped_fast_native_object_access-inl.h b/runtime/native/scoped_fast_native_object_access-inl.h
index 1d73813..50a554c 100644
--- a/runtime/native/scoped_fast_native_object_access-inl.h
+++ b/runtime/native/scoped_fast_native_object_access-inl.h
@@ -27,7 +27,7 @@
 inline ScopedFastNativeObjectAccess::ScopedFastNativeObjectAccess(JNIEnv* env)
     : ScopedObjectAccessAlreadyRunnable(env) {
   Locks::mutator_lock_->AssertSharedHeld(Self());
-  DCHECK((*Self()->GetManagedStack()->GetTopQuickFrame())->IsFastNative());
+  DCHECK((*Self()->GetManagedStack()->GetTopQuickFrame())->IsAnnotatedWithFastNative());
   // Don't work with raw objects in non-runnable states.
   DCHECK_EQ(Self()->GetState(), kRunnable);
 }
diff --git a/runtime/native/sun_misc_Unsafe.cc b/runtime/native/sun_misc_Unsafe.cc
index 644df07..cc5a41a 100644
--- a/runtime/native/sun_misc_Unsafe.cc
+++ b/runtime/native/sun_misc_Unsafe.cc
@@ -492,69 +492,69 @@
 }
 
 static JNINativeMethod gMethods[] = {
-  NATIVE_METHOD(Unsafe, compareAndSwapInt, "!(Ljava/lang/Object;JII)Z"),
-  NATIVE_METHOD(Unsafe, compareAndSwapLong, "!(Ljava/lang/Object;JJJ)Z"),
-  NATIVE_METHOD(Unsafe, compareAndSwapObject, "!(Ljava/lang/Object;JLjava/lang/Object;Ljava/lang/Object;)Z"),
-  NATIVE_METHOD(Unsafe, getIntVolatile, "!(Ljava/lang/Object;J)I"),
-  NATIVE_METHOD(Unsafe, putIntVolatile, "!(Ljava/lang/Object;JI)V"),
-  NATIVE_METHOD(Unsafe, getLongVolatile, "!(Ljava/lang/Object;J)J"),
-  NATIVE_METHOD(Unsafe, putLongVolatile, "!(Ljava/lang/Object;JJ)V"),
-  NATIVE_METHOD(Unsafe, getObjectVolatile, "!(Ljava/lang/Object;J)Ljava/lang/Object;"),
-  NATIVE_METHOD(Unsafe, putObjectVolatile, "!(Ljava/lang/Object;JLjava/lang/Object;)V"),
-  NATIVE_METHOD(Unsafe, getInt, "!(Ljava/lang/Object;J)I"),
-  NATIVE_METHOD(Unsafe, putInt, "!(Ljava/lang/Object;JI)V"),
-  NATIVE_METHOD(Unsafe, putOrderedInt, "!(Ljava/lang/Object;JI)V"),
-  NATIVE_METHOD(Unsafe, getLong, "!(Ljava/lang/Object;J)J"),
-  NATIVE_METHOD(Unsafe, putLong, "!(Ljava/lang/Object;JJ)V"),
-  NATIVE_METHOD(Unsafe, putOrderedLong, "!(Ljava/lang/Object;JJ)V"),
-  NATIVE_METHOD(Unsafe, getObject, "!(Ljava/lang/Object;J)Ljava/lang/Object;"),
-  NATIVE_METHOD(Unsafe, putObject, "!(Ljava/lang/Object;JLjava/lang/Object;)V"),
-  NATIVE_METHOD(Unsafe, putOrderedObject, "!(Ljava/lang/Object;JLjava/lang/Object;)V"),
-  NATIVE_METHOD(Unsafe, getArrayBaseOffsetForComponentType, "!(Ljava/lang/Class;)I"),
-  NATIVE_METHOD(Unsafe, getArrayIndexScaleForComponentType, "!(Ljava/lang/Class;)I"),
-  NATIVE_METHOD(Unsafe, addressSize, "!()I"),
-  NATIVE_METHOD(Unsafe, pageSize, "!()I"),
-  NATIVE_METHOD(Unsafe, allocateMemory, "!(J)J"),
-  NATIVE_METHOD(Unsafe, freeMemory, "!(J)V"),
-  NATIVE_METHOD(Unsafe, setMemory, "!(JJB)V"),
-  NATIVE_METHOD(Unsafe, copyMemory, "!(JJJ)V"),
-  NATIVE_METHOD(Unsafe, copyMemoryToPrimitiveArray, "!(JLjava/lang/Object;JJ)V"),
-  NATIVE_METHOD(Unsafe, copyMemoryFromPrimitiveArray, "!(Ljava/lang/Object;JJJ)V"),
-  NATIVE_METHOD(Unsafe, getBoolean, "!(Ljava/lang/Object;J)Z"),
+  FAST_NATIVE_METHOD(Unsafe, compareAndSwapInt, "(Ljava/lang/Object;JII)Z"),
+  FAST_NATIVE_METHOD(Unsafe, compareAndSwapLong, "(Ljava/lang/Object;JJJ)Z"),
+  FAST_NATIVE_METHOD(Unsafe, compareAndSwapObject, "(Ljava/lang/Object;JLjava/lang/Object;Ljava/lang/Object;)Z"),
+  FAST_NATIVE_METHOD(Unsafe, getIntVolatile, "(Ljava/lang/Object;J)I"),
+  FAST_NATIVE_METHOD(Unsafe, putIntVolatile, "(Ljava/lang/Object;JI)V"),
+  FAST_NATIVE_METHOD(Unsafe, getLongVolatile, "(Ljava/lang/Object;J)J"),
+  FAST_NATIVE_METHOD(Unsafe, putLongVolatile, "(Ljava/lang/Object;JJ)V"),
+  FAST_NATIVE_METHOD(Unsafe, getObjectVolatile, "(Ljava/lang/Object;J)Ljava/lang/Object;"),
+  FAST_NATIVE_METHOD(Unsafe, putObjectVolatile, "(Ljava/lang/Object;JLjava/lang/Object;)V"),
+  FAST_NATIVE_METHOD(Unsafe, getInt, "(Ljava/lang/Object;J)I"),
+  FAST_NATIVE_METHOD(Unsafe, putInt, "(Ljava/lang/Object;JI)V"),
+  FAST_NATIVE_METHOD(Unsafe, putOrderedInt, "(Ljava/lang/Object;JI)V"),
+  FAST_NATIVE_METHOD(Unsafe, getLong, "(Ljava/lang/Object;J)J"),
+  FAST_NATIVE_METHOD(Unsafe, putLong, "(Ljava/lang/Object;JJ)V"),
+  FAST_NATIVE_METHOD(Unsafe, putOrderedLong, "(Ljava/lang/Object;JJ)V"),
+  FAST_NATIVE_METHOD(Unsafe, getObject, "(Ljava/lang/Object;J)Ljava/lang/Object;"),
+  FAST_NATIVE_METHOD(Unsafe, putObject, "(Ljava/lang/Object;JLjava/lang/Object;)V"),
+  FAST_NATIVE_METHOD(Unsafe, putOrderedObject, "(Ljava/lang/Object;JLjava/lang/Object;)V"),
+  FAST_NATIVE_METHOD(Unsafe, getArrayBaseOffsetForComponentType, "(Ljava/lang/Class;)I"),
+  FAST_NATIVE_METHOD(Unsafe, getArrayIndexScaleForComponentType, "(Ljava/lang/Class;)I"),
+  FAST_NATIVE_METHOD(Unsafe, addressSize, "()I"),
+  FAST_NATIVE_METHOD(Unsafe, pageSize, "()I"),
+  FAST_NATIVE_METHOD(Unsafe, allocateMemory, "(J)J"),
+  FAST_NATIVE_METHOD(Unsafe, freeMemory, "(J)V"),
+  FAST_NATIVE_METHOD(Unsafe, setMemory, "(JJB)V"),
+  FAST_NATIVE_METHOD(Unsafe, copyMemory, "(JJJ)V"),
+  FAST_NATIVE_METHOD(Unsafe, copyMemoryToPrimitiveArray, "(JLjava/lang/Object;JJ)V"),
+  FAST_NATIVE_METHOD(Unsafe, copyMemoryFromPrimitiveArray, "(Ljava/lang/Object;JJJ)V"),
+  FAST_NATIVE_METHOD(Unsafe, getBoolean, "(Ljava/lang/Object;J)Z"),
 
-  NATIVE_METHOD(Unsafe, getByte, "!(Ljava/lang/Object;J)B"),
-  NATIVE_METHOD(Unsafe, getChar, "!(Ljava/lang/Object;J)C"),
-  NATIVE_METHOD(Unsafe, getShort, "!(Ljava/lang/Object;J)S"),
-  NATIVE_METHOD(Unsafe, getFloat, "!(Ljava/lang/Object;J)F"),
-  NATIVE_METHOD(Unsafe, getDouble, "!(Ljava/lang/Object;J)D"),
-  NATIVE_METHOD(Unsafe, putBoolean, "!(Ljava/lang/Object;JZ)V"),
-  NATIVE_METHOD(Unsafe, putByte, "!(Ljava/lang/Object;JB)V"),
-  NATIVE_METHOD(Unsafe, putChar, "!(Ljava/lang/Object;JC)V"),
-  NATIVE_METHOD(Unsafe, putShort, "!(Ljava/lang/Object;JS)V"),
-  NATIVE_METHOD(Unsafe, putFloat, "!(Ljava/lang/Object;JF)V"),
-  NATIVE_METHOD(Unsafe, putDouble, "!(Ljava/lang/Object;JD)V"),
+  FAST_NATIVE_METHOD(Unsafe, getByte, "(Ljava/lang/Object;J)B"),
+  FAST_NATIVE_METHOD(Unsafe, getChar, "(Ljava/lang/Object;J)C"),
+  FAST_NATIVE_METHOD(Unsafe, getShort, "(Ljava/lang/Object;J)S"),
+  FAST_NATIVE_METHOD(Unsafe, getFloat, "(Ljava/lang/Object;J)F"),
+  FAST_NATIVE_METHOD(Unsafe, getDouble, "(Ljava/lang/Object;J)D"),
+  FAST_NATIVE_METHOD(Unsafe, putBoolean, "(Ljava/lang/Object;JZ)V"),
+  FAST_NATIVE_METHOD(Unsafe, putByte, "(Ljava/lang/Object;JB)V"),
+  FAST_NATIVE_METHOD(Unsafe, putChar, "(Ljava/lang/Object;JC)V"),
+  FAST_NATIVE_METHOD(Unsafe, putShort, "(Ljava/lang/Object;JS)V"),
+  FAST_NATIVE_METHOD(Unsafe, putFloat, "(Ljava/lang/Object;JF)V"),
+  FAST_NATIVE_METHOD(Unsafe, putDouble, "(Ljava/lang/Object;JD)V"),
 
-  // Each of the getFoo variants are overloaded with a call that operates
-  // directively on a native pointer.
+  // Each of the getFoo variants is overloaded with a call that operates
+  // directly on a native pointer.
-  OVERLOADED_NATIVE_METHOD(Unsafe, getByte, "!(J)B", getByteJ),
-  OVERLOADED_NATIVE_METHOD(Unsafe, getChar, "!(J)C", getCharJ),
-  OVERLOADED_NATIVE_METHOD(Unsafe, getShort, "!(J)S", getShortJ),
-  OVERLOADED_NATIVE_METHOD(Unsafe, getInt, "!(J)I", getIntJ),
-  OVERLOADED_NATIVE_METHOD(Unsafe, getLong, "!(J)J", getLongJ),
-  OVERLOADED_NATIVE_METHOD(Unsafe, getFloat, "!(J)F", getFloatJ),
-  OVERLOADED_NATIVE_METHOD(Unsafe, getDouble, "!(J)D", getDoubleJ),
-  OVERLOADED_NATIVE_METHOD(Unsafe, putByte, "!(JB)V", putByteJB),
-  OVERLOADED_NATIVE_METHOD(Unsafe, putChar, "!(JC)V", putCharJC),
-  OVERLOADED_NATIVE_METHOD(Unsafe, putShort, "!(JS)V", putShortJS),
-  OVERLOADED_NATIVE_METHOD(Unsafe, putInt, "!(JI)V", putIntJI),
-  OVERLOADED_NATIVE_METHOD(Unsafe, putLong, "!(JJ)V", putLongJJ),
-  OVERLOADED_NATIVE_METHOD(Unsafe, putFloat, "!(JF)V", putFloatJF),
-  OVERLOADED_NATIVE_METHOD(Unsafe, putDouble, "!(JD)V", putDoubleJD),
+  OVERLOADED_FAST_NATIVE_METHOD(Unsafe, getByte, "(J)B", getByteJ),
+  OVERLOADED_FAST_NATIVE_METHOD(Unsafe, getChar, "(J)C", getCharJ),
+  OVERLOADED_FAST_NATIVE_METHOD(Unsafe, getShort, "(J)S", getShortJ),
+  OVERLOADED_FAST_NATIVE_METHOD(Unsafe, getInt, "(J)I", getIntJ),
+  OVERLOADED_FAST_NATIVE_METHOD(Unsafe, getLong, "(J)J", getLongJ),
+  OVERLOADED_FAST_NATIVE_METHOD(Unsafe, getFloat, "(J)F", getFloatJ),
+  OVERLOADED_FAST_NATIVE_METHOD(Unsafe, getDouble, "(J)D", getDoubleJ),
+  OVERLOADED_FAST_NATIVE_METHOD(Unsafe, putByte, "(JB)V", putByteJB),
+  OVERLOADED_FAST_NATIVE_METHOD(Unsafe, putChar, "(JC)V", putCharJC),
+  OVERLOADED_FAST_NATIVE_METHOD(Unsafe, putShort, "(JS)V", putShortJS),
+  OVERLOADED_FAST_NATIVE_METHOD(Unsafe, putInt, "(JI)V", putIntJI),
+  OVERLOADED_FAST_NATIVE_METHOD(Unsafe, putLong, "(JJ)V", putLongJJ),
+  OVERLOADED_FAST_NATIVE_METHOD(Unsafe, putFloat, "(JF)V", putFloatJF),
+  OVERLOADED_FAST_NATIVE_METHOD(Unsafe, putDouble, "(JD)V", putDoubleJD),
 
-  // CAS
+  // Memory fences.
-  NATIVE_METHOD(Unsafe, loadFence, "!()V"),
-  NATIVE_METHOD(Unsafe, storeFence, "!()V"),
-  NATIVE_METHOD(Unsafe, fullFence, "!()V"),
+  FAST_NATIVE_METHOD(Unsafe, loadFence, "()V"),
+  FAST_NATIVE_METHOD(Unsafe, storeFence, "()V"),
+  FAST_NATIVE_METHOD(Unsafe, fullFence, "()V"),
 };
 
 void register_sun_misc_Unsafe(JNIEnv* env) {
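The descriptor strings follow the standard JNI grammar: 'J' is long, 'I' is int, 'Z' is boolean, and 'Lpkg/Class;' is a reference type, so "(Ljava/lang/Object;J)I" reads as (Object, long) -> int. The OVERLOADED_* variants also take a distinct C++ function name (getByteJ, putLongJJ, ...) because Java-side overloads such as getByte(Object, long) and getByte(long) cannot share one native entry point. A self-contained sketch of how such a table plugs into plain JNI (the class and method are invented):

#include <jni.h>

// Implements a hypothetical "int add(long, int)" on class my/pkg/Native.
static jint Native_add(JNIEnv*, jclass, jlong a, jint b) {
  return static_cast<jint>(a + b);
}

static const JNINativeMethod gExampleMethods[] = {
  // "(JI)I": takes a long ('J') and an int ('I'), returns an int ('I').
  {"add", "(JI)I", reinterpret_cast<void*>(Native_add)},
};

static jint RegisterExampleNatives(JNIEnv* env) {
  jclass klass = env->FindClass("my/pkg/Native");
  if (klass == nullptr) {
    return JNI_ERR;
  }
  return env->RegisterNatives(klass, gExampleMethods,
                              sizeof(gExampleMethods) / sizeof(gExampleMethods[0]));
}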
diff --git a/runtime/non_debuggable_classes.cc b/runtime/non_debuggable_classes.cc
new file mode 100644
index 0000000..829ea65
--- /dev/null
+++ b/runtime/non_debuggable_classes.cc
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "non_debuggable_classes.h"
+
+#include "base/logging.h"
+#include "jni_internal.h"
+#include "mirror/class-inl.h"
+#include "obj_ptr-inl.h"
+#include "ScopedLocalRef.h"
+#include "thread-inl.h"
+
+namespace art {
+
+std::vector<jclass> NonDebuggableClasses::non_debuggable_classes;
+
+void NonDebuggableClasses::AddNonDebuggableClass(jclass klass) {
+  Thread* self = Thread::Current();
+  JNIEnvExt* env = self->GetJniEnv();
+  ObjPtr<mirror::Class> mirror_klass(self->DecodeJObject(klass)->AsClass());
+  for (jclass c : non_debuggable_classes) {
+    if (self->DecodeJObject(c)->AsClass() == mirror_klass.Ptr()) {
+      return;
+    }
+  }
+  non_debuggable_classes.push_back(reinterpret_cast<jclass>(env->NewGlobalRef(klass)));
+}
+
+}  // namespace art
diff --git a/runtime/non_debuggable_classes.h b/runtime/non_debuggable_classes.h
new file mode 100644
index 0000000..e1b5633
--- /dev/null
+++ b/runtime/non_debuggable_classes.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_NON_DEBUGGABLE_CLASSES_H_
+#define ART_RUNTIME_NON_DEBUGGABLE_CLASSES_H_
+
+#include <vector>
+
+#include "base/mutex.h"
+#include "jni.h"
+
+namespace art {
+
+struct NonDebuggableClasses {
+ public:
+  static const std::vector<jclass>& GetNonDebuggableClasses() {
+    return non_debuggable_classes;
+  }
+
+  static void AddNonDebuggableClass(jclass klass)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
+ private:
+  static std::vector<jclass> non_debuggable_classes;
+};
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_NON_DEBUGGABLE_CLASSES_H_
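A hedged usage sketch for the new helper; the call site, include names, and class descriptor are invented, and the shared mutator lock requirement comes from the REQUIRES_SHARED annotation above:

#include "non_debuggable_classes.h"
#include "scoped_thread_state_change-inl.h"

// Illustrative only: pins a class so tooling can treat it as non-debuggable.
// AddNonDebuggableClass deduplicates and promotes the jclass to a global
// reference itself, so passing a local reference is fine.
static void PinNonDebuggable(JNIEnv* env, const char* descriptor) {
  art::ScopedObjectAccess soa(env);  // becomes Runnable; holds the shared mutator lock
  jclass klass = env->FindClass(descriptor);
  if (klass != nullptr) {
    art::NonDebuggableClasses::AddNonDebuggableClass(klass);
  }
}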
diff --git a/runtime/oat.h b/runtime/oat.h
index 1544121..df43107 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -32,7 +32,7 @@
 class PACKED(4) OatHeader {
  public:
   static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' };
-  static constexpr uint8_t kOatVersion[] = { '1', '1', '4', '\0' };  // hash-based DexCache types.
+  static constexpr uint8_t kOatVersion[] = { '1', '1', '5', '\0' };  // hash-based DexCache fields
 
   static constexpr const char* kImageLocationKey = "image-location";
   static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";
diff --git a/runtime/oat_file_assistant.cc b/runtime/oat_file_assistant.cc
index 5ae2fc5..1735045 100644
--- a/runtime/oat_file_assistant.cc
+++ b/runtime/oat_file_assistant.cc
@@ -430,8 +430,7 @@
       // starts up.
       LOG(WARNING) << "Dex location " << dex_location_ << " does not seem to include dex file. "
         << "Allow oat file use. This is potentially dangerous.";
-    } else if (file.GetOatHeader().GetImageFileLocationOatChecksum()
-        != GetCombinedImageChecksum()) {
+    } else if (file.GetOatHeader().GetImageFileLocationOatChecksum() != image_info->oat_checksum) {
       VLOG(oat) << "Oat image checksum does not match image checksum.";
       return kOatBootImageOutOfDate;
     }
@@ -726,68 +725,57 @@
   return required_dex_checksums_found_ ? &cached_required_dex_checksums_ : nullptr;
 }
 
+// TODO: Use something better than xor for the combined image checksum.
+std::unique_ptr<OatFileAssistant::ImageInfo>
+OatFileAssistant::ImageInfo::GetRuntimeImageInfo(InstructionSet isa, std::string* error_msg) {
+  CHECK(error_msg != nullptr);
+
+  // Use the currently loaded image to determine the image locations for all
+  // the image spaces, regardless of the isa requested. Otherwise we would
+  // need to read from the boot image's oat file to determine the rest of the
+  // image locations in the case of multi-image.
+  Runtime* runtime = Runtime::Current();
+  std::vector<gc::space::ImageSpace*> image_spaces = runtime->GetHeap()->GetBootImageSpaces();
+  if (image_spaces.empty()) {
+    *error_msg = "There are no boot image spaces";
+    return nullptr;
+  }
+
+  std::unique_ptr<ImageInfo> info(new ImageInfo());
+  info->location = image_spaces[0]->GetImageLocation();
+
+  // TODO: Special casing on isa == kRuntimeISA is presumably motivated by
+  // performance: 'it's faster to use an already loaded image header than read
+  // the image header from disk'. But the loaded image's ISA is not necessarily
+  // kRuntimeISA, so this behavior is suspect (b/35659889).
+  if (isa == kRuntimeISA) {
+    const ImageHeader& image_header = image_spaces[0]->GetImageHeader();
+    info->oat_checksum = image_header.GetOatChecksum();
+    info->oat_data_begin = reinterpret_cast<uintptr_t>(image_header.GetOatDataBegin());
+    info->patch_delta = image_header.GetPatchDelta();
+  } else {
+    std::unique_ptr<ImageHeader> image_header(
+        gc::space::ImageSpace::ReadImageHeader(info->location.c_str(), isa, error_msg));
+    if (image_header == nullptr) {
+      return nullptr;
+    }
+    info->oat_checksum = image_header->GetOatChecksum();
+    info->oat_data_begin = reinterpret_cast<uintptr_t>(image_header->GetOatDataBegin());
+    info->patch_delta = image_header->GetPatchDelta();
+  }
+  return info;
+}
+
 const OatFileAssistant::ImageInfo* OatFileAssistant::GetImageInfo() {
   if (!image_info_load_attempted_) {
     image_info_load_attempted_ = true;
-
-    Runtime* runtime = Runtime::Current();
-    std::vector<gc::space::ImageSpace*> image_spaces = runtime->GetHeap()->GetBootImageSpaces();
-    if (!image_spaces.empty()) {
-      cached_image_info_.location = image_spaces[0]->GetImageLocation();
-
-      if (isa_ == kRuntimeISA) {
-        const ImageHeader& image_header = image_spaces[0]->GetImageHeader();
-        cached_image_info_.oat_checksum = image_header.GetOatChecksum();
-        cached_image_info_.oat_data_begin = reinterpret_cast<uintptr_t>(
-            image_header.GetOatDataBegin());
-        cached_image_info_.patch_delta = image_header.GetPatchDelta();
-      } else {
-        std::string error_msg;
-        std::unique_ptr<ImageHeader> image_header(
-            gc::space::ImageSpace::ReadImageHeader(cached_image_info_.location.c_str(),
-                                                   isa_,
-                                                   &error_msg));
-        CHECK(image_header != nullptr) << error_msg;
-        cached_image_info_.oat_checksum = image_header->GetOatChecksum();
-        cached_image_info_.oat_data_begin = reinterpret_cast<uintptr_t>(
-            image_header->GetOatDataBegin());
-        cached_image_info_.patch_delta = image_header->GetPatchDelta();
-      }
-    }
-    image_info_load_succeeded_ = (!image_spaces.empty());
-
-    combined_image_checksum_ = CalculateCombinedImageChecksum(isa_);
-  }
-  return image_info_load_succeeded_ ? &cached_image_info_ : nullptr;
-}
-
-// TODO: Use something better than xor.
-uint32_t OatFileAssistant::CalculateCombinedImageChecksum(InstructionSet isa) {
-  uint32_t checksum = 0;
-  std::vector<gc::space::ImageSpace*> image_spaces =
-      Runtime::Current()->GetHeap()->GetBootImageSpaces();
-  if (isa == kRuntimeISA) {
-    for (gc::space::ImageSpace* image_space : image_spaces) {
-      checksum ^= image_space->GetImageHeader().GetOatChecksum();
-    }
-  } else {
-    for (gc::space::ImageSpace* image_space : image_spaces) {
-      std::string location = image_space->GetImageLocation();
-      std::string error_msg;
-      std::unique_ptr<ImageHeader> image_header(
-          gc::space::ImageSpace::ReadImageHeader(location.c_str(), isa, &error_msg));
-      CHECK(image_header != nullptr) << error_msg;
-      checksum ^= image_header->GetOatChecksum();
+    std::string error_msg;
+    cached_image_info_ = ImageInfo::GetRuntimeImageInfo(isa_, &error_msg);
+    if (cached_image_info_ == nullptr) {
+      LOG(WARNING) << "Unable to get runtime image info: " << error_msg;
     }
   }
-  return checksum;
-}
-
-uint32_t OatFileAssistant::GetCombinedImageChecksum() {
-  if (!image_info_load_attempted_) {
-    GetImageInfo();
-  }
-  return combined_image_checksum_;
+  return cached_image_info_.get();
 }
 
 OatFileAssistant::OatFileInfo& OatFileAssistant::GetBestInfo() {
diff --git a/runtime/oat_file_assistant.h b/runtime/oat_file_assistant.h
index 3ede29f..d61e994 100644
--- a/runtime/oat_file_assistant.h
+++ b/runtime/oat_file_assistant.h
@@ -276,14 +276,15 @@
                                        std::string* oat_filename,
                                        std::string* error_msg);
 
-  static uint32_t CalculateCombinedImageChecksum(InstructionSet isa = kRuntimeISA);
-
  private:
   struct ImageInfo {
     uint32_t oat_checksum = 0;
     uintptr_t oat_data_begin = 0;
     int32_t patch_delta = 0;
     std::string location;
+
+    static std::unique_ptr<ImageInfo> GetRuntimeImageInfo(InstructionSet isa,
+                                                          std::string* error_msg);
   };
 
   class OatFileInfo {
@@ -414,8 +415,6 @@
   // The caller shouldn't clean up or free the returned pointer.
   const ImageInfo* GetImageInfo();
 
-  uint32_t GetCombinedImageChecksum();
-
   // To implement Lock(), we lock a dummy file where the oat file would go
   // (adding ".flock" to the target file name) and retain the lock for the
   // remaining lifetime of the OatFileAssistant object.
@@ -445,9 +444,7 @@
   // TODO: The image info should probably be moved out of the oat file
   // assistant to an image file manager.
   bool image_info_load_attempted_ = false;
-  bool image_info_load_succeeded_ = false;
-  ImageInfo cached_image_info_;
-  uint32_t combined_image_checksum_ = 0;
+  std::unique_ptr<ImageInfo> cached_image_info_;
 
   DISALLOW_COPY_AND_ASSIGN(OatFileAssistant);
 };
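The net effect in OatFileAssistant is a load-once pattern: image_info_load_attempted_ guards the (possibly failing) load, and the unique_ptr doubles as cache and success flag, replacing the old trio of success flag, inline struct, and separately cached combined checksum. The same idiom in isolation (a generic sketch; all names invented):

#include <memory>
#include <string>

// Runs `load` at most once; a null cache afterwards means "attempted, failed".
template <typename T, typename LoadFn>
const T* LoadOnce(bool& attempted, std::unique_ptr<T>& cache, LoadFn load) {
  if (!attempted) {
    attempted = true;
    std::string error;
    cache = load(&error);  // returns nullptr on failure and fills `error`
  }
  return cache.get();
}

A failed load is remembered as well, so later callers get nullptr back without paying for another read of the image header from disk.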
diff --git a/runtime/oat_file_manager.cc b/runtime/oat_file_manager.cc
index 7079614..d04dbbe 100644
--- a/runtime/oat_file_manager.cc
+++ b/runtime/oat_file_manager.cc
@@ -23,6 +23,7 @@
 #include "android-base/stringprintf.h"
 
 #include "art_field-inl.h"
+#include "base/bit_vector-inl.h"
 #include "base/logging.h"
 #include "base/stl_util.h"
 #include "base/systrace.h"
@@ -145,13 +146,52 @@
   return oat_files;
 }
 
+class TypeIndexInfo {
+ public:
+  explicit TypeIndexInfo(const DexFile* dex_file)
+      : type_indexes_(GenerateTypeIndexes(dex_file)),
+        iter_(type_indexes_.Indexes().begin()),
+        end_(type_indexes_.Indexes().end()) { }
+
+  BitVector& GetTypeIndexes() {
+    return type_indexes_;
+  }
+  BitVector::IndexIterator& GetIterator() {
+    return iter_;
+  }
+  BitVector::IndexIterator& GetIteratorEnd() {
+    return end_;
+  }
+  void AdvanceIterator() {
+    iter_++;
+  }
+
+ private:
+  static BitVector GenerateTypeIndexes(const DexFile* dex_file) {
+    BitVector type_indexes(/*start_bits*/0, /*expandable*/true, Allocator::GetMallocAllocator());
+    for (uint16_t i = 0; i < dex_file->NumClassDefs(); ++i) {
+      const DexFile::ClassDef& class_def = dex_file->GetClassDef(i);
+      uint16_t type_idx = class_def.class_idx_.index_;
+      type_indexes.SetBit(type_idx);
+    }
+    return type_indexes;
+  }
+
+  // BitVector with bits set for the type indexes of all classes in the input dex file.
+  BitVector type_indexes_;
+  BitVector::IndexIterator iter_;
+  BitVector::IndexIterator end_;
+};
+
 class DexFileAndClassPair : ValueObject {
  public:
-  DexFileAndClassPair(const DexFile* dex_file, size_t current_class_index, bool from_loaded_oat)
-     : cached_descriptor_(GetClassDescriptor(dex_file, current_class_index)),
+  DexFileAndClassPair(const DexFile* dex_file, TypeIndexInfo* type_info, bool from_loaded_oat)
+     : type_info_(type_info),
        dex_file_(dex_file),
-       current_class_index_(current_class_index),
-       from_loaded_oat_(from_loaded_oat) {}
+       cached_descriptor_(dex_file_->StringByTypeIdx(dex::TypeIndex(*type_info->GetIterator()))),
+       from_loaded_oat_(from_loaded_oat) {
+    type_info_->AdvanceIterator();
+  }
 
   DexFileAndClassPair(const DexFileAndClassPair& rhs) = default;
 
@@ -172,16 +212,12 @@
   }
 
   bool DexFileHasMoreClasses() const {
-    return current_class_index_ + 1 < dex_file_->NumClassDefs();
+    return type_info_->GetIterator() != type_info_->GetIteratorEnd();
   }
 
   void Next() {
-    ++current_class_index_;
-    cached_descriptor_ = GetClassDescriptor(dex_file_, current_class_index_);
-  }
-
-  size_t GetCurrentClassIndex() const {
-    return current_class_index_;
+    cached_descriptor_ = dex_file_->StringByTypeIdx(dex::TypeIndex(*type_info_->GetIterator()));
+    type_info_->AdvanceIterator();
   }
 
   bool FromLoadedOat() const {
@@ -193,42 +229,36 @@
   }
 
  private:
-  static const char* GetClassDescriptor(const DexFile* dex_file, size_t index) {
-    DCHECK(IsUint<16>(index));
-    const DexFile::ClassDef& class_def = dex_file->GetClassDef(static_cast<uint16_t>(index));
-    return dex_file->StringByTypeIdx(class_def.class_idx_);
-  }
-
-  const char* cached_descriptor_;
+  TypeIndexInfo* type_info_;
   const DexFile* dex_file_;
-  size_t current_class_index_;
+  const char* cached_descriptor_;
   bool from_loaded_oat_;  // We only need to compare mismatches between what we load now
                           // and what was loaded before. Any old duplicates must have been
                           // OK, and any new "internal" duplicates are as well (they must
                           // be from multidex, which resolves correctly).
 };
 
-static void AddDexFilesFromOat(const OatFile* oat_file,
-                               bool already_loaded,
-                               /*out*/std::priority_queue<DexFileAndClassPair>* heap,
-                               std::vector<std::unique_ptr<const DexFile>>* opened_dex_files) {
+static void AddDexFilesFromOat(
+    const OatFile* oat_file,
+    /*out*/std::vector<const DexFile*>* dex_files,
+    std::vector<std::unique_ptr<const DexFile>>* opened_dex_files) {
   for (const OatDexFile* oat_dex_file : oat_file->GetOatDexFiles()) {
     std::string error;
     std::unique_ptr<const DexFile> dex_file = oat_dex_file->OpenDexFile(&error);
     if (dex_file == nullptr) {
       LOG(WARNING) << "Could not create dex file from oat file: " << error;
     } else if (dex_file->NumClassDefs() > 0U) {
-      heap->emplace(dex_file.get(), /*current_class_index*/0U, already_loaded);
+      dex_files->push_back(dex_file.get());
       opened_dex_files->push_back(std::move(dex_file));
     }
   }
 }
 
-static void AddNext(/*inout*/DexFileAndClassPair* original,
-                    /*inout*/std::priority_queue<DexFileAndClassPair>* heap) {
-  if (original->DexFileHasMoreClasses()) {
-    original->Next();
-    heap->push(std::move(*original));
+static void AddNext(/*inout*/DexFileAndClassPair& original,
+                    /*inout*/std::priority_queue<DexFileAndClassPair>& heap) {
+  if (original.DexFileHasMoreClasses()) {
+    original.Next();
+    heap.push(std::move(original));
   }
 }
 
@@ -297,7 +327,8 @@
 static bool GetDexFilesFromClassLoader(
     ScopedObjectAccessAlreadyRunnable& soa,
     mirror::ClassLoader* class_loader,
-    std::priority_queue<DexFileAndClassPair>* queue) REQUIRES_SHARED(Locks::mutator_lock_) {
+    std::vector<const DexFile*>* dex_files)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
   if (ClassLinker::IsBootClassLoader(soa, class_loader)) {
     // The boot class loader. We don't load any of these files, as we know we compiled against
     // them correctly.
@@ -312,7 +343,7 @@
     return false;
   }
 
-  bool recursive_result = GetDexFilesFromClassLoader(soa, class_loader->GetParent(), queue);
+  bool recursive_result = GetDexFilesFromClassLoader(soa, class_loader->GetParent(), dex_files);
   if (!recursive_result) {
     // Something wrong up the chain.
     return false;
@@ -322,7 +353,7 @@
   auto GetDexFilesFn = [&] (const DexFile* cp_dex_file)
             REQUIRES_SHARED(Locks::mutator_lock_) {
     if (cp_dex_file->NumClassDefs() > 0) {
-      queue->emplace(cp_dex_file, 0U, true);
+      dex_files->push_back(cp_dex_file);
     }
     return true;  // Continue looking.
   };
@@ -341,7 +372,8 @@
 static void GetDexFilesFromDexElementsArray(
     ScopedObjectAccessAlreadyRunnable& soa,
     Handle<mirror::ObjectArray<mirror::Object>> dex_elements,
-    std::priority_queue<DexFileAndClassPair>* queue) REQUIRES_SHARED(Locks::mutator_lock_) {
+    std::vector<const DexFile*>* dex_files)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
   if (dex_elements == nullptr) {
     // Nothing to do.
     return;
@@ -360,7 +392,7 @@
   auto GetDexFilesFn = [&] (const DexFile* cp_dex_file)
       REQUIRES_SHARED(Locks::mutator_lock_) {
     if (cp_dex_file != nullptr && cp_dex_file->NumClassDefs() > 0) {
-      queue->emplace(cp_dex_file, 0U, true);
+      dex_files->push_back(cp_dex_file);
     }
     return true;  // Continue looking.
   };
@@ -389,43 +421,95 @@
 }
 
 static bool AreSharedLibrariesOk(const std::string& shared_libraries,
-                                 std::priority_queue<DexFileAndClassPair>& queue) {
+                                 std::vector<const DexFile*>& dex_files) {
+  // If no shared libraries, we expect no dex files.
   if (shared_libraries.empty()) {
-    if (queue.empty()) {
-      // No shared libraries or oat files, as expected.
-      return true;
-    }
-  } else {
-    if (shared_libraries.compare(OatFile::kSpecialSharedLibrary) == 0) {
-      // If we find the special shared library, skip the shared libraries check.
-      return true;
-    }
-    // Shared libraries is a series of dex file paths and their checksums, each separated by '*'.
-    std::vector<std::string> shared_libraries_split;
-    Split(shared_libraries, '*', &shared_libraries_split);
-
-    size_t index = 0;
-    std::priority_queue<DexFileAndClassPair> temp = queue;
-    while (!temp.empty() && index < shared_libraries_split.size() - 1) {
-      DexFileAndClassPair pair(temp.top());
-      const DexFile* dex_file = pair.GetDexFile();
-      const std::string& dex_filename = dex_file->GetLocation();
-      if (dex_filename != shared_libraries_split[index]) {
-        break;
-      }
-      char* end;
-      size_t shared_lib_checksum = strtoul(shared_libraries_split[index + 1].c_str(), &end, 10);
-      uint32_t dex_checksum = dex_file->GetLocationChecksum();
-      if (*end != '\0' || dex_checksum != shared_lib_checksum) {
-        break;
-      }
-      temp.pop();
-      index += 2;
-    }
-
-    // Check is successful if it made it through the queue and all the shared libraries.
-    return temp.empty() && index == shared_libraries_split.size();
+    return dex_files.empty();
   }
+  // If we find the special shared library, skip the shared libraries check.
+  if (shared_libraries.compare(OatFile::kSpecialSharedLibrary) == 0) {
+    return true;
+  }
+  // Shared libraries is a series of dex file paths and their checksums, each separated by '*'.
+  std::vector<std::string> shared_libraries_split;
+  Split(shared_libraries, '*', &shared_libraries_split);
+
+  // Sanity check size of dex files and split shared libraries. Should be 2x as many entries in
+  // the split shared libraries since it contains pairs of filename/checksum.
+  if (dex_files.size() * 2 != shared_libraries_split.size()) {
+    return false;
+  }
+
+  for (size_t i = 0; i < dex_files.size(); ++i) {
+    if (dex_files[i]->GetLocation() != shared_libraries_split[i * 2]) {
+      return false;
+    }
+    char* end;
+    size_t shared_lib_checksum = strtoul(shared_libraries_split[i * 2 + 1].c_str(), &end, 10);
+    uint32_t dex_checksum = dex_files[i]->GetLocationChecksum();
+    if (*end != '\0' || dex_checksum != shared_lib_checksum) {
+      return false;
+    }
+  }
+
+  return true;
+}
+
+static bool CollisionCheck(std::vector<const DexFile*>& dex_files_loaded,
+                           std::vector<const DexFile*>& dex_files_unloaded,
+                           std::string* error_msg /*out*/) {
+  // Generate type index information for each dex file.
+  std::vector<TypeIndexInfo> loaded_types;
+  for (const DexFile* dex_file : dex_files_loaded) {
+    loaded_types.push_back(TypeIndexInfo(dex_file));
+  }
+  std::vector<TypeIndexInfo> unloaded_types;
+  for (const DexFile* dex_file : dex_files_unloaded) {
+    unloaded_types.push_back(TypeIndexInfo(dex_file));
+  }
+
+  // Populate the queue of dex file and class pairs with the loaded and unloaded dex files.
+  std::priority_queue<DexFileAndClassPair> queue;
+  for (size_t i = 0; i < dex_files_loaded.size(); ++i) {
+    if (loaded_types[i].GetIterator() != loaded_types[i].GetIteratorEnd()) {
+      queue.emplace(dex_files_loaded[i], &loaded_types[i], /*from_loaded_oat*/true);
+    }
+  }
+  for (size_t i = 0; i < dex_files_unloaded.size(); ++i) {
+    if (unloaded_types[i].GetIterator() != unloaded_types[i].GetIteratorEnd()) {
+      queue.emplace(dex_files_unloaded[i], &unloaded_types[i], /*from_loaded_oat*/false);
+    }
+  }
+
+  // Now drain the queue.
+  while (!queue.empty()) {
+    // Modifying the top element is only safe if we pop right after.
+    DexFileAndClassPair compare_pop(queue.top());
+    queue.pop();
+
+    // Compare against the following elements.
+    while (!queue.empty()) {
+      DexFileAndClassPair top(queue.top());
+      if (strcmp(compare_pop.GetCachedDescriptor(), top.GetCachedDescriptor()) == 0) {
+        // Same descriptor. Check whether it's crossing old-oat-files to new-oat-files.
+        if (compare_pop.FromLoadedOat() != top.FromLoadedOat()) {
+          *error_msg =
+              StringPrintf("Found duplicated class when checking oat files: '%s' in %s and %s",
+                           compare_pop.GetCachedDescriptor(),
+                           compare_pop.GetDexFile()->GetLocation().c_str(),
+                           top.GetDexFile()->GetLocation().c_str());
+          return true;
+        }
+        queue.pop();
+        AddNext(top, queue);
+      } else {
+        // Something else. Done here.
+        break;
+      }
+    }
+    AddNext(compare_pop, queue);
+  }
+
   return false;
 }
 
@@ -450,7 +534,7 @@
   DCHECK(oat_file != nullptr);
   DCHECK(error_msg != nullptr);
 
-  std::priority_queue<DexFileAndClassPair> queue;
+  std::vector<const DexFile*> dex_files_loaded;
 
   // Try to get dex files from the given class loader. If the class loader is null, or we do
   // not support one of the class loaders in the chain, conservatively compare against all
@@ -464,12 +548,12 @@
     Handle<mirror::ObjectArray<mirror::Object>> h_dex_elements =
         hs.NewHandle(soa.Decode<mirror::ObjectArray<mirror::Object>>(dex_elements));
     if (h_class_loader != nullptr &&
-        GetDexFilesFromClassLoader(soa, h_class_loader.Get(), &queue)) {
+        GetDexFilesFromClassLoader(soa, h_class_loader.Get(), &dex_files_loaded)) {
       class_loader_ok = true;
 
       // In this case, also take into account the dex_elements array, if given. We don't need to
       // read it otherwise, as we'll compare against all open oat files anyways.
-      GetDexFilesFromDexElementsArray(soa, h_dex_elements, &queue);
+      GetDexFilesFromDexElementsArray(soa, h_dex_elements, &dex_files_loaded);
     } else if (h_class_loader != nullptr) {
       VLOG(class_linker) << "Something unsupported with "
                          << mirror::Class::PrettyClass(h_class_loader->GetClass());
@@ -486,10 +570,8 @@
   if (!class_loader_ok) {
     // Add dex files from already loaded oat files, but skip boot.
 
-    // Clean up the queue.
-    while (!queue.empty()) {
-      queue.pop();
-    }
+    // Clean up the dex files.
+    dex_files_loaded.clear();
 
     std::vector<const OatFile*> boot_oat_files = GetBootOatFiles();
     // The same OatFile can be loaded multiple times at different addresses. In this case, we don't
@@ -503,10 +585,7 @@
           boot_oat_files.end() && location != oat_file->GetLocation() &&
           unique_locations.find(location) == unique_locations.end()) {
         unique_locations.insert(location);
-        AddDexFilesFromOat(loaded_oat_file.get(),
-                           /*already_loaded*/true,
-                           &queue,
-                           /*out*/&opened_dex_files);
+        AddDexFilesFromOat(loaded_oat_file.get(), &dex_files_loaded, &opened_dex_files);
       }
     }
   }
@@ -514,46 +593,15 @@
   // Exit if shared libraries are ok. Do a full duplicate classes check otherwise.
   const std::string
       shared_libraries(oat_file->GetOatHeader().GetStoreValueByKey(OatHeader::kClassPathKey));
-  if (AreSharedLibrariesOk(shared_libraries, queue)) {
+  if (AreSharedLibrariesOk(shared_libraries, dex_files_loaded)) {
     return false;
   }
 
   ScopedTrace st("Collision check");
-
   // Add dex files from the oat file to check.
-  AddDexFilesFromOat(oat_file, /*already_loaded*/false, &queue, &opened_dex_files);
-
-  // Now drain the queue.
-  while (!queue.empty()) {
-    // Modifying the top element is only safe if we pop right after.
-    DexFileAndClassPair compare_pop(queue.top());
-    queue.pop();
-
-    // Compare against the following elements.
-    while (!queue.empty()) {
-      DexFileAndClassPair top(queue.top());
-
-      if (strcmp(compare_pop.GetCachedDescriptor(), top.GetCachedDescriptor()) == 0) {
-        // Same descriptor. Check whether it's crossing old-oat-files to new-oat-files.
-        if (compare_pop.FromLoadedOat() != top.FromLoadedOat()) {
-          *error_msg =
-              StringPrintf("Found duplicated class when checking oat files: '%s' in %s and %s",
-                           compare_pop.GetCachedDescriptor(),
-                           compare_pop.GetDexFile()->GetLocation().c_str(),
-                           top.GetDexFile()->GetLocation().c_str());
-          return true;
-        }
-        queue.pop();
-        AddNext(&top, &queue);
-      } else {
-        // Something else. Done here.
-        break;
-      }
-    }
-    AddNext(&compare_pop, &queue);
-  }
-
-  return false;
+  std::vector<const DexFile*> dex_files_unloaded;
+  AddDexFilesFromOat(oat_file, &dex_files_unloaded, &opened_dex_files);
+  return CollisionCheck(dex_files_loaded, dex_files_unloaded, error_msg);
 }
 
 std::vector<std::unique_ptr<const DexFile>> OatFileManager::OpenDexFilesFromOat(
@@ -729,9 +777,6 @@
     }
   }
 
-  // TODO(calin): Consider optimizing this knowing that is useless to record the
-  // use of fully compiled apks.
-  Runtime::Current()->NotifyDexLoaded(dex_location);
   return dex_files;
 }
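CollisionCheck keeps the old merge strategy but drives it from the TypeIndexInfo iterators instead of raw class-def indices: every dex file contributes its class descriptors in sorted type-index order, the priority queue interleaves the streams, and only a duplicate descriptor that crosses the loaded/unloaded boundary is reported. The core idea reduced to a standalone sketch (types and names invented; the real descriptors come from the dex files):

#include <queue>
#include <string>
#include <vector>

struct Stream {
  const std::vector<std::string>* names;  // sorted ascending
  size_t pos;
  bool from_loaded;
  const std::string& Top() const { return (*names)[pos]; }
};

// std::priority_queue pops its "largest" element, so invert the comparison
// to pop the lexicographically smallest descriptor first.
struct StreamCmp {
  bool operator()(const Stream& a, const Stream& b) const { return a.Top() > b.Top(); }
};

static bool HasCrossDuplicate(const std::vector<std::string>& loaded,
                              const std::vector<std::string>& unloaded) {
  std::priority_queue<Stream, std::vector<Stream>, StreamCmp> queue;
  if (!loaded.empty()) queue.push({&loaded, 0, /*from_loaded=*/true});
  if (!unloaded.empty()) queue.push({&unloaded, 0, /*from_loaded=*/false});
  std::string prev;
  bool prev_from_loaded = false;
  bool have_prev = false;
  while (!queue.empty()) {
    Stream s = queue.top();
    queue.pop();
    // Equal descriptors pop back-to-back; only cross-group pairs collide.
    if (have_prev && s.Top() == prev && s.from_loaded != prev_from_loaded) {
      return true;
    }
    prev = s.Top();
    prev_from_loaded = s.from_loaded;
    have_prev = true;
    if (++s.pos < s.names->size()) {
      queue.push(s);  // stream still has descriptors; re-enter the merge
    }
  }
  return false;
}

With only two streams this degenerates to an ordinary sorted merge; the queue earns its keep in the real code, where every dex file in the class path is a separate stream.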
 
diff --git a/runtime/obj_ptr-inl.h b/runtime/obj_ptr-inl.h
index d0be6dc..f2921da 100644
--- a/runtime/obj_ptr-inl.h
+++ b/runtime/obj_ptr-inl.h
@@ -22,27 +22,27 @@
 
 namespace art {
 
-template<class MirrorType, bool kPoison>
-inline bool ObjPtr<MirrorType, kPoison>::IsValid() const {
-  if (!kPoison || IsNull()) {
+template<class MirrorType>
+inline bool ObjPtr<MirrorType>::IsValid() const {
+  if (!kObjPtrPoisoning || IsNull()) {
     return true;
   }
   return GetCookie() == TrimCookie(Thread::Current()->GetPoisonObjectCookie());
 }
 
-template<class MirrorType, bool kPoison>
-inline void ObjPtr<MirrorType, kPoison>::AssertValid() const {
-  if (kPoison) {
+template<class MirrorType>
+inline void ObjPtr<MirrorType>::AssertValid() const {
+  if (kObjPtrPoisoning) {
     CHECK(IsValid()) << "Stale object pointer " << PtrUnchecked() << " , expected cookie "
         << TrimCookie(Thread::Current()->GetPoisonObjectCookie()) << " but got " << GetCookie();
   }
 }
 
-template<class MirrorType, bool kPoison>
-inline uintptr_t ObjPtr<MirrorType, kPoison>::Encode(MirrorType* ptr) {
+template<class MirrorType>
+inline uintptr_t ObjPtr<MirrorType>::Encode(MirrorType* ptr) {
   uintptr_t ref = reinterpret_cast<uintptr_t>(ptr);
   DCHECK_ALIGNED(ref, kObjectAlignment);
-  if (kPoison && ref != 0) {
+  if (kObjPtrPoisoning && ref != 0) {
     DCHECK_LE(ref, 0xFFFFFFFFU);
     ref >>= kObjectAlignmentShift;
     // Put cookie in high bits.
@@ -53,8 +53,8 @@
   return ref;
 }
 
-template<class MirrorType, bool kPoison>
-inline std::ostream& operator<<(std::ostream& os, ObjPtr<MirrorType, kPoison> ptr) {
+template<class MirrorType>
+inline std::ostream& operator<<(std::ostream& os, ObjPtr<MirrorType> ptr) {
   // May be used for dumping bad pointers, do not use the checked version.
   return os << ptr.PtrUnchecked();
 }
diff --git a/runtime/obj_ptr.h b/runtime/obj_ptr.h
index 2da2ae5..92cf4eb 100644
--- a/runtime/obj_ptr.h
+++ b/runtime/obj_ptr.h
@@ -26,10 +26,12 @@
 
 namespace art {
 
+constexpr bool kObjPtrPoisoning = kIsDebugBuild;
+
 // Value type representing a pointer to a mirror::Object of type MirrorType
-// Pass kPoison as a template boolean for testing in non-debug builds.
+// Object pointer poisoning is controlled by the kObjPtrPoisoning constant above.
 // Since the cookie is thread based, it is not safe to share an ObjPtr between threads.
-template<class MirrorType, bool kPoison = kIsDebugBuild>
+template<class MirrorType>
 class ObjPtr {
   static constexpr size_t kCookieShift =
       sizeof(kHeapReferenceSize) * kBitsPerByte - kObjectAlignmentShift;
@@ -60,14 +62,14 @@
 
   template <typename Type,
             typename = typename std::enable_if<std::is_base_of<MirrorType, Type>::value>::type>
-  ALWAYS_INLINE ObjPtr(const ObjPtr<Type, kPoison>& other)  // NOLINT
+  ALWAYS_INLINE ObjPtr(const ObjPtr<Type>& other)  // NOLINT
       REQUIRES_SHARED(Locks::mutator_lock_)
       : reference_(Encode(static_cast<MirrorType*>(other.Ptr()))) {
   }
 
   template <typename Type,
             typename = typename std::enable_if<std::is_base_of<MirrorType, Type>::value>::type>
-  ALWAYS_INLINE ObjPtr& operator=(const ObjPtr<Type, kPoison>& other)
+  ALWAYS_INLINE ObjPtr& operator=(const ObjPtr<Type>& other)
       REQUIRES_SHARED(Locks::mutator_lock_) {
     reference_ = Encode(static_cast<MirrorType*>(other.Ptr()));
     return *this;
@@ -130,7 +132,7 @@
 
   // Ptr unchecked does not check that object pointer is valid. Do not use if you can avoid it.
   ALWAYS_INLINE MirrorType* PtrUnchecked() const {
-    if (kPoison) {
+    if (kObjPtrPoisoning) {
       return reinterpret_cast<MirrorType*>(
           static_cast<uintptr_t>(static_cast<uint32_t>(reference_ << kObjectAlignmentShift)));
     } else {
@@ -167,46 +169,46 @@
 // Hash function for stl data structures.
 class HashObjPtr {
  public:
-  template<class MirrorType, bool kPoison>
-  size_t operator()(const ObjPtr<MirrorType, kPoison>& ptr) const NO_THREAD_SAFETY_ANALYSIS {
+  template<class MirrorType>
+  size_t operator()(const ObjPtr<MirrorType>& ptr) const NO_THREAD_SAFETY_ANALYSIS {
     return std::hash<MirrorType*>()(ptr.Ptr());
   }
 };
 
-template<class MirrorType, bool kPoison, typename PointerType>
-ALWAYS_INLINE bool operator==(const PointerType* a, const ObjPtr<MirrorType, kPoison>& b)
+template<class MirrorType, typename PointerType>
+ALWAYS_INLINE bool operator==(const PointerType* a, const ObjPtr<MirrorType>& b)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   return b == a;
 }
 
-template<class MirrorType, bool kPoison>
-ALWAYS_INLINE bool operator==(std::nullptr_t, const ObjPtr<MirrorType, kPoison>& b) {
+template<class MirrorType>
+ALWAYS_INLINE bool operator==(std::nullptr_t, const ObjPtr<MirrorType>& b) {
   return b == nullptr;
 }
 
-template<typename MirrorType, bool kPoison, typename PointerType>
-ALWAYS_INLINE bool operator!=(const PointerType* a, const ObjPtr<MirrorType, kPoison>& b)
+template<typename MirrorType, typename PointerType>
+ALWAYS_INLINE bool operator!=(const PointerType* a, const ObjPtr<MirrorType>& b)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   return b != a;
 }
 
-template<class MirrorType, bool kPoison>
-ALWAYS_INLINE bool operator!=(std::nullptr_t, const ObjPtr<MirrorType, kPoison>& b) {
+template<class MirrorType>
+ALWAYS_INLINE bool operator!=(std::nullptr_t, const ObjPtr<MirrorType>& b) {
   return b != nullptr;
 }
 
-template<class MirrorType, bool kPoison = kIsDebugBuild>
-static inline ObjPtr<MirrorType, kPoison> MakeObjPtr(MirrorType* ptr) {
-  return ObjPtr<MirrorType, kPoison>(ptr);
+template<class MirrorType>
+static inline ObjPtr<MirrorType> MakeObjPtr(MirrorType* ptr) {
+  return ObjPtr<MirrorType>(ptr);
 }
 
-template<class MirrorType, bool kPoison = kIsDebugBuild>
-static inline ObjPtr<MirrorType, kPoison> MakeObjPtr(ObjPtr<MirrorType, kPoison> ptr) {
-  return ObjPtr<MirrorType, kPoison>(ptr);
+template<class MirrorType>
+static inline ObjPtr<MirrorType> MakeObjPtr(ObjPtr<MirrorType> ptr) {
+  return ObjPtr<MirrorType>(ptr);
 }
 
-template<class MirrorType, bool kPoison>
-ALWAYS_INLINE std::ostream& operator<<(std::ostream& os, ObjPtr<MirrorType, kPoison> ptr);
+template<class MirrorType>
+ALWAYS_INLINE std::ostream& operator<<(std::ostream& os, ObjPtr<MirrorType> ptr);
 
 }  // namespace art
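With the kPoison template parameter gone, poisoning is one global switch, but the bit-packing mechanism is unchanged. A self-contained sketch of the scheme (constants and the cookie source are invented; ART takes the cookie from the current thread and the shift from kObjectAlignmentShift, and assumes 32-bit heap references):

#include <cassert>
#include <cstdint>

constexpr unsigned kAlignShiftSketch = 3;  // 8-byte-aligned objects free 3 low bits
constexpr unsigned kCookieShiftSketch = 32 - kAlignShiftSketch;

// Compress a 32-bit, aligned pointer into the low bits of the word and stamp
// a per-thread cookie (assumed to fit in the remaining high bits) above it.
uintptr_t EncodeSketch(uint32_t ptr, uintptr_t cookie) {
  assert((ptr & ((1u << kAlignShiftSketch) - 1)) == 0);
  return (static_cast<uintptr_t>(ptr) >> kAlignShiftSketch) | (cookie << kCookieShiftSketch);
}

// Recover the pointer: the left shift restores the alignment bits and the
// 32-bit truncation drops the cookie; the assert catches a stale value whose
// cookie no longer matches the current thread's.
uint32_t DecodeSketch(uintptr_t ref, uintptr_t cookie) {
  assert((ref >> kCookieShiftSketch) == cookie);
  return static_cast<uint32_t>(ref << kAlignShiftSketch);
}

Trading the per-instantiation kPoison for the global kObjPtrPoisoning removes a template parameter from every signature that touches ObjPtr, at the cost of no longer being able to poison a single ObjPtr type in a release build.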
 
diff --git a/runtime/object_callbacks.h b/runtime/object_callbacks.h
index 4d726ec..ea5e698 100644
--- a/runtime/object_callbacks.h
+++ b/runtime/object_callbacks.h
@@ -43,7 +43,8 @@
   // May return the same address as the input if the object did not move.
   virtual mirror::Object* MarkObject(mirror::Object* obj) = 0;
   // Mark an object and update the value stored in the heap reference if the object moved.
-  virtual void MarkHeapReference(mirror::HeapReference<mirror::Object>* obj) = 0;
+  virtual void MarkHeapReference(mirror::HeapReference<mirror::Object>* obj,
+                                 bool do_atomic_update) = 0;
 };
 
 }  // namespace art
diff --git a/runtime/openjdkjvmti/Android.bp b/runtime/openjdkjvmti/Android.bp
index c01e3f4..dd49ad0 100644
--- a/runtime/openjdkjvmti/Android.bp
+++ b/runtime/openjdkjvmti/Android.bp
@@ -13,6 +13,12 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+cc_library_headers {
+    name: "libopenjdkjvmti_headers",
+    host_supported: true,
+    export_include_dirs: ["include"],
+}
+
 cc_defaults {
     name: "libopenjdkjvmti_defaults",
     defaults: ["art_defaults"],
@@ -40,6 +46,7 @@
            "ti_timers.cc",
            "transform.cc"],
     include_dirs: ["art/runtime"],
+    header_libs: ["libopenjdkjvmti_headers"],
     shared_libs: [
         "libbase",
         "libnativehelper",
diff --git a/runtime/openjdkjvmti/OpenjdkJvmTi.cc b/runtime/openjdkjvmti/OpenjdkJvmTi.cc
index a815a60..5401e5c 100644
--- a/runtime/openjdkjvmti/OpenjdkJvmTi.cc
+++ b/runtime/openjdkjvmti/OpenjdkJvmTi.cc
@@ -35,7 +35,7 @@
 
 #include <jni.h>
 
-#include "openjdkjvmti/jvmti.h"
+#include "jvmti.h"
 
 #include "art_jvmti.h"
 #include "base/logging.h"
@@ -66,14 +66,9 @@
 #include "ti_timers.h"
 #include "transform.h"
 
-// TODO Remove this at some point by annotating all the methods. It was put in to make the skeleton
-// easier to create.
-#pragma GCC diagnostic ignored "-Wunused-parameter"
-
 namespace openjdkjvmti {
 
 EventHandler gEventHandler;
-ObjectTagTable gObjectTagTable(&gEventHandler);
 
 #define ENSURE_NON_NULL(n)      \
   do {                          \
@@ -137,38 +132,40 @@
     return ThreadUtil::GetAllThreads(env, threads_count_ptr, threads_ptr);
   }
 
-  static jvmtiError SuspendThread(jvmtiEnv* env, jthread thread) {
+  static jvmtiError SuspendThread(jvmtiEnv* env, jthread thread ATTRIBUTE_UNUSED) {
     ENSURE_HAS_CAP(env, can_suspend);
     return ERR(NOT_IMPLEMENTED);
   }
 
   static jvmtiError SuspendThreadList(jvmtiEnv* env,
-                                      jint request_count,
-                                      const jthread* request_list,
-                                      jvmtiError* results) {
+                                      jint request_count ATTRIBUTE_UNUSED,
+                                      const jthread* request_list ATTRIBUTE_UNUSED,
+                                      jvmtiError* results ATTRIBUTE_UNUSED) {
     ENSURE_HAS_CAP(env, can_suspend);
     return ERR(NOT_IMPLEMENTED);
   }
 
-  static jvmtiError ResumeThread(jvmtiEnv* env, jthread thread) {
+  static jvmtiError ResumeThread(jvmtiEnv* env, jthread thread ATTRIBUTE_UNUSED) {
     ENSURE_HAS_CAP(env, can_suspend);
     return ERR(NOT_IMPLEMENTED);
   }
 
   static jvmtiError ResumeThreadList(jvmtiEnv* env,
-                                     jint request_count,
-                                     const jthread* request_list,
-                                     jvmtiError* results) {
+                                     jint request_count ATTRIBUTE_UNUSED,
+                                     const jthread* request_list ATTRIBUTE_UNUSED,
+                                     jvmtiError* results ATTRIBUTE_UNUSED) {
     ENSURE_HAS_CAP(env, can_suspend);
     return ERR(NOT_IMPLEMENTED);
   }
 
-  static jvmtiError StopThread(jvmtiEnv* env, jthread thread, jobject exception) {
+  static jvmtiError StopThread(jvmtiEnv* env,
+                               jthread thread ATTRIBUTE_UNUSED,
+                               jobject exception ATTRIBUTE_UNUSED) {
     ENSURE_HAS_CAP(env, can_signal_thread);
     return ERR(NOT_IMPLEMENTED);
   }
 
-  static jvmtiError InterruptThread(jvmtiEnv* env, jthread thread) {
+  static jvmtiError InterruptThread(jvmtiEnv* env, jthread thread ATTRIBUTE_UNUSED) {
     ENSURE_HAS_CAP(env, can_signal_thread);
     return ERR(NOT_IMPLEMENTED);
   }
@@ -178,24 +175,25 @@
   }
 
   static jvmtiError GetOwnedMonitorInfo(jvmtiEnv* env,
-                                        jthread thread,
-                                        jint* owned_monitor_count_ptr,
-                                        jobject** owned_monitors_ptr) {
+                                        jthread thread ATTRIBUTE_UNUSED,
+                                        jint* owned_monitor_count_ptr ATTRIBUTE_UNUSED,
+                                        jobject** owned_monitors_ptr ATTRIBUTE_UNUSED) {
     ENSURE_HAS_CAP(env, can_get_owned_monitor_info);
     return ERR(NOT_IMPLEMENTED);
   }
 
-  static jvmtiError GetOwnedMonitorStackDepthInfo(jvmtiEnv* env,
-                                                  jthread thread,
-                                                  jint* monitor_info_count_ptr,
-                                                  jvmtiMonitorStackDepthInfo** monitor_info_ptr) {
+  static jvmtiError GetOwnedMonitorStackDepthInfo(
+      jvmtiEnv* env,
+      jthread thread ATTRIBUTE_UNUSED,
+      jint* monitor_info_count_ptr ATTRIBUTE_UNUSED,
+      jvmtiMonitorStackDepthInfo** monitor_info_ptr ATTRIBUTE_UNUSED) {
     ENSURE_HAS_CAP(env, can_get_owned_monitor_stack_depth_info);
     return ERR(NOT_IMPLEMENTED);
   }
 
   static jvmtiError GetCurrentContendedMonitor(jvmtiEnv* env,
-                                               jthread thread,
-                                               jobject* monitor_ptr) {
+                                               jthread thread ATTRIBUTE_UNUSED,
+                                               jobject* monitor_ptr ATTRIBUTE_UNUSED) {
     ENSURE_HAS_CAP(env, can_get_current_contended_monitor);
     return ERR(NOT_IMPLEMENTED);
   }
@@ -279,7 +277,7 @@
     return StackUtil::GetFrameCount(env, thread, count_ptr);
   }
 
-  static jvmtiError PopFrame(jvmtiEnv* env, jthread thread) {
+  static jvmtiError PopFrame(jvmtiEnv* env, jthread thread ATTRIBUTE_UNUSED) {
     ENSURE_HAS_CAP(env, can_pop_frame);
     return ERR(NOT_IMPLEMENTED);
   }
@@ -292,37 +290,49 @@
     return StackUtil::GetFrameLocation(env, thread, depth, method_ptr, location_ptr);
   }
 
-  static jvmtiError NotifyFramePop(jvmtiEnv* env, jthread thread, jint depth) {
+  static jvmtiError NotifyFramePop(jvmtiEnv* env,
+                                   jthread thread ATTRIBUTE_UNUSED,
+                                   jint depth ATTRIBUTE_UNUSED) {
     ENSURE_HAS_CAP(env, can_generate_frame_pop_events);
     return ERR(NOT_IMPLEMENTED);
   }
 
-  static jvmtiError ForceEarlyReturnObject(jvmtiEnv* env, jthread thread, jobject value) {
+  static jvmtiError ForceEarlyReturnObject(jvmtiEnv* env,
+                                           jthread thread ATTRIBUTE_UNUSED,
+                                           jobject value ATTRIBUTE_UNUSED) {
     ENSURE_HAS_CAP(env, can_force_early_return);
     return ERR(NOT_IMPLEMENTED);
   }
 
-  static jvmtiError ForceEarlyReturnInt(jvmtiEnv* env, jthread thread, jint value) {
+  static jvmtiError ForceEarlyReturnInt(jvmtiEnv* env,
+                                        jthread thread ATTRIBUTE_UNUSED,
+                                        jint value ATTRIBUTE_UNUSED) {
     ENSURE_HAS_CAP(env, can_force_early_return);
     return ERR(NOT_IMPLEMENTED);
   }
 
-  static jvmtiError ForceEarlyReturnLong(jvmtiEnv* env, jthread thread, jlong value) {
+  static jvmtiError ForceEarlyReturnLong(jvmtiEnv* env,
+                                         jthread thread ATTRIBUTE_UNUSED,
+                                         jlong value ATTRIBUTE_UNUSED) {
     ENSURE_HAS_CAP(env, can_force_early_return);
     return ERR(NOT_IMPLEMENTED);
   }
 
-  static jvmtiError ForceEarlyReturnFloat(jvmtiEnv* env, jthread thread, jfloat value) {
+  static jvmtiError ForceEarlyReturnFloat(jvmtiEnv* env,
+                                          jthread thread ATTRIBUTE_UNUSED,
+                                          jfloat value ATTRIBUTE_UNUSED) {
     ENSURE_HAS_CAP(env, can_force_early_return);
     return ERR(NOT_IMPLEMENTED);
   }
 
-  static jvmtiError ForceEarlyReturnDouble(jvmtiEnv* env, jthread thread, jdouble value) {
+  static jvmtiError ForceEarlyReturnDouble(jvmtiEnv* env,
+                                           jthread thread ATTRIBUTE_UNUSED,
+                                           jdouble value ATTRIBUTE_UNUSED) {
     ENSURE_HAS_CAP(env, can_force_early_return);
     return ERR(NOT_IMPLEMENTED);
   }
 
-  static jvmtiError ForceEarlyReturnVoid(jvmtiEnv* env, jthread thread) {
+  static jvmtiError ForceEarlyReturnVoid(jvmtiEnv* env, jthread thread ATTRIBUTE_UNUSED) {
     ENSURE_HAS_CAP(env, can_force_early_return);
     return ERR(NOT_IMPLEMENTED);
   }
@@ -334,7 +344,7 @@
                                      const jvmtiHeapCallbacks* callbacks,
                                      const void* user_data) {
     ENSURE_HAS_CAP(env, can_tag_objects);
-    HeapUtil heap_util(&gObjectTagTable);
+    HeapUtil heap_util(ArtJvmTiEnv::AsArtJvmTiEnv(env)->object_tag_table.get());
     return heap_util.FollowReferences(env,
                                       heap_filter,
                                       klass,
@@ -349,7 +359,7 @@
                                        const jvmtiHeapCallbacks* callbacks,
                                        const void* user_data) {
     ENSURE_HAS_CAP(env, can_tag_objects);
-    HeapUtil heap_util(&gObjectTagTable);
+    HeapUtil heap_util(ArtJvmTiEnv::AsArtJvmTiEnv(env)->object_tag_table.get());
     return heap_util.IterateThroughHeap(env, heap_filter, klass, callbacks, user_data);
   }
 
@@ -363,7 +373,7 @@
 
     art::ScopedObjectAccess soa(jni_env);
     art::ObjPtr<art::mirror::Object> obj = soa.Decode<art::mirror::Object>(object);
-    if (!gObjectTagTable.GetTag(obj.Ptr(), tag_ptr)) {
+    if (!ArtJvmTiEnv::AsArtJvmTiEnv(env)->object_tag_table->GetTag(obj.Ptr(), tag_ptr)) {
       *tag_ptr = 0;
     }
 
@@ -384,7 +394,7 @@
 
     art::ScopedObjectAccess soa(jni_env);
     art::ObjPtr<art::mirror::Object> obj = soa.Decode<art::mirror::Object>(object);
-    gObjectTagTable.Set(obj.Ptr(), tag);
+    ArtJvmTiEnv::AsArtJvmTiEnv(env)->object_tag_table->Set(obj.Ptr(), tag);
 
     return ERR(NONE);
   }
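+  // Illustrative sketch (not part of this change): with per-environment tag tables, tags set
+  // through one jvmtiEnv are invisible to another. Assuming env1 and env2 are two valid agent
+  // environments and obj is a live object reference:
+  //
+  //   jlong tag = 0;
+  //   env1->SetTag(obj, 42);     // Stored in env1's private ObjectTagTable.
+  //   env2->GetTag(obj, &tag);   // tag stays 0: env2's table has no entry for obj.
+  //   env1->GetTag(obj, &tag);   // tag == 42.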
@@ -403,12 +413,12 @@
     }
 
     art::ScopedObjectAccess soa(jni_env);
-    return gObjectTagTable.GetTaggedObjects(env,
-                                            tag_count,
-                                            tags,
-                                            count_ptr,
-                                            object_result_ptr,
-                                            tag_result_ptr);
+    return ArtJvmTiEnv::AsArtJvmTiEnv(env)->object_tag_table->GetTaggedObjects(env,
+                                                                               tag_count,
+                                                                               tags,
+                                                                               count_ptr,
+                                                                               object_result_ptr,
+                                                                               tag_result_ptr);
   }
 
   static jvmtiError ForceGarbageCollection(jvmtiEnv* env) {
@@ -417,169 +427,183 @@
 
   static jvmtiError IterateOverObjectsReachableFromObject(
       jvmtiEnv* env,
-      jobject object,
-      jvmtiObjectReferenceCallback object_reference_callback,
-      const void* user_data) {
+      jobject object ATTRIBUTE_UNUSED,
+      jvmtiObjectReferenceCallback object_reference_callback ATTRIBUTE_UNUSED,
+      const void* user_data ATTRIBUTE_UNUSED) {
     ENSURE_HAS_CAP(env, can_tag_objects);
     return ERR(NOT_IMPLEMENTED);
   }
 
-  static jvmtiError IterateOverReachableObjects(jvmtiEnv* env,
-                                                jvmtiHeapRootCallback heap_root_callback,
-                                                jvmtiStackReferenceCallback stack_ref_callback,
-                                                jvmtiObjectReferenceCallback object_ref_callback,
-                                                const void* user_data) {
+  static jvmtiError IterateOverReachableObjects(
+      jvmtiEnv* env,
+      jvmtiHeapRootCallback heap_root_callback ATTRIBUTE_UNUSED,
+      jvmtiStackReferenceCallback stack_ref_callback ATTRIBUTE_UNUSED,
+      jvmtiObjectReferenceCallback object_ref_callback ATTRIBUTE_UNUSED,
+      const void* user_data ATTRIBUTE_UNUSED) {
     ENSURE_HAS_CAP(env, can_tag_objects);
     return ERR(NOT_IMPLEMENTED);
   }
 
   static jvmtiError IterateOverHeap(jvmtiEnv* env,
-                                    jvmtiHeapObjectFilter object_filter,
-                                    jvmtiHeapObjectCallback heap_object_callback,
-                                    const void* user_data) {
+                                    jvmtiHeapObjectFilter object_filter ATTRIBUTE_UNUSED,
+                                    jvmtiHeapObjectCallback heap_object_callback ATTRIBUTE_UNUSED,
+                                    const void* user_data ATTRIBUTE_UNUSED) {
     ENSURE_HAS_CAP(env, can_tag_objects);
     return ERR(NOT_IMPLEMENTED);
   }
 
-  static jvmtiError IterateOverInstancesOfClass(jvmtiEnv* env,
-                                                jclass klass,
-                                                jvmtiHeapObjectFilter object_filter,
-                                                jvmtiHeapObjectCallback heap_object_callback,
-                                                const void* user_data) {
+  static jvmtiError IterateOverInstancesOfClass(
+      jvmtiEnv* env,
+      jclass klass ATTRIBUTE_UNUSED,
+      jvmtiHeapObjectFilter object_filter ATTRIBUTE_UNUSED,
+      jvmtiHeapObjectCallback heap_object_callback ATTRIBUTE_UNUSED,
+      const void* user_data ATTRIBUTE_UNUSED) {
     ENSURE_HAS_CAP(env, can_tag_objects);
     return ERR(NOT_IMPLEMENTED);
   }
 
   static jvmtiError GetLocalObject(jvmtiEnv* env,
-                                   jthread thread,
-                                   jint depth,
-                                   jint slot,
-                                   jobject* value_ptr) {
+                                   jthread thread ATTRIBUTE_UNUSED,
+                                   jint depth ATTRIBUTE_UNUSED,
+                                   jint slot ATTRIBUTE_UNUSED,
+                                   jobject* value_ptr ATTRIBUTE_UNUSED) {
     ENSURE_HAS_CAP(env, can_access_local_variables);
     return ERR(NOT_IMPLEMENTED);
   }
 
   static jvmtiError GetLocalInstance(jvmtiEnv* env,
-                                     jthread thread,
-                                     jint depth,
-                                     jobject* value_ptr) {
+                                     jthread thread ATTRIBUTE_UNUSED,
+                                     jint depth ATTRIBUTE_UNUSED,
+                                     jobject* value_ptr ATTRIBUTE_UNUSED) {
     ENSURE_HAS_CAP(env, can_access_local_variables);
     return ERR(NOT_IMPLEMENTED);
   }
 
   static jvmtiError GetLocalInt(jvmtiEnv* env,
-                                jthread thread,
-                                jint depth,
-                                jint slot,
-                                jint* value_ptr) {
+                                jthread thread ATTRIBUTE_UNUSED,
+                                jint depth ATTRIBUTE_UNUSED,
+                                jint slot ATTRIBUTE_UNUSED,
+                                jint* value_ptr ATTRIBUTE_UNUSED) {
     ENSURE_HAS_CAP(env, can_access_local_variables);
     return ERR(NOT_IMPLEMENTED);
   }
 
   static jvmtiError GetLocalLong(jvmtiEnv* env,
-                                 jthread thread,
-                                 jint depth,
-                                 jint slot,
-                                 jlong* value_ptr) {
+                                 jthread thread ATTRIBUTE_UNUSED,
+                                 jint depth ATTRIBUTE_UNUSED,
+                                 jint slot ATTRIBUTE_UNUSED,
+                                 jlong* value_ptr ATTRIBUTE_UNUSED) {
     ENSURE_HAS_CAP(env, can_access_local_variables);
     return ERR(NOT_IMPLEMENTED);
   }
 
   static jvmtiError GetLocalFloat(jvmtiEnv* env,
-                                  jthread thread,
-                                  jint depth,
-                                  jint slot,
-                                  jfloat* value_ptr) {
+                                  jthread thread ATTRIBUTE_UNUSED,
+                                  jint depth ATTRIBUTE_UNUSED,
+                                  jint slot ATTRIBUTE_UNUSED,
+                                  jfloat* value_ptr ATTRIBUTE_UNUSED) {
     ENSURE_HAS_CAP(env, can_access_local_variables);
     return ERR(NOT_IMPLEMENTED);
   }
 
   static jvmtiError GetLocalDouble(jvmtiEnv* env,
-                                   jthread thread,
-                                   jint depth,
-                                   jint slot,
-                                   jdouble* value_ptr) {
+                                   jthread thread ATTRIBUTE_UNUSED,
+                                   jint depth ATTRIBUTE_UNUSED,
+                                   jint slot ATTRIBUTE_UNUSED,
+                                   jdouble* value_ptr ATTRIBUTE_UNUSED) {
     ENSURE_HAS_CAP(env, can_access_local_variables);
     return ERR(NOT_IMPLEMENTED);
   }
 
   static jvmtiError SetLocalObject(jvmtiEnv* env,
-                                   jthread thread,
-                                   jint depth,
-                                   jint slot,
-                                   jobject value) {
+                                   jthread thread ATTRIBUTE_UNUSED,
+                                   jint depth ATTRIBUTE_UNUSED,
+                                   jint slot ATTRIBUTE_UNUSED,
+                                   jobject value ATTRIBUTE_UNUSED) {
     ENSURE_HAS_CAP(env, can_access_local_variables);
     return ERR(NOT_IMPLEMENTED);
   }
 
   static jvmtiError SetLocalInt(jvmtiEnv* env,
-                                jthread thread,
-                                jint depth,
-                                jint slot,
-                                jint value) {
+                                jthread thread ATTRIBUTE_UNUSED,
+                                jint depth ATTRIBUTE_UNUSED,
+                                jint slot ATTRIBUTE_UNUSED,
+                                jint value ATTRIBUTE_UNUSED) {
     ENSURE_HAS_CAP(env, can_access_local_variables);
     return ERR(NOT_IMPLEMENTED);
   }
 
   static jvmtiError SetLocalLong(jvmtiEnv* env,
-                                 jthread thread,
-                                 jint depth,
-                                 jint slot,
-                                 jlong value) {
+                                 jthread thread ATTRIBUTE_UNUSED,
+                                 jint depth ATTRIBUTE_UNUSED,
+                                 jint slot ATTRIBUTE_UNUSED,
+                                 jlong value ATTRIBUTE_UNUSED) {
     ENSURE_HAS_CAP(env, can_access_local_variables);
     return ERR(NOT_IMPLEMENTED);
   }
 
   static jvmtiError SetLocalFloat(jvmtiEnv* env,
-                                  jthread thread,
-                                  jint depth,
-                                  jint slot,
-                                  jfloat value) {
+                                  jthread thread ATTRIBUTE_UNUSED,
+                                  jint depth ATTRIBUTE_UNUSED,
+                                  jint slot ATTRIBUTE_UNUSED,
+                                  jfloat value ATTRIBUTE_UNUSED) {
     ENSURE_HAS_CAP(env, can_access_local_variables);
     return ERR(NOT_IMPLEMENTED);
   }
 
   static jvmtiError SetLocalDouble(jvmtiEnv* env,
-                                   jthread thread,
-                                   jint depth,
-                                   jint slot,
-                                   jdouble value) {
+                                   jthread thread ATTRIBUTE_UNUSED,
+                                   jint depth ATTRIBUTE_UNUSED,
+                                   jint slot ATTRIBUTE_UNUSED,
+                                   jdouble value ATTRIBUTE_UNUSED) {
     ENSURE_HAS_CAP(env, can_access_local_variables);
     return ERR(NOT_IMPLEMENTED);
   }
 
-  static jvmtiError SetBreakpoint(jvmtiEnv* env, jmethodID method, jlocation location) {
+  static jvmtiError SetBreakpoint(jvmtiEnv* env,
+                                  jmethodID method ATTRIBUTE_UNUSED,
+                                  jlocation location ATTRIBUTE_UNUSED) {
     ENSURE_HAS_CAP(env, can_generate_breakpoint_events);
     return ERR(NOT_IMPLEMENTED);
   }
 
-  static jvmtiError ClearBreakpoint(jvmtiEnv* env, jmethodID method, jlocation location) {
+  static jvmtiError ClearBreakpoint(jvmtiEnv* env,
+                                    jmethodID method ATTRIBUTE_UNUSED,
+                                    jlocation location ATTRIBUTE_UNUSED) {
     ENSURE_HAS_CAP(env, can_generate_breakpoint_events);
     return ERR(NOT_IMPLEMENTED);
   }
 
-  static jvmtiError SetFieldAccessWatch(jvmtiEnv* env, jclass klass, jfieldID field) {
+  static jvmtiError SetFieldAccessWatch(jvmtiEnv* env,
+                                        jclass klass ATTRIBUTE_UNUSED,
+                                        jfieldID field ATTRIBUTE_UNUSED) {
     ENSURE_HAS_CAP(env, can_generate_field_access_events);
     return ERR(NOT_IMPLEMENTED);
   }
 
-  static jvmtiError ClearFieldAccessWatch(jvmtiEnv* env, jclass klass, jfieldID field) {
+  static jvmtiError ClearFieldAccessWatch(jvmtiEnv* env,
+                                          jclass klass ATTRIBUTE_UNUSED,
+                                          jfieldID field ATTRIBUTE_UNUSED) {
     ENSURE_HAS_CAP(env, can_generate_field_access_events);
     return ERR(NOT_IMPLEMENTED);
   }
 
-  static jvmtiError SetFieldModificationWatch(jvmtiEnv* env, jclass klass, jfieldID field) {
+  static jvmtiError SetFieldModificationWatch(jvmtiEnv* env,
+                                              jclass klass ATTRIBUTE_UNUSED,
+                                              jfieldID field ATTRIBUTE_UNUSED) {
     ENSURE_HAS_CAP(env, can_generate_field_modification_events);
     return ERR(NOT_IMPLEMENTED);
   }
 
-  static jvmtiError ClearFieldModificationWatch(jvmtiEnv* env, jclass klass, jfieldID field) {
+  static jvmtiError ClearFieldModificationWatch(jvmtiEnv* env,
+                                                jclass klass ATTRIBUTE_UNUSED,
+                                                jfieldID field ATTRIBUTE_UNUSED) {
     ENSURE_HAS_CAP(env, can_generate_field_modification_events);
     return ERR(NOT_IMPLEMENTED);
   }
 
   static jvmtiError GetLoadedClasses(jvmtiEnv* env, jint* class_count_ptr, jclass** classes_ptr) {
-    HeapUtil heap_util(&gObjectTagTable);
+    HeapUtil heap_util(ArtJvmTiEnv::AsArtJvmTiEnv(env)->object_tag_table.get());
     return heap_util.GetLoadedClasses(env, class_count_ptr, classes_ptr);
   }
 
@@ -601,7 +625,9 @@
     return ClassUtil::GetClassStatus(env, klass, status_ptr);
   }
 
-  static jvmtiError GetSourceFileName(jvmtiEnv* env, jclass klass, char** source_name_ptr) {
+  static jvmtiError GetSourceFileName(jvmtiEnv* env,
+                                      jclass klass ATTRIBUTE_UNUSED,
+                                      char** source_name_ptr ATTRIBUTE_UNUSED) {
     ENSURE_HAS_CAP(env, can_get_source_file_name);
     return ERR(NOT_IMPLEMENTED);
   }
@@ -639,10 +665,10 @@
   }
 
   static jvmtiError GetConstantPool(jvmtiEnv* env,
-                                    jclass klass,
-                                    jint* constant_pool_count_ptr,
-                                    jint* constant_pool_byte_count_ptr,
-                                    unsigned char** constant_pool_bytes_ptr) {
+                                    jclass klass ATTRIBUTE_UNUSED,
+                                    jint* constant_pool_count_ptr ATTRIBUTE_UNUSED,
+                                    jint* constant_pool_byte_count_ptr ATTRIBUTE_UNUSED,
+                                    unsigned char** constant_pool_bytes_ptr ATTRIBUTE_UNUSED) {
     ENSURE_HAS_CAP(env, can_get_constant_pool);
     return ERR(NOT_IMPLEMENTED);
   }
@@ -668,8 +694,8 @@
   }
 
   static jvmtiError GetSourceDebugExtension(jvmtiEnv* env,
-                                            jclass klass,
-                                            char** source_debug_extension_ptr) {
+                                            jclass klass ATTRIBUTE_UNUSED,
+                                            char** source_debug_extension_ptr ATTRIBUTE_UNUSED) {
     ENSURE_HAS_CAP(env, can_get_source_debug_extension);
     return ERR(NOT_IMPLEMENTED);
   }
@@ -678,6 +704,7 @@
     ENSURE_HAS_CAP(env, can_retransform_classes);
     std::string error_msg;
     jvmtiError res = Transformer::RetransformClasses(ArtJvmTiEnv::AsArtJvmTiEnv(env),
+                                                     &gEventHandler,
                                                      art::Runtime::Current(),
                                                      art::Thread::Current(),
                                                      class_count,
@@ -695,6 +722,7 @@
     ENSURE_HAS_CAP(env, can_redefine_classes);
     std::string error_msg;
     jvmtiError res = Redefiner::RedefineClasses(ArtJvmTiEnv::AsArtJvmTiEnv(env),
+                                                &gEventHandler,
                                                 art::Runtime::Current(),
                                                 art::Thread::Current(),
                                                 class_count,
@@ -715,8 +743,8 @@
   }
 
   static jvmtiError GetObjectMonitorUsage(jvmtiEnv* env,
-                                          jobject object,
-                                          jvmtiMonitorUsage* info_ptr) {
+                                          jobject object ATTRIBUTE_UNUSED,
+                                          jvmtiMonitorUsage* info_ptr ATTRIBUTE_UNUSED) {
     ENSURE_HAS_CAP(env, can_get_monitor_info);
     return ERR(NOT_IMPLEMENTED);
   }
@@ -800,17 +828,17 @@
   }
 
   static jvmtiError GetLocalVariableTable(jvmtiEnv* env,
-                                          jmethodID method,
-                                          jint* entry_count_ptr,
-                                          jvmtiLocalVariableEntry** table_ptr) {
+                                          jmethodID method ATTRIBUTE_UNUSED,
+                                          jint* entry_count_ptr ATTRIBUTE_UNUSED,
+                                          jvmtiLocalVariableEntry** table_ptr ATTRIBUTE_UNUSED) {
     ENSURE_HAS_CAP(env, can_access_local_variables);
     return ERR(NOT_IMPLEMENTED);
   }
 
   static jvmtiError GetBytecodes(jvmtiEnv* env,
-                                 jmethodID method,
-                                 jint* bytecode_count_ptr,
-                                 unsigned char** bytecodes_ptr) {
+                                 jmethodID method ATTRIBUTE_UNUSED,
+                                 jint* bytecode_count_ptr ATTRIBUTE_UNUSED,
+                                 unsigned char** bytecodes_ptr ATTRIBUTE_UNUSED) {
     ENSURE_HAS_CAP(env, can_get_bytecodes);
     return ERR(NOT_IMPLEMENTED);
   }
@@ -828,12 +856,14 @@
     return MethodUtil::IsMethodObsolete(env, method, is_obsolete_ptr);
   }
 
-  static jvmtiError SetNativeMethodPrefix(jvmtiEnv* env, const char* prefix) {
+  static jvmtiError SetNativeMethodPrefix(jvmtiEnv* env, const char* prefix ATTRIBUTE_UNUSED) {
     ENSURE_HAS_CAP(env, can_set_native_method_prefix);
     return ERR(NOT_IMPLEMENTED);
   }
 
-  static jvmtiError SetNativeMethodPrefixes(jvmtiEnv* env, jint prefix_count, char** prefixes) {
+  static jvmtiError SetNativeMethodPrefixes(jvmtiEnv* env,
+                                            jint prefix_count ATTRIBUTE_UNUSED,
+                                            char** prefixes ATTRIBUTE_UNUSED) {
     ENSURE_HAS_CAP(env, can_set_native_method_prefix);
     return ERR(NOT_IMPLEMENTED);
   }
@@ -925,11 +955,12 @@
     return gEventHandler.SetEvent(art_env, art_thread, GetArtJvmtiEvent(art_env, event_type), mode);
   }
 
-  static jvmtiError GenerateEvents(jvmtiEnv* env, jvmtiEvent event_type) {
-    return ERR(NOT_IMPLEMENTED);
+  static jvmtiError GenerateEvents(jvmtiEnv* env ATTRIBUTE_UNUSED,
+                                   jvmtiEvent event_type ATTRIBUTE_UNUSED) {
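+    // Generating zero events and reporting success is a legal response when there is nothing
+    // to replay, which is presumably why this no longer returns NOT_IMPLEMENTED.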
+    return OK;
   }
 
-  static jvmtiError GetExtensionFunctions(jvmtiEnv* env,
+  static jvmtiError GetExtensionFunctions(jvmtiEnv* env ATTRIBUTE_UNUSED,
                                           jint* extension_count_ptr,
                                           jvmtiExtensionFunctionInfo** extensions) {
     // We do not have any extension functions.
@@ -939,7 +970,7 @@
     return ERR(NONE);
   }
 
-  static jvmtiError GetExtensionEvents(jvmtiEnv* env,
+  static jvmtiError GetExtensionEvents(jvmtiEnv* env ATTRIBUTE_UNUSED,
                                        jint* extension_count_ptr,
                                        jvmtiExtensionEventInfo** extensions) {
     // We do not have any extension events.
@@ -949,9 +980,9 @@
     return ERR(NONE);
   }
 
-  static jvmtiError SetExtensionEventCallback(jvmtiEnv* env,
-                                              jint extension_event_index,
-                                              jvmtiExtensionEvent callback) {
+  static jvmtiError SetExtensionEventCallback(jvmtiEnv* env ATTRIBUTE_UNUSED,
+                                              jint extension_event_index ATTRIBUTE_UNUSED,
+                                              jvmtiExtensionEvent callback ATTRIBUTE_UNUSED) {
     // We do not have any extension events, so any call is illegal.
     return ERR(ILLEGAL_ARGUMENT);
   }
@@ -969,10 +1000,15 @@
     ArtJvmTiEnv* art_env = static_cast<ArtJvmTiEnv*>(env);
     jvmtiError ret = OK;
     jvmtiCapabilities changed;
+    jvmtiCapabilities potential_capabilities;
+    ret = env->GetPotentialCapabilities(&potential_capabilities);
+    if (ret != OK) {
+      return ret;
+    }
 #define ADD_CAPABILITY(e) \
     do { \
       if (capabilities_ptr->e == 1) { \
-        if (kPotentialCapabilities.e == 1) { \
+        if (potential_capabilities.e == 1) { \
           if (art_env->capabilities.e != 1) { \
             art_env->capabilities.e = 1; \
             changed.e = 1; \
@@ -1103,22 +1139,26 @@
     return OK;
   }
 
-  static jvmtiError GetCurrentThreadCpuTimerInfo(jvmtiEnv* env, jvmtiTimerInfo* info_ptr) {
+  static jvmtiError GetCurrentThreadCpuTimerInfo(jvmtiEnv* env,
+                                                 jvmtiTimerInfo* info_ptr ATTRIBUTE_UNUSED) {
     ENSURE_HAS_CAP(env, can_get_current_thread_cpu_time);
     return ERR(NOT_IMPLEMENTED);
   }
 
-  static jvmtiError GetCurrentThreadCpuTime(jvmtiEnv* env, jlong* nanos_ptr) {
+  static jvmtiError GetCurrentThreadCpuTime(jvmtiEnv* env, jlong* nanos_ptr ATTRIBUTE_UNUSED) {
     ENSURE_HAS_CAP(env, can_get_current_thread_cpu_time);
     return ERR(NOT_IMPLEMENTED);
   }
 
-  static jvmtiError GetThreadCpuTimerInfo(jvmtiEnv* env, jvmtiTimerInfo* info_ptr) {
+  static jvmtiError GetThreadCpuTimerInfo(jvmtiEnv* env,
+                                          jvmtiTimerInfo* info_ptr ATTRIBUTE_UNUSED) {
     ENSURE_HAS_CAP(env, can_get_thread_cpu_time);
     return ERR(NOT_IMPLEMENTED);
   }
 
-  static jvmtiError GetThreadCpuTime(jvmtiEnv* env, jthread thread, jlong* nanos_ptr) {
+  static jvmtiError GetThreadCpuTime(jvmtiEnv* env,
+                                     jthread thread ATTRIBUTE_UNUSED,
+                                     jlong* nanos_ptr ATTRIBUTE_UNUSED) {
     ENSURE_HAS_CAP(env, can_get_thread_cpu_time);
     return ERR(NOT_IMPLEMENTED);
   }
@@ -1162,6 +1202,8 @@
   static jvmtiError DisposeEnvironment(jvmtiEnv* env) {
     ENSURE_VALID_ENV(env);
     gEventHandler.RemoveArtJvmTiEnv(ArtJvmTiEnv::AsArtJvmTiEnv(env));
+    art::Runtime::Current()->RemoveSystemWeakHolder(
+        ArtJvmTiEnv::AsArtJvmTiEnv(env)->object_tag_table.get());
     delete env;
     return OK;
   }
@@ -1188,13 +1230,13 @@
     ENSURE_NON_NULL(name_ptr);
     switch (error) {
 #define ERROR_CASE(e) case (JVMTI_ERROR_ ## e) : do { \
-          jvmtiError res = CopyString(env, \
-                                      "JVMTI_ERROR_"#e, \
-                                      reinterpret_cast<unsigned char**>(name_ptr)); \
-          if (res != OK) { \
+          jvmtiError res; \
+          JvmtiUniquePtr<char[]> copy = CopyString(env, "JVMTI_ERROR_"#e, &res); \
+          if (copy == nullptr) { \
             *name_ptr = nullptr; \
             return res; \
           } else { \
+            *name_ptr = copy.release(); \
             return OK; \
           } \
         } while (false)
@@ -1248,20 +1290,22 @@
       ERROR_CASE(INVALID_ENVIRONMENT);
 #undef ERROR_CASE
       default: {
-        jvmtiError res = CopyString(env,
-                                    "JVMTI_ERROR_UNKNOWN",
-                                    reinterpret_cast<unsigned char**>(name_ptr));
-        if (res != OK) {
+        jvmtiError res;
+        JvmtiUniquePtr<char[]> copy = CopyString(env, "JVMTI_ERROR_UNKNOWN", &res);
+        if (copy == nullptr) {
           *name_ptr = nullptr;
           return res;
         } else {
+          *name_ptr = copy.release();
           return ERR(ILLEGAL_ARGUMENT);
         }
       }
     }
   }
 
-  static jvmtiError SetVerboseFlag(jvmtiEnv* env, jvmtiVerboseFlag flag, jboolean value) {
+  static jvmtiError SetVerboseFlag(jvmtiEnv* env ATTRIBUTE_UNUSED,
+                                   jvmtiVerboseFlag flag,
+                                   jboolean value) {
     if (flag == jvmtiVerboseFlag::JVMTI_VERBOSE_OTHER) {
       // OTHER is special, as it's 0, so can't do a bit check.
       bool val = (value == JNI_TRUE) ? true : false;
@@ -1315,7 +1359,8 @@
     return ERR(NONE);
   }
 
-  static jvmtiError GetJLocationFormat(jvmtiEnv* env, jvmtiJlocationFormat* format_ptr) {
+  static jvmtiError GetJLocationFormat(jvmtiEnv* env ATTRIBUTE_UNUSED,
+                                       jvmtiJlocationFormat* format_ptr) {
     // Report BCI as jlocation format. We report dex bytecode indices.
     if (format_ptr == nullptr) {
       return ERR(NULL_POINTER);
@@ -1333,13 +1378,25 @@
          version == JVMTI_VERSION;
 }
 
+extern const jvmtiInterface_1 gJvmtiInterface;
+ArtJvmTiEnv::ArtJvmTiEnv(art::JavaVMExt* runtime, EventHandler* event_handler)
+    : art_vm(runtime),
+      local_data(nullptr),
+      capabilities(),
+      object_tag_table(new ObjectTagTable(event_handler)) {
+  functions = &gJvmtiInterface;
+}
+
 // Creates an ArtJvmTiEnv, registers it with the event handler and the runtime's system-weak
 // holders, and returns it through the new_jvmtiEnv out-parameter.
 static void CreateArtJvmTiEnv(art::JavaVMExt* vm, /*out*/void** new_jvmtiEnv) {
-  struct ArtJvmTiEnv* env = new ArtJvmTiEnv(vm);
+  struct ArtJvmTiEnv* env = new ArtJvmTiEnv(vm, &gEventHandler);
   *new_jvmtiEnv = env;
 
   gEventHandler.RegisterArtJvmTiEnv(env);
+
+  art::Runtime::Current()->AddSystemWeakHolder(
+      ArtJvmTiEnv::AsArtJvmTiEnv(env)->object_tag_table.get());
 }
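+// Illustrative lifecycle sketch (not part of this change): creation registers the environment's
+// tag table as a system-weak holder, and DisposeEnvironment above removes it again, so the
+// runtime only sweeps the tables of live environments:
+//
+//   void* raw_env = nullptr;
+//   CreateArtJvmTiEnv(vm, &raw_env);                   // ... AddSystemWeakHolder(table)
+//   jvmtiEnv* env = static_cast<jvmtiEnv*>(raw_env);
+//   // ... use env ...
+//   env->DisposeEnvironment();                         // ... RemoveSystemWeakHolder(table)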
 
 // A hook that the runtime uses to allow plugins to handle GetEnv calls. It returns true and
@@ -1369,9 +1426,9 @@
   ClassUtil::Register(&gEventHandler);
   DumpUtil::Register(&gEventHandler);
   SearchUtil::Register();
+  HeapUtil::Register();
 
   runtime->GetJavaVM()->AddEnvironmentHook(GetEnvHandler);
-  runtime->AddSystemWeakHolder(&gObjectTagTable);
 
   return true;
 }
@@ -1382,6 +1439,7 @@
   ClassUtil::Unregister();
   DumpUtil::Unregister();
   SearchUtil::Unregister();
+  HeapUtil::Unregister();
 
   return true;
 }
diff --git a/runtime/openjdkjvmti/art_jvmti.h b/runtime/openjdkjvmti/art_jvmti.h
index 106165c..2ff3a47 100644
--- a/runtime/openjdkjvmti/art_jvmti.h
+++ b/runtime/openjdkjvmti/art_jvmti.h
@@ -33,6 +33,7 @@
 #define ART_RUNTIME_OPENJDKJVMTI_ART_JVMTI_H_
 
 #include <memory>
+#include <type_traits>
 
 #include <jni.h>
 
@@ -47,8 +48,7 @@
 
 namespace openjdkjvmti {
 
-extern const jvmtiInterface_1 gJvmtiInterface;
-extern EventHandler gEventHandler;
+class ObjectTagTable;
 
 // A structure that is a jvmtiEnv with additional information for the runtime.
 struct ArtJvmTiEnv : public jvmtiEnv {
@@ -59,10 +59,10 @@
   EventMasks event_masks;
   std::unique_ptr<jvmtiEventCallbacks> event_callbacks;
 
-  explicit ArtJvmTiEnv(art::JavaVMExt* runtime)
-      : art_vm(runtime), local_data(nullptr), capabilities() {
-    functions = &gJvmtiInterface;
-  }
+  // Tagging is specific to the jvmtiEnv.
+  std::unique_ptr<ObjectTagTable> object_tag_table;
+
+  ArtJvmTiEnv(art::JavaVMExt* runtime, EventHandler* event_handler);
 
   static ArtJvmTiEnv* AsArtJvmTiEnv(jvmtiEnv* env) {
     return art::down_cast<ArtJvmTiEnv*>(env);
@@ -86,6 +86,7 @@
   return ret_value;
 }
 
+template <typename T>
 class JvmtiDeleter {
  public:
   JvmtiDeleter() : env_(nullptr) {}
@@ -95,9 +96,9 @@
   JvmtiDeleter(JvmtiDeleter&&) = default;
   JvmtiDeleter& operator=(const JvmtiDeleter&) = default;
 
-  void operator()(unsigned char* ptr) const {
+  void operator()(T* ptr) const {
     CHECK(env_ != nullptr);
-    jvmtiError ret = env_->Deallocate(ptr);
+    jvmtiError ret = env_->Deallocate(reinterpret_cast<unsigned char*>(ptr));
     CHECK(ret == ERR(NONE));
   }
 
@@ -105,12 +106,65 @@
   mutable jvmtiEnv* env_;
 };
 
-using JvmtiUniquePtr = std::unique_ptr<unsigned char, JvmtiDeleter>;
+template <typename T>
+class JvmtiDeleter<T[]> {
+ public:
+  JvmtiDeleter() : env_(nullptr) {}
+  explicit JvmtiDeleter(jvmtiEnv* env) : env_(env) {}
+
+  JvmtiDeleter(const JvmtiDeleter&) = default;
+  JvmtiDeleter(JvmtiDeleter&&) = default;
+  JvmtiDeleter& operator=(const JvmtiDeleter&) = default;
+
+  template <typename U>
+  void operator()(U* ptr) const {
+    CHECK(env_ != nullptr);
+    jvmtiError ret = env_->Deallocate(reinterpret_cast<unsigned char*>(ptr));
+    CHECK(ret == ERR(NONE));
+  }
+
+ private:
+  mutable jvmtiEnv* env_;
+};
+
+template <typename T>
+using JvmtiUniquePtr = std::unique_ptr<T, JvmtiDeleter<T>>;
 
 template <typename T>
 ALWAYS_INLINE
-static inline JvmtiUniquePtr MakeJvmtiUniquePtr(jvmtiEnv* env, T* mem) {
-  return JvmtiUniquePtr(reinterpret_cast<unsigned char*>(mem), JvmtiDeleter(env));
+static inline JvmtiUniquePtr<T> MakeJvmtiUniquePtr(jvmtiEnv* env, T* mem) {
+  return JvmtiUniquePtr<T>(mem, JvmtiDeleter<T>(env));
+}
+
+template <typename T>
+ALWAYS_INLINE
+static inline JvmtiUniquePtr<T> MakeJvmtiUniquePtr(jvmtiEnv* env, unsigned char* mem) {
+  return JvmtiUniquePtr<T>(reinterpret_cast<T*>(mem), JvmtiDeleter<T>(env));
+}
+
+template <typename T>
+ALWAYS_INLINE
+static inline JvmtiUniquePtr<T> AllocJvmtiUniquePtr(jvmtiEnv* env, jvmtiError* error) {
+  unsigned char* tmp;
+  *error = env->Allocate(sizeof(T), &tmp);
+  if (*error != ERR(NONE)) {
+    return JvmtiUniquePtr<T>();
+  }
+  return JvmtiUniquePtr<T>(reinterpret_cast<T*>(tmp), JvmtiDeleter<T>(env));
+}
+
+template <typename T>
+ALWAYS_INLINE
+static inline JvmtiUniquePtr<T> AllocJvmtiUniquePtr(jvmtiEnv* env,
+                                                    size_t count,
+                                                    jvmtiError* error) {
+  unsigned char* tmp;
+  *error = env->Allocate(sizeof(typename std::remove_extent<T>::type) * count, &tmp);
+  if (*error != ERR(NONE)) {
+    return JvmtiUniquePtr<T>();
+  }
+  return JvmtiUniquePtr<T>(reinterpret_cast<typename std::remove_extent<T>::type*>(tmp),
+                           JvmtiDeleter<T>(env));
 }
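+
+// Illustrative usage of the typed smart pointers above (assuming a valid jvmtiEnv* env); both
+// variants release their memory through env->Deallocate when they go out of scope:
+//
+//   jvmtiError err;
+//   JvmtiUniquePtr<jvmtiFrameInfo> one = AllocJvmtiUniquePtr<jvmtiFrameInfo>(env, &err);
+//   JvmtiUniquePtr<char[]> many = AllocJvmtiUniquePtr<char[]>(env, /* count */ 16, &err);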
 
 ALWAYS_INLINE
@@ -129,15 +183,12 @@
 }
 
 ALWAYS_INLINE
-static inline jvmtiError CopyString(jvmtiEnv* env, const char* src, unsigned char** copy) {
+static inline JvmtiUniquePtr<char[]> CopyString(jvmtiEnv* env, const char* src, jvmtiError* error) {
   size_t len = strlen(src) + 1;
-  unsigned char* buf;
-  jvmtiError ret = env->Allocate(len, &buf);
-  if (ret != ERR(NONE)) {
-    return ret;
+  JvmtiUniquePtr<char[]> ret = AllocJvmtiUniquePtr<char[]>(env, len, error);
+  if (ret != nullptr) {
+    strcpy(ret.get(), src);
   }
-  strcpy(reinterpret_cast<char*>(buf), src);
-  *copy = buf;
   return ret;
 }
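+
+// Illustrative sketch of the new CopyString calling convention (env assumed valid), mirroring
+// its use in GetErrorName in OpenjdkJvmTi.cc: ownership stays in the smart pointer until
+// release() hands the buffer to the caller, who frees it with Deallocate.
+//
+//   jvmtiError err;
+//   JvmtiUniquePtr<char[]> copy = CopyString(env, "hello", &err);
+//   if (copy == nullptr) {
+//     return err;                // Allocation failed.
+//   }
+//   *name_ptr = copy.release();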
 
diff --git a/runtime/openjdkjvmti/jvmti.h b/runtime/openjdkjvmti/include/jvmti.h
similarity index 100%
rename from runtime/openjdkjvmti/jvmti.h
rename to runtime/openjdkjvmti/include/jvmti.h
diff --git a/runtime/openjdkjvmti/jvmti_weak_table-inl.h b/runtime/openjdkjvmti/jvmti_weak_table-inl.h
new file mode 100644
index 0000000..f67fffc
--- /dev/null
+++ b/runtime/openjdkjvmti/jvmti_weak_table-inl.h
@@ -0,0 +1,389 @@
+/* Copyright (C) 2017 The Android Open Source Project
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This file implements interfaces from the file jvmti.h. This implementation
+ * is licensed under the same terms as the file jvmti.h.  The
+ * copyright and license information for the file jvmti.h follows.
+ *
+ * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+#ifndef ART_RUNTIME_OPENJDKJVMTI_JVMTI_WEAK_TABLE_INL_H_
+#define ART_RUNTIME_OPENJDKJVMTI_JVMTI_WEAK_TABLE_INL_H_
+
+#include "jvmti_weak_table.h"
+
+#include <limits>
+
+#include "art_jvmti.h"
+#include "base/logging.h"
+#include "gc/allocation_listener.h"
+#include "instrumentation.h"
+#include "jni_env_ext-inl.h"
+#include "jvmti_allocator.h"
+#include "mirror/class.h"
+#include "mirror/object.h"
+#include "runtime.h"
+#include "ScopedLocalRef.h"
+
+namespace openjdkjvmti {
+
+template <typename T>
+void JvmtiWeakTable<T>::Lock() {
+  allow_disallow_lock_.ExclusiveLock(art::Thread::Current());
+}
+template <typename T>
+void JvmtiWeakTable<T>::Unlock() {
+  allow_disallow_lock_.ExclusiveUnlock(art::Thread::Current());
+}
+template <typename T>
+void JvmtiWeakTable<T>::AssertLocked() {
+  allow_disallow_lock_.AssertHeld(art::Thread::Current());
+}
+
+template <typename T>
+void JvmtiWeakTable<T>::UpdateTableWithReadBarrier() {
+  update_since_last_sweep_ = true;
+
+  auto WithReadBarrierUpdater = [&](const art::GcRoot<art::mirror::Object>& original_root,
+                                    art::mirror::Object* original_obj ATTRIBUTE_UNUSED)
+     REQUIRES_SHARED(art::Locks::mutator_lock_) {
+    return original_root.Read<art::kWithReadBarrier>();
+  };
+
+  UpdateTableWith<decltype(WithReadBarrierUpdater), kIgnoreNull>(WithReadBarrierUpdater);
+}
+
+template <typename T>
+bool JvmtiWeakTable<T>::GetTagSlowPath(art::Thread* self, art::mirror::Object* obj, T* result) {
+  // Under concurrent GC, there is a window between moving objects and sweeping of system
+  // weaks in which mutators are active. We may receive a to-space object pointer in obj,
+  // but still have from-space pointers in the table. Explicitly update the table once.
+  // Note: this will keep *all* objects in the table live, but should be a rare occurrence.
+  UpdateTableWithReadBarrier();
+  return GetTagLocked(self, obj, result);
+}
+
+template <typename T>
+bool JvmtiWeakTable<T>::Remove(art::mirror::Object* obj, /* out */ T* tag) {
+  art::Thread* self = art::Thread::Current();
+  art::MutexLock mu(self, allow_disallow_lock_);
+  Wait(self);
+
+  return RemoveLocked(self, obj, tag);
+}
+template <typename T>
+bool JvmtiWeakTable<T>::RemoveLocked(art::mirror::Object* obj, T* tag) {
+  art::Thread* self = art::Thread::Current();
+  allow_disallow_lock_.AssertHeld(self);
+  Wait(self);
+
+  return RemoveLocked(self, obj, tag);
+}
+
+template <typename T>
+bool JvmtiWeakTable<T>::RemoveLocked(art::Thread* self, art::mirror::Object* obj, T* tag) {
+  auto it = tagged_objects_.find(art::GcRoot<art::mirror::Object>(obj));
+  if (it != tagged_objects_.end()) {
+    if (tag != nullptr) {
+      *tag = it->second;
+    }
+    tagged_objects_.erase(it);
+    return true;
+  }
+
+  if (art::kUseReadBarrier && self->GetIsGcMarking() && !update_since_last_sweep_) {
+    // Under concurrent GC, there is a window between moving objects and sweeping of system
+    // weaks in which mutators are active. We may receive a to-space object pointer in obj,
+    // but still have from-space pointers in the table. Explicitly update the table once.
+    // Note: this will keep *all* objects in the table live, but should be a rare occurrence.
+
+    // Update the table.
+    UpdateTableWithReadBarrier();
+
+    // And try again.
+    return RemoveLocked(self, obj, tag);
+  }
+
+  // Not in here.
+  return false;
+}
+
+template <typename T>
+bool JvmtiWeakTable<T>::Set(art::mirror::Object* obj, T new_tag) {
+  art::Thread* self = art::Thread::Current();
+  art::MutexLock mu(self, allow_disallow_lock_);
+  Wait(self);
+
+  return SetLocked(self, obj, new_tag);
+}
+template <typename T>
+bool JvmtiWeakTable<T>::SetLocked(art::mirror::Object* obj, T new_tag) {
+  art::Thread* self = art::Thread::Current();
+  allow_disallow_lock_.AssertHeld(self);
+  Wait(self);
+
+  return SetLocked(self, obj, new_tag);
+}
+
+template <typename T>
+bool JvmtiWeakTable<T>::SetLocked(art::Thread* self, art::mirror::Object* obj, T new_tag) {
+  auto it = tagged_objects_.find(art::GcRoot<art::mirror::Object>(obj));
+  if (it != tagged_objects_.end()) {
+    it->second = new_tag;
+    return true;
+  }
+
+  if (art::kUseReadBarrier && self->GetIsGcMarking() && !update_since_last_sweep_) {
+    // Under concurrent GC, there is a window between moving objects and sweeping of system
+    // weaks in which mutators are active. We may receive a to-space object pointer in obj,
+    // but still have from-space pointers in the table. Explicitly update the table once.
+    // Note: this will keep *all* objects in the table live, but should be a rare occurrence.
+
+    // Update the table.
+    UpdateTableWithReadBarrier();
+
+    // And try again.
+    return SetLocked(self, obj, new_tag);
+  }
+
+  // New element.
+  auto insert_it = tagged_objects_.emplace(art::GcRoot<art::mirror::Object>(obj), new_tag);
+  DCHECK(insert_it.second);
+  return false;
+}
+
+template <typename T>
+void JvmtiWeakTable<T>::Sweep(art::IsMarkedVisitor* visitor) {
+  if (DoesHandleNullOnSweep()) {
+    SweepImpl<true>(visitor);
+  } else {
+    SweepImpl<false>(visitor);
+  }
+
+  // Under concurrent GC, there is a window between moving objects and sweeping of system
+  // weaks in which mutators are active. We may receive a to-space object pointer in obj,
+  // but still have from-space pointers in the table. We explicitly update the table then
+  // to ensure we compare against to-space pointers. But we want to do this only once. Once
+  // sweeping is done, we know all objects are to-space pointers until the next GC cycle,
+  // so we re-enable the explicit update for the next marking.
+  update_since_last_sweep_ = false;
+}
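+
+// Sweep is not called from jvmti code itself. Because JvmtiWeakTable is an
+// art::gc::SystemWeakHolder registered through Runtime::AddSystemWeakHolder, the garbage
+// collector reaches it while sweeping system weaks; schematically (a sketch, not actual
+// runtime code):
+//
+//   for (auto* holder : system_weak_holders_) {
+//     holder->Sweep(is_marked_visitor);  // Dispatches to JvmtiWeakTable<T>::Sweep above.
+//   }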
+
+template <typename T>
+template <bool kHandleNull>
+void JvmtiWeakTable<T>::SweepImpl(art::IsMarkedVisitor* visitor) {
+  art::Thread* self = art::Thread::Current();
+  art::MutexLock mu(self, allow_disallow_lock_);
+
+  auto IsMarkedUpdater = [&](const art::GcRoot<art::mirror::Object>& original_root ATTRIBUTE_UNUSED,
+                             art::mirror::Object* original_obj) {
+    return visitor->IsMarked(original_obj);
+  };
+
+  UpdateTableWith<decltype(IsMarkedUpdater),
+                  kHandleNull ? kCallHandleNull : kRemoveNull>(IsMarkedUpdater);
+}
+
+template <typename T>
+template <typename Updater, typename JvmtiWeakTable<T>::TableUpdateNullTarget kTargetNull>
+ALWAYS_INLINE inline void JvmtiWeakTable<T>::UpdateTableWith(Updater& updater) {
+  // We optimistically hope that elements will still be well-distributed when re-inserting them.
+  // So we play with the map's mechanics and postpone rehashing. This avoids the need for a side
+  // vector and a second pass.
+  float original_max_load_factor = tagged_objects_.max_load_factor();
+  tagged_objects_.max_load_factor(std::numeric_limits<float>::max());
+  // Remember the bucket count so we can check (via the DCHECK below) that the raised max load
+  // factor really does prevent rehashing.
+  size_t original_bucket_count = tagged_objects_.bucket_count();
+
+  for (auto it = tagged_objects_.begin(); it != tagged_objects_.end();) {
+    DCHECK(!it->first.IsNull());
+    art::mirror::Object* original_obj = it->first.template Read<art::kWithoutReadBarrier>();
+    art::mirror::Object* target_obj = updater(it->first, original_obj);
+    if (original_obj != target_obj) {
+      if (kTargetNull == kIgnoreNull && target_obj == nullptr) {
+        // Ignore null target, don't do anything.
+      } else {
+        T tag = it->second;
+        it = tagged_objects_.erase(it);
+        if (target_obj != nullptr) {
+          tagged_objects_.emplace(art::GcRoot<art::mirror::Object>(target_obj), tag);
+          DCHECK_EQ(original_bucket_count, tagged_objects_.bucket_count());
+        } else if (kTargetNull == kCallHandleNull) {
+          HandleNullSweep(tag);
+        }
+        continue;  // Iterator was implicitly updated by erase.
+      }
+    }
+    it++;
+  }
+
+  tagged_objects_.max_load_factor(original_max_load_factor);
+  // TODO: consider rehash here.
+}
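+
+// A self-contained sketch of the load-factor trick above, reduced to plain ints (Transform
+// stands in for the updater): pinning max_load_factor at FLT_MAX means erase+emplace cannot
+// trigger a rehash, so the loop iterator stays valid. Re-inserted elements may be visited
+// again, so the updater must tolerate already-updated entries.
+//
+//   std::unordered_map<int, int> map;  // ... populated elsewhere ...
+//   float old_factor = map.max_load_factor();
+//   map.max_load_factor(std::numeric_limits<float>::max());
+//   for (auto it = map.begin(); it != map.end();) {
+//     int new_key = Transform(it->first);
+//     if (new_key != it->first) {
+//       int value = it->second;
+//       it = map.erase(it);           // erase returns the next valid iterator.
+//       map.emplace(new_key, value);  // Safe: no rehash can occur.
+//       continue;
+//     }
+//     ++it;
+//   }
+//   map.max_load_factor(old_factor);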
+
+template <typename T>
+template <typename Storage, class Allocator>
+struct JvmtiWeakTable<T>::ReleasableContainer {
+  using allocator_type = Allocator;
+
+  explicit ReleasableContainer(const allocator_type& alloc, size_t reserve = 10)
+      : allocator(alloc),
+        data(reserve > 0 ? allocator.allocate(reserve) : nullptr),
+        size(0),
+        capacity(reserve) {
+  }
+
+  ~ReleasableContainer() {
+    if (data != nullptr) {
+      allocator.deallocate(data, capacity);
+      capacity = 0;
+      size = 0;
+    }
+  }
+
+  Storage* Release() {
+    Storage* tmp = data;
+
+    data = nullptr;
+    size = 0;
+    capacity = 0;
+
+    return tmp;
+  }
+
+  void Resize(size_t new_capacity) {
+    CHECK_GT(new_capacity, capacity);
+
+    Storage* tmp = allocator.allocate(new_capacity);
+    DCHECK(tmp != nullptr);
+    if (data != nullptr) {
+      memcpy(tmp, data, sizeof(Storage) * size);
+    }
+    Storage* old = data;
+    data = tmp;
+    allocator.deallocate(old, capacity);
+    capacity = new_capacity;
+  }
+
+  void Pushback(const Storage& elem) {
+    if (size == capacity) {
+      size_t new_capacity = 2 * capacity + 1;
+      Resize(new_capacity);
+    }
+    data[size++] = elem;
+  }
+
+  Allocator allocator;
+  Storage* data;
+  size_t size;
+  size_t capacity;
+};
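+
+// Illustrative use of the container (as in GetTaggedObjects below): the backing storage comes
+// from env->Allocate via JvmtiAllocator, so Release() can hand the raw array straight to the
+// agent, which frees it with env->Deallocate.
+//
+//   JvmtiAllocator<void> allocator(env);
+//   ReleasableContainer<jobject, JvmtiAllocator<jobject>> objs(allocator, /* reserve */ 10);
+//   objs.Pushback(some_local_ref);
+//   jobject* raw = objs.Release();  // Caller now owns the env-allocated array.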
+
+template <typename T>
+jvmtiError JvmtiWeakTable<T>::GetTaggedObjects(jvmtiEnv* jvmti_env,
+                                               jint tag_count,
+                                               const T* tags,
+                                               jint* count_ptr,
+                                               jobject** object_result_ptr,
+                                               T** tag_result_ptr) {
+  if (tag_count < 0) {
+    return ERR(ILLEGAL_ARGUMENT);
+  }
+  if (tags == nullptr || count_ptr == nullptr) {
+    return ERR(NULL_POINTER);
+  }
+  // The null checks come first so that a null `tags` is never dereferenced below.
+  for (size_t i = 0; i != static_cast<size_t>(tag_count); ++i) {
+    if (tags[i] == 0) {
+      return ERR(ILLEGAL_ARGUMENT);
+    }
+  }
+
+  art::Thread* self = art::Thread::Current();
+  art::MutexLock mu(self, allow_disallow_lock_);
+  Wait(self);
+
+  art::JNIEnvExt* jni_env = self->GetJniEnv();
+
+  constexpr size_t kDefaultSize = 10;
+  size_t initial_object_size;
+  size_t initial_tag_size;
+  if (tag_count == 0) {
+    initial_object_size = (object_result_ptr != nullptr) ? tagged_objects_.size() : 0;
+    initial_tag_size = (tag_result_ptr != nullptr) ? tagged_objects_.size() : 0;
+  } else {
+    initial_object_size = initial_tag_size = kDefaultSize;
+  }
+  JvmtiAllocator<void> allocator(jvmti_env);
+  ReleasableContainer<jobject, JvmtiAllocator<jobject>> selected_objects(allocator,
+                                                                         initial_object_size);
+  ReleasableContainer<T, JvmtiAllocator<T>> selected_tags(allocator, initial_tag_size);
+
+  size_t count = 0;
+  for (auto& pair : tagged_objects_) {
+    bool select;
+    if (tag_count > 0) {
+      select = false;
+      for (size_t i = 0; i != static_cast<size_t>(tag_count); ++i) {
+        if (tags[i] == pair.second) {
+          select = true;
+          break;
+        }
+      }
+    } else {
+      select = true;
+    }
+
+    if (select) {
+      art::mirror::Object* obj = pair.first.template Read<art::kWithReadBarrier>();
+      if (obj != nullptr) {
+        count++;
+        if (object_result_ptr != nullptr) {
+          selected_objects.Pushback(jni_env->AddLocalReference<jobject>(obj));
+        }
+        if (tag_result_ptr != nullptr) {
+          selected_tags.Pushback(pair.second);
+        }
+      }
+    }
+  }
+
+  if (object_result_ptr != nullptr) {
+    *object_result_ptr = selected_objects.Release();
+  }
+  if (tag_result_ptr != nullptr) {
+    *tag_result_ptr = selected_tags.Release();
+  }
+  *count_ptr = static_cast<jint>(count);
+  return ERR(NONE);
+}
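+
+// Agent-visible behavior of the function above through the standard entry point (jvmti assumed
+// to be a valid jvmtiEnv*); per the loop above, tag_count == 0 selects every tagged object:
+//
+//   jint count = 0;
+//   jobject* objects = nullptr;
+//   jlong* tags = nullptr;
+//   jlong wanted[] = { 42 };
+//   jvmti->GetObjectsWithTags(1, wanted, &count, &objects, &tags);
+//   // ... use objects[0..count) and tags[0..count) ...
+//   jvmti->Deallocate(reinterpret_cast<unsigned char*>(objects));
+//   jvmti->Deallocate(reinterpret_cast<unsigned char*>(tags));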
+
+}  // namespace openjdkjvmti
+
+#endif  // ART_RUNTIME_OPENJDKJVMTI_JVMTI_WEAK_TABLE_INL_H_
diff --git a/runtime/openjdkjvmti/jvmti_weak_table.h b/runtime/openjdkjvmti/jvmti_weak_table.h
new file mode 100644
index 0000000..ae36122
--- /dev/null
+++ b/runtime/openjdkjvmti/jvmti_weak_table.h
@@ -0,0 +1,219 @@
+/* Copyright (C) 2017 The Android Open Source Project
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This file implements interfaces from the file jvmti.h. This implementation
+ * is licensed under the same terms as the file jvmti.h.  The
+ * copyright and license information for the file jvmti.h follows.
+ *
+ * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+#ifndef ART_RUNTIME_OPENJDKJVMTI_JVMTI_WEAK_TABLE_H_
+#define ART_RUNTIME_OPENJDKJVMTI_JVMTI_WEAK_TABLE_H_
+
+#include <unordered_map>
+
+#include "base/macros.h"
+#include "base/mutex.h"
+#include "gc/system_weak.h"
+#include "gc_root-inl.h"
+#include "globals.h"
+#include "jvmti.h"
+#include "mirror/object.h"
+#include "thread-inl.h"
+
+namespace openjdkjvmti {
+
+class EventHandler;
+
+// A system-weak container mapping objects to elements of the template type. This corresponds
+// to a weak hash map. For historical reasons the stored value is called "tag."
+template <typename T>
+class JvmtiWeakTable : public art::gc::SystemWeakHolder {
+ public:
+  JvmtiWeakTable()
+      : art::gc::SystemWeakHolder(kTaggingLockLevel),
+        update_since_last_sweep_(false) {
+  }
+
+  // Remove the mapping for the given object, returning whether such a mapping existed (and the old
+  // value).
+  bool Remove(art::mirror::Object* obj, /* out */ T* tag)
+      REQUIRES_SHARED(art::Locks::mutator_lock_)
+      REQUIRES(!allow_disallow_lock_);
+  bool RemoveLocked(art::mirror::Object* obj, /* out */ T* tag)
+      REQUIRES_SHARED(art::Locks::mutator_lock_)
+      REQUIRES(allow_disallow_lock_);
+
+  // Set the mapping for the given object. Returns true if this overwrites an already existing
+  // mapping.
+  virtual bool Set(art::mirror::Object* obj, T tag)
+      REQUIRES_SHARED(art::Locks::mutator_lock_)
+      REQUIRES(!allow_disallow_lock_);
+  virtual bool SetLocked(art::mirror::Object* obj, T tag)
+      REQUIRES_SHARED(art::Locks::mutator_lock_)
+      REQUIRES(allow_disallow_lock_);
+
+  // Return the value associated with the given object. Returns true if the mapping exists, false
+  // otherwise.
+  bool GetTag(art::mirror::Object* obj, /* out */ T* result)
+      REQUIRES_SHARED(art::Locks::mutator_lock_)
+      REQUIRES(!allow_disallow_lock_) {
+    art::Thread* self = art::Thread::Current();
+    art::MutexLock mu(self, allow_disallow_lock_);
+    Wait(self);
+
+    return GetTagLocked(self, obj, result);
+  }
+  bool GetTagLocked(art::mirror::Object* obj, /* out */ T* result)
+      REQUIRES_SHARED(art::Locks::mutator_lock_)
+      REQUIRES(allow_disallow_lock_) {
+    art::Thread* self = art::Thread::Current();
+    allow_disallow_lock_.AssertHeld(self);
+    Wait(self);
+
+    return GetTagLocked(self, obj, result);
+  }
+
+  // Sweep the container. DO NOT CALL MANUALLY.
+  void Sweep(art::IsMarkedVisitor* visitor)
+      REQUIRES_SHARED(art::Locks::mutator_lock_)
+      REQUIRES(!allow_disallow_lock_);
+
+  // Return all objects that have a value mapping in tags.
+  jvmtiError GetTaggedObjects(jvmtiEnv* jvmti_env,
+                              jint tag_count,
+                              const T* tags,
+                              /* out */ jint* count_ptr,
+                              /* out */ jobject** object_result_ptr,
+                              /* out */ T** tag_result_ptr)
+      REQUIRES_SHARED(art::Locks::mutator_lock_)
+      REQUIRES(!allow_disallow_lock_);
+
+  // Locking functions, to allow coarse-grained locking and amortization.
+  void Lock() ACQUIRE(allow_disallow_lock_);
+  void Unlock() RELEASE(allow_disallow_lock_);
+  void AssertLocked() ASSERT_CAPABILITY(allow_disallow_lock_);
+
+ protected:
+  // Should HandleNullSweep be called when Sweep detects the release of an object?
+  virtual bool DoesHandleNullOnSweep() {
+    return false;
+  }
+  // If DoesHandleNullOnSweep returns true, this function will be called.
+  virtual void HandleNullSweep(T tag ATTRIBUTE_UNUSED) {}
+
+ private:
+  bool SetLocked(art::Thread* self, art::mirror::Object* obj, T tag)
+      REQUIRES_SHARED(art::Locks::mutator_lock_)
+      REQUIRES(allow_disallow_lock_);
+
+  bool RemoveLocked(art::Thread* self, art::mirror::Object* obj, /* out */ T* tag)
+      REQUIRES_SHARED(art::Locks::mutator_lock_)
+      REQUIRES(allow_disallow_lock_);
+
+  bool GetTagLocked(art::Thread* self, art::mirror::Object* obj, /* out */ T* result)
+      REQUIRES_SHARED(art::Locks::mutator_lock_)
+      REQUIRES(allow_disallow_lock_) {
+    auto it = tagged_objects_.find(art::GcRoot<art::mirror::Object>(obj));
+    if (it != tagged_objects_.end()) {
+      *result = it->second;
+      return true;
+    }
+
+    // Performance optimization: To avoid multiple table updates, ensure that during GC we
+    // only update once. See the comment on the implementation of GetTagSlowPath.
+    if (art::kUseReadBarrier &&
+        self != nullptr &&
+        self->GetIsGcMarking() &&
+        !update_since_last_sweep_) {
+      return GetTagSlowPath(self, obj, result);
+    }
+
+    return false;
+  }
+
+  // Slow-path for GetTag. We didn't find the object, but we might be storing from-pointers and
+  // are asked to retrieve with a to-pointer.
+  bool GetTagSlowPath(art::Thread* self, art::mirror::Object* obj, /* out */ T* result)
+      REQUIRES_SHARED(art::Locks::mutator_lock_)
+      REQUIRES(allow_disallow_lock_);
+
+  // Update the table by doing read barriers on each element, ensuring that to-space pointers
+  // are stored.
+  void UpdateTableWithReadBarrier()
+      REQUIRES_SHARED(art::Locks::mutator_lock_)
+      REQUIRES(allow_disallow_lock_);
+
+  template <bool kHandleNull>
+  void SweepImpl(art::IsMarkedVisitor* visitor)
+      REQUIRES_SHARED(art::Locks::mutator_lock_)
+      REQUIRES(!allow_disallow_lock_);
+
+  enum TableUpdateNullTarget {
+    kIgnoreNull,
+    kRemoveNull,
+    kCallHandleNull
+  };
+
+  template <typename Updater, TableUpdateNullTarget kTargetNull>
+  void UpdateTableWith(Updater& updater)
+      REQUIRES_SHARED(art::Locks::mutator_lock_)
+      REQUIRES(allow_disallow_lock_);
+
+  template <typename Storage, class Allocator = std::allocator<T>>
+  struct ReleasableContainer;
+
+  struct HashGcRoot {
+    size_t operator()(const art::GcRoot<art::mirror::Object>& r) const
+        REQUIRES_SHARED(art::Locks::mutator_lock_) {
+      return reinterpret_cast<uintptr_t>(r.Read<art::kWithoutReadBarrier>());
+    }
+  };
+
+  struct EqGcRoot {
+    bool operator()(const art::GcRoot<art::mirror::Object>& r1,
+                    const art::GcRoot<art::mirror::Object>& r2) const
+        REQUIRES_SHARED(art::Locks::mutator_lock_) {
+      return r1.Read<art::kWithoutReadBarrier>() == r2.Read<art::kWithoutReadBarrier>();
+    }
+  };
+
+  // The tag table is used when visiting roots. So it needs to have a low lock level.
+  static constexpr art::LockLevel kTaggingLockLevel =
+      static_cast<art::LockLevel>(art::LockLevel::kAbortLock + 1);
+
+  std::unordered_map<art::GcRoot<art::mirror::Object>,
+                     T,
+                     HashGcRoot,
+                     EqGcRoot> tagged_objects_
+      GUARDED_BY(allow_disallow_lock_)
+      GUARDED_BY(art::Locks::mutator_lock_);
+  // To avoid repeatedly scanning the whole table, remember if we did that since the last sweep.
+  bool update_since_last_sweep_;
+};
+
+}  // namespace openjdkjvmti
+
+#endif  // ART_RUNTIME_OPENJDKJVMTI_JVMTI_WEAK_TABLE_H_
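Note on UpdateTableWith, declared above: its implementation (removed from object_tagging.cc below, now living in jvmti_weak_table-inl.h) postpones rehashing so the table can be updated in place, in one pass, without a side vector. A minimal standalone sketch of that idiom, with illustrative names; the updater must be idempotent, since a re-inserted key may be visited again:

    #include <limits>
    #include <unordered_map>

    // Pin the bucket count by lifting the max load factor: erase+emplace while
    // iterating can then never trigger a rehash that would invalidate `it`.
    void UpdateKeysInPlace(std::unordered_map<const void*, long>& map,
                           const void* (*update)(const void*)) {
      const float original_max_load_factor = map.max_load_factor();
      map.max_load_factor(std::numeric_limits<float>::max());

      for (auto it = map.begin(); it != map.end();) {
        const void* target = update(it->first);  // Must be idempotent.
        if (target != it->first) {
          long value = it->second;
          it = map.erase(it);  // erase() returns the next valid iterator.
          if (target != nullptr) {
            map.emplace(target, value);  // Safe: no rehash can occur.
          }
          continue;
        }
        ++it;
      }

      map.max_load_factor(original_max_load_factor);
    }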
diff --git a/runtime/openjdkjvmti/object_tagging.cc b/runtime/openjdkjvmti/object_tagging.cc
index b27c2a3..4215588 100644
--- a/runtime/openjdkjvmti/object_tagging.cc
+++ b/runtime/openjdkjvmti/object_tagging.cc
@@ -33,355 +33,34 @@
 
 #include <limits>
 
-#include "art_jvmti.h"
-#include "base/logging.h"
 #include "events-inl.h"
-#include "gc/allocation_listener.h"
-#include "instrumentation.h"
-#include "jni_env_ext-inl.h"
-#include "jvmti_allocator.h"
-#include "mirror/class.h"
-#include "mirror/object.h"
-#include "runtime.h"
-#include "ScopedLocalRef.h"
+#include "jvmti_weak_table-inl.h"
 
 namespace openjdkjvmti {
 
-void ObjectTagTable::Lock() {
-  allow_disallow_lock_.ExclusiveLock(art::Thread::Current());
-}
-void ObjectTagTable::Unlock() {
-  allow_disallow_lock_.ExclusiveUnlock(art::Thread::Current());
-}
-void ObjectTagTable::AssertLocked() {
-  allow_disallow_lock_.AssertHeld(art::Thread::Current());
-}
-
-void ObjectTagTable::UpdateTableWithReadBarrier() {
-  update_since_last_sweep_ = true;
-
-  auto WithReadBarrierUpdater = [&](const art::GcRoot<art::mirror::Object>& original_root,
-                                    art::mirror::Object* original_obj ATTRIBUTE_UNUSED)
-     REQUIRES_SHARED(art::Locks::mutator_lock_) {
-    return original_root.Read<art::kWithReadBarrier>();
-  };
-
-  UpdateTableWith<decltype(WithReadBarrierUpdater), kIgnoreNull>(WithReadBarrierUpdater);
-}
-
-bool ObjectTagTable::GetTagSlowPath(art::Thread* self, art::mirror::Object* obj, jlong* result) {
-  // Under concurrent GC, there is a window between moving objects and sweeping of system
-  // weaks in which mutators are active. We may receive a to-space object pointer in obj,
-  // but still have from-space pointers in the table. Explicitly update the table once.
-  // Note: this will keep *all* objects in the table live, but should be a rare occurrence.
-  UpdateTableWithReadBarrier();
-  return GetTagLocked(self, obj, result);
-}
-
-void ObjectTagTable::Add(art::mirror::Object* obj, jlong tag) {
-  // Same as Set(), as we don't have duplicates in an unordered_map.
-  Set(obj, tag);
-}
-
-bool ObjectTagTable::Remove(art::mirror::Object* obj, jlong* tag) {
-  art::Thread* self = art::Thread::Current();
-  art::MutexLock mu(self, allow_disallow_lock_);
-  Wait(self);
-
-  return RemoveLocked(self, obj, tag);
-}
-bool ObjectTagTable::RemoveLocked(art::mirror::Object* obj, jlong* tag) {
-  art::Thread* self = art::Thread::Current();
-  allow_disallow_lock_.AssertHeld(self);
-  Wait(self);
-
-  return RemoveLocked(self, obj, tag);
-}
-
-bool ObjectTagTable::RemoveLocked(art::Thread* self, art::mirror::Object* obj, jlong* tag) {
-  auto it = tagged_objects_.find(art::GcRoot<art::mirror::Object>(obj));
-  if (it != tagged_objects_.end()) {
-    if (tag != nullptr) {
-      *tag = it->second;
-    }
-    tagged_objects_.erase(it);
-    return true;
-  }
-
-  if (art::kUseReadBarrier && self->GetIsGcMarking() && !update_since_last_sweep_) {
-    // Under concurrent GC, there is a window between moving objects and sweeping of system
-    // weaks in which mutators are active. We may receive a to-space object pointer in obj,
-    // but still have from-space pointers in the table. Explicitly update the table once.
-    // Note: this will keep *all* objects in the table live, but should be a rare occurrence.
-
-    // Update the table.
-    UpdateTableWithReadBarrier();
-
-    // And try again.
-    return RemoveLocked(self, obj, tag);
-  }
-
-  // Not in here.
-  return false;
-}
+// Instantiate for jlong = JVMTI tags.
+template class JvmtiWeakTable<jlong>;
 
 bool ObjectTagTable::Set(art::mirror::Object* obj, jlong new_tag) {
   if (new_tag == 0) {
     jlong tmp;
     return Remove(obj, &tmp);
   }
-
-  art::Thread* self = art::Thread::Current();
-  art::MutexLock mu(self, allow_disallow_lock_);
-  Wait(self);
-
-  return SetLocked(self, obj, new_tag);
+  return JvmtiWeakTable<jlong>::Set(obj, new_tag);
 }
 bool ObjectTagTable::SetLocked(art::mirror::Object* obj, jlong new_tag) {
   if (new_tag == 0) {
     jlong tmp;
     return RemoveLocked(obj, &tmp);
   }
-
-  art::Thread* self = art::Thread::Current();
-  allow_disallow_lock_.AssertHeld(self);
-  Wait(self);
-
-  return SetLocked(self, obj, new_tag);
+  return JvmtiWeakTable<jlong>::SetLocked(obj, new_tag);
 }
 
-bool ObjectTagTable::SetLocked(art::Thread* self, art::mirror::Object* obj, jlong new_tag) {
-  auto it = tagged_objects_.find(art::GcRoot<art::mirror::Object>(obj));
-  if (it != tagged_objects_.end()) {
-    it->second = new_tag;
-    return true;
-  }
-
-  if (art::kUseReadBarrier && self->GetIsGcMarking() && !update_since_last_sweep_) {
-    // Under concurrent GC, there is a window between moving objects and sweeping of system
-    // weaks in which mutators are active. We may receive a to-space object pointer in obj,
-    // but still have from-space pointers in the table. Explicitly update the table once.
-    // Note: this will keep *all* objects in the table live, but should be a rare occurrence.
-
-    // Update the table.
-    UpdateTableWithReadBarrier();
-
-    // And try again.
-    return SetLocked(self, obj, new_tag);
-  }
-
-  // New element.
-  auto insert_it = tagged_objects_.emplace(art::GcRoot<art::mirror::Object>(obj), new_tag);
-  DCHECK(insert_it.second);
-  return false;
+bool ObjectTagTable::DoesHandleNullOnSweep() {
+  return event_handler_->IsEventEnabledAnywhere(ArtJvmtiEvent::kObjectFree);
 }
-
-void ObjectTagTable::Sweep(art::IsMarkedVisitor* visitor) {
-  if (event_handler_->IsEventEnabledAnywhere(ArtJvmtiEvent::kObjectFree)) {
-    SweepImpl<true>(visitor);
-  } else {
-    SweepImpl<false>(visitor);
-  }
-
-  // Under concurrent GC, there is a window between moving objects and sweeping of system
-  // weaks in which mutators are active. We may receive a to-space object pointer in obj,
-  // but still have from-space pointers in the table. We explicitly update the table then
-  // to ensure we compare against to-space pointers. But we want to do this only once. Once
-  // sweeping is done, we know all objects are to-space pointers until the next GC cycle,
-  // so we re-enable the explicit update for the next marking.
-  update_since_last_sweep_ = false;
-}
-
-template <bool kHandleNull>
-void ObjectTagTable::SweepImpl(art::IsMarkedVisitor* visitor) {
-  art::Thread* self = art::Thread::Current();
-  art::MutexLock mu(self, allow_disallow_lock_);
-
-  auto IsMarkedUpdater = [&](const art::GcRoot<art::mirror::Object>& original_root ATTRIBUTE_UNUSED,
-                             art::mirror::Object* original_obj) {
-    return visitor->IsMarked(original_obj);
-  };
-
-  UpdateTableWith<decltype(IsMarkedUpdater),
-                  kHandleNull ? kCallHandleNull : kRemoveNull>(IsMarkedUpdater);
-}
-
 void ObjectTagTable::HandleNullSweep(jlong tag) {
   event_handler_->DispatchEvent<ArtJvmtiEvent::kObjectFree>(nullptr, tag);
 }
 
-template <typename T, ObjectTagTable::TableUpdateNullTarget kTargetNull>
-ALWAYS_INLINE inline void ObjectTagTable::UpdateTableWith(T& updater) {
-  // We optimistically hope that elements will still be well-distributed when re-inserting them.
-  // So play with the map mechanics, and postpone rehashing. This avoids the need of a side
-  // vector and two passes.
-  float original_max_load_factor = tagged_objects_.max_load_factor();
-  tagged_objects_.max_load_factor(std::numeric_limits<float>::max());
-  // For checking that a max load-factor actually does what we expect.
-  size_t original_bucket_count = tagged_objects_.bucket_count();
-
-  for (auto it = tagged_objects_.begin(); it != tagged_objects_.end();) {
-    DCHECK(!it->first.IsNull());
-    art::mirror::Object* original_obj = it->first.Read<art::kWithoutReadBarrier>();
-    art::mirror::Object* target_obj = updater(it->first, original_obj);
-    if (original_obj != target_obj) {
-      if (kTargetNull == kIgnoreNull && target_obj == nullptr) {
-        // Ignore null target, don't do anything.
-      } else {
-        jlong tag = it->second;
-        it = tagged_objects_.erase(it);
-        if (target_obj != nullptr) {
-          tagged_objects_.emplace(art::GcRoot<art::mirror::Object>(target_obj), tag);
-          DCHECK_EQ(original_bucket_count, tagged_objects_.bucket_count());
-        } else if (kTargetNull == kCallHandleNull) {
-          HandleNullSweep(tag);
-        }
-        continue;  // Iterator was implicitly updated by erase.
-      }
-    }
-    it++;
-  }
-
-  tagged_objects_.max_load_factor(original_max_load_factor);
-  // TODO: consider rehash here.
-}
-
-template <typename T, class Allocator = std::allocator<T>>
-struct ReleasableContainer {
-  using allocator_type = Allocator;
-
-  explicit ReleasableContainer(const allocator_type& alloc, size_t reserve = 10)
-      : allocator(alloc),
-        data(reserve > 0 ? allocator.allocate(reserve) : nullptr),
-        size(0),
-        capacity(reserve) {
-  }
-
-  ~ReleasableContainer() {
-    if (data != nullptr) {
-      allocator.deallocate(data, capacity);
-      capacity = 0;
-      size = 0;
-    }
-  }
-
-  T* Release() {
-    T* tmp = data;
-
-    data = nullptr;
-    size = 0;
-    capacity = 0;
-
-    return tmp;
-  }
-
-  void Resize(size_t new_capacity) {
-    CHECK_GT(new_capacity, capacity);
-
-    T* tmp = allocator.allocate(new_capacity);
-    DCHECK(tmp != nullptr);
-    if (data != nullptr) {
-      memcpy(tmp, data, sizeof(T) * size);
-    }
-    T* old = data;
-    data = tmp;
-    allocator.deallocate(old, capacity);
-    capacity = new_capacity;
-  }
-
-  void Pushback(const T& elem) {
-    if (size == capacity) {
-      size_t new_capacity = 2 * capacity + 1;
-      Resize(new_capacity);
-    }
-    data[size++] = elem;
-  }
-
-  Allocator allocator;
-  T* data;
-  size_t size;
-  size_t capacity;
-};
-
-jvmtiError ObjectTagTable::GetTaggedObjects(jvmtiEnv* jvmti_env,
-                                            jint tag_count,
-                                            const jlong* tags,
-                                            jint* count_ptr,
-                                            jobject** object_result_ptr,
-                                            jlong** tag_result_ptr) {
-  if (tag_count < 0) {
-    return ERR(ILLEGAL_ARGUMENT);
-  }
-  if (tag_count > 0) {
-    for (size_t i = 0; i != static_cast<size_t>(tag_count); ++i) {
-      if (tags[i] == 0) {
-        return ERR(ILLEGAL_ARGUMENT);
-      }
-    }
-  }
-  if (tags == nullptr) {
-    return ERR(NULL_POINTER);
-  }
-  if (count_ptr == nullptr) {
-    return ERR(NULL_POINTER);
-  }
-
-  art::Thread* self = art::Thread::Current();
-  art::MutexLock mu(self, allow_disallow_lock_);
-  Wait(self);
-
-  art::JNIEnvExt* jni_env = self->GetJniEnv();
-
-  constexpr size_t kDefaultSize = 10;
-  size_t initial_object_size;
-  size_t initial_tag_size;
-  if (tag_count == 0) {
-    initial_object_size = (object_result_ptr != nullptr) ? tagged_objects_.size() : 0;
-    initial_tag_size = (tag_result_ptr != nullptr) ? tagged_objects_.size() : 0;
-  } else {
-    initial_object_size = initial_tag_size = kDefaultSize;
-  }
-  JvmtiAllocator<void> allocator(jvmti_env);
-  ReleasableContainer<jobject, JvmtiAllocator<jobject>> selected_objects(allocator, initial_object_size);
-  ReleasableContainer<jlong, JvmtiAllocator<jlong>> selected_tags(allocator, initial_tag_size);
-
-  size_t count = 0;
-  for (auto& pair : tagged_objects_) {
-    bool select;
-    if (tag_count > 0) {
-      select = false;
-      for (size_t i = 0; i != static_cast<size_t>(tag_count); ++i) {
-        if (tags[i] == pair.second) {
-          select = true;
-          break;
-        }
-      }
-    } else {
-      select = true;
-    }
-
-    if (select) {
-      art::mirror::Object* obj = pair.first.Read<art::kWithReadBarrier>();
-      if (obj != nullptr) {
-        count++;
-        if (object_result_ptr != nullptr) {
-          selected_objects.Pushback(jni_env->AddLocalReference<jobject>(obj));
-        }
-        if (tag_result_ptr != nullptr) {
-          selected_tags.Pushback(pair.second);
-        }
-      }
-    }
-  }
-
-  if (object_result_ptr != nullptr) {
-    *object_result_ptr = selected_objects.Release();
-  }
-  if (tag_result_ptr != nullptr) {
-    *tag_result_ptr = selected_tags.Release();
-  }
-  *count_ptr = static_cast<jint>(count);
-  return ERR(NONE);
-}
-
 }  // namespace openjdkjvmti
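The `template class JvmtiWeakTable<jlong>;` line above is an explicit instantiation: the template member definitions are pulled in once through jvmti_weak_table-inl.h, and the compiled code is emitted in this translation unit so other files can use the table through the plain header. A minimal sketch of the pattern with illustrative names (written as one unit here; in practice the three parts are separate files):

    // counter.h: declaration only, no template member bodies.
    template <typename T>
    class Counter {
     public:
      void Add(T value);
      T total() const;
     private:
      T total_ = T();
    };

    // counter-inl.h: the definitions, included only where instantiated.
    template <typename T>
    void Counter<T>::Add(T value) { total_ += value; }
    template <typename T>
    T Counter<T>::total() const { return total_; }

    // counter.cc: emit the code once for every type the project needs.
    template class Counter<long>;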
diff --git a/runtime/openjdkjvmti/object_tagging.h b/runtime/openjdkjvmti/object_tagging.h
index 0296f1a..b5a601c 100644
--- a/runtime/openjdkjvmti/object_tagging.h
+++ b/runtime/openjdkjvmti/object_tagging.h
@@ -1,17 +1,32 @@
-/*
- * Copyright (C) 2016 The Android Open Source Project
+/* Copyright (C) 2016 The Android Open Source Project
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+ * This file implements interfaces from the file jvmti.h. This implementation
+ * is licensed under the same terms as the file jvmti.h.  The
+ * copyright and license information for the file jvmti.h follows.
  *
- *      http://www.apache.org/licenses/LICENSE-2.0
+ * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
  */
 
 #ifndef ART_RUNTIME_OPENJDKJVMTI_OBJECT_TAGGING_H_
@@ -20,62 +35,27 @@
 #include <unordered_map>
 
 #include "base/mutex.h"
-#include "gc/system_weak.h"
-#include "gc_root-inl.h"
 #include "globals.h"
 #include "jvmti.h"
+#include "jvmti_weak_table.h"
 #include "mirror/object.h"
-#include "thread-inl.h"
 
 namespace openjdkjvmti {
 
 class EventHandler;
 
-class ObjectTagTable : public art::gc::SystemWeakHolder {
+class ObjectTagTable FINAL : public JvmtiWeakTable<jlong> {
  public:
-  explicit ObjectTagTable(EventHandler* event_handler)
-      : art::gc::SystemWeakHolder(kTaggingLockLevel),
-        update_since_last_sweep_(false),
-        event_handler_(event_handler) {
+  explicit ObjectTagTable(EventHandler* event_handler) : event_handler_(event_handler) {
   }
 
-  void Add(art::mirror::Object* obj, jlong tag)
+  bool Set(art::mirror::Object* obj, jlong tag) OVERRIDE
       REQUIRES_SHARED(art::Locks::mutator_lock_)
       REQUIRES(!allow_disallow_lock_);
-
-  bool Remove(art::mirror::Object* obj, jlong* tag)
-      REQUIRES_SHARED(art::Locks::mutator_lock_)
-      REQUIRES(!allow_disallow_lock_);
-  bool RemoveLocked(art::mirror::Object* obj, jlong* tag)
+  bool SetLocked(art::mirror::Object* obj, jlong tag) OVERRIDE
       REQUIRES_SHARED(art::Locks::mutator_lock_)
       REQUIRES(allow_disallow_lock_);
 
-  bool Set(art::mirror::Object* obj, jlong tag)
-      REQUIRES_SHARED(art::Locks::mutator_lock_)
-      REQUIRES(!allow_disallow_lock_);
-  bool SetLocked(art::mirror::Object* obj, jlong tag)
-      REQUIRES_SHARED(art::Locks::mutator_lock_)
-      REQUIRES(allow_disallow_lock_);
-
-  bool GetTag(art::mirror::Object* obj, jlong* result)
-      REQUIRES_SHARED(art::Locks::mutator_lock_)
-      REQUIRES(!allow_disallow_lock_) {
-    art::Thread* self = art::Thread::Current();
-    art::MutexLock mu(self, allow_disallow_lock_);
-    Wait(self);
-
-    return GetTagLocked(self, obj, result);
-  }
-  bool GetTagLocked(art::mirror::Object* obj, jlong* result)
-      REQUIRES_SHARED(art::Locks::mutator_lock_)
-      REQUIRES(allow_disallow_lock_) {
-    art::Thread* self = art::Thread::Current();
-    allow_disallow_lock_.AssertHeld(self);
-    Wait(self);
-
-    return GetTagLocked(self, obj, result);
-  }
-
   jlong GetTagOrZero(art::mirror::Object* obj)
       REQUIRES_SHARED(art::Locks::mutator_lock_)
       REQUIRES(!allow_disallow_lock_) {
@@ -91,108 +71,11 @@
     return tmp;
   }
 
-  void Sweep(art::IsMarkedVisitor* visitor)
-      REQUIRES_SHARED(art::Locks::mutator_lock_)
-      REQUIRES(!allow_disallow_lock_);
-
-  jvmtiError GetTaggedObjects(jvmtiEnv* jvmti_env,
-                              jint tag_count,
-                              const jlong* tags,
-                              jint* count_ptr,
-                              jobject** object_result_ptr,
-                              jlong** tag_result_ptr)
-      REQUIRES_SHARED(art::Locks::mutator_lock_)
-      REQUIRES(!allow_disallow_lock_);
-
-  void Lock() ACQUIRE(allow_disallow_lock_);
-  void Unlock() RELEASE(allow_disallow_lock_);
-  void AssertLocked() ASSERT_CAPABILITY(allow_disallow_lock_);
+ protected:
+  bool DoesHandleNullOnSweep() OVERRIDE;
+  void HandleNullSweep(jlong tag) OVERRIDE;
 
  private:
-  bool SetLocked(art::Thread* self, art::mirror::Object* obj, jlong tag)
-      REQUIRES_SHARED(art::Locks::mutator_lock_)
-      REQUIRES(allow_disallow_lock_);
-
-  bool RemoveLocked(art::Thread* self, art::mirror::Object* obj, jlong* tag)
-      REQUIRES_SHARED(art::Locks::mutator_lock_)
-      REQUIRES(allow_disallow_lock_);
-
-  bool GetTagLocked(art::Thread* self, art::mirror::Object* obj, jlong* result)
-      REQUIRES_SHARED(art::Locks::mutator_lock_)
-      REQUIRES(allow_disallow_lock_) {
-    auto it = tagged_objects_.find(art::GcRoot<art::mirror::Object>(obj));
-    if (it != tagged_objects_.end()) {
-      *result = it->second;
-      return true;
-    }
-
-    if (art::kUseReadBarrier &&
-        self != nullptr &&
-        self->GetIsGcMarking() &&
-        !update_since_last_sweep_) {
-      return GetTagSlowPath(self, obj, result);
-    }
-
-    return false;
-  }
-
-  // Slow-path for GetTag. We didn't find the object, but we might be storing from-pointers and
-  // are asked to retrieve with a to-pointer.
-  bool GetTagSlowPath(art::Thread* self, art::mirror::Object* obj, jlong* result)
-      REQUIRES_SHARED(art::Locks::mutator_lock_)
-      REQUIRES(allow_disallow_lock_);
-
-  // Update the table by doing read barriers on each element, ensuring that to-space pointers
-  // are stored.
-  void UpdateTableWithReadBarrier()
-      REQUIRES_SHARED(art::Locks::mutator_lock_)
-      REQUIRES(allow_disallow_lock_);
-
-  template <bool kHandleNull>
-  void SweepImpl(art::IsMarkedVisitor* visitor)
-      REQUIRES_SHARED(art::Locks::mutator_lock_)
-      REQUIRES(!allow_disallow_lock_);
-  void HandleNullSweep(jlong tag);
-
-  enum TableUpdateNullTarget {
-    kIgnoreNull,
-    kRemoveNull,
-    kCallHandleNull
-  };
-
-  template <typename T, TableUpdateNullTarget kTargetNull>
-  void UpdateTableWith(T& updater)
-      REQUIRES_SHARED(art::Locks::mutator_lock_)
-      REQUIRES(allow_disallow_lock_);
-
-  struct HashGcRoot {
-    size_t operator()(const art::GcRoot<art::mirror::Object>& r) const
-        REQUIRES_SHARED(art::Locks::mutator_lock_) {
-      return reinterpret_cast<uintptr_t>(r.Read<art::kWithoutReadBarrier>());
-    }
-  };
-
-  struct EqGcRoot {
-    bool operator()(const art::GcRoot<art::mirror::Object>& r1,
-                    const art::GcRoot<art::mirror::Object>& r2) const
-        REQUIRES_SHARED(art::Locks::mutator_lock_) {
-      return r1.Read<art::kWithoutReadBarrier>() == r2.Read<art::kWithoutReadBarrier>();
-    }
-  };
-
-  // The tag table is used when visiting roots. So it needs to have a low lock level.
-  static constexpr art::LockLevel kTaggingLockLevel =
-      static_cast<art::LockLevel>(art::LockLevel::kAbortLock + 1);
-
-  std::unordered_map<art::GcRoot<art::mirror::Object>,
-                     jlong,
-                     HashGcRoot,
-                     EqGcRoot> tagged_objects_
-      GUARDED_BY(allow_disallow_lock_)
-      GUARDED_BY(art::Locks::mutator_lock_);
-  // To avoid repeatedly scanning the whole table, remember if we did that since the last sweep.
-  bool update_since_last_sweep_;
-
   EventHandler* event_handler_;
 };
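The slimmed-down ObjectTagTable above is a template-method design: JvmtiWeakTable<T> owns storage, locking, and sweeping, and subclasses customize behavior only through the two protected hooks. A hypothetical second subclass, to show the shape of the contract (the sweep calls HandleNullSweep(tag) for every entry whose object died, but only when DoesHandleNullOnSweep() returns true):

    class LoggingTagTable FINAL : public JvmtiWeakTable<jlong> {
     protected:
      bool DoesHandleNullOnSweep() OVERRIDE { return true; }
      void HandleNullSweep(jlong tag) OVERRIDE {
        LOG(INFO) << "Tagged object with tag " << tag << " was reclaimed.";
      }
    };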
 
diff --git a/runtime/openjdkjvmti/ti_class.cc b/runtime/openjdkjvmti/ti_class.cc
index a8a0ded..2d1b25e 100644
--- a/runtime/openjdkjvmti/ti_class.cc
+++ b/runtime/openjdkjvmti/ti_class.cc
@@ -62,6 +62,7 @@
 #include "thread-inl.h"
 #include "thread_list.h"
 #include "ti_class_loader.h"
+#include "ti_phase.h"
 #include "ti_redefine.h"
 #include "utils.h"
 
@@ -142,6 +143,18 @@
       // It is a primitive or array. Just return
       return;
     }
+    jvmtiPhase phase = PhaseUtil::GetPhaseUnchecked();
+    if (UNLIKELY(phase != JVMTI_PHASE_START && phase != JVMTI_PHASE_LIVE)) {
+      // We want to wait until we are at least in the START phase so that all WellKnownClasses and
+      // mirror classes have been initialized and loaded. The runtime relies on these classes having
+      // specific fields and methods present. Since PreDefine hooks don't need to abide by this
+      // restriction we will simply not send the event for these classes.
+      LOG(WARNING) << "Ignoring load of class <" << descriptor << "> as it is being loaded during "
+                   << "runtime initialization.";
+      return;
+    }
+
+    // Strip the leading 'L' and trailing ';' from the descriptor.
     std::string name(std::string(descriptor).substr(1, strlen(descriptor) - 2));
 
     art::Thread* self = art::Thread::Current();
@@ -673,18 +686,17 @@
     return ERR(INVALID_CLASS);
   }
 
-  JvmtiUniquePtr sig_copy;
+  JvmtiUniquePtr<char[]> sig_copy;
   if (signature_ptr != nullptr) {
     std::string storage;
     const char* descriptor = klass->GetDescriptor(&storage);
 
-    unsigned char* tmp;
-    jvmtiError ret = CopyString(env, descriptor, &tmp);
-    if (ret != ERR(NONE)) {
+    jvmtiError ret;
+    sig_copy = CopyString(env, descriptor, &ret);
+    if (sig_copy == nullptr) {
       return ret;
     }
-    sig_copy = MakeJvmtiUniquePtr(env, tmp);
-    *signature_ptr = reinterpret_cast<char*>(tmp);
+    *signature_ptr = sig_copy.get();
   }
 
   if (generic_ptr != nullptr) {
@@ -700,12 +712,12 @@
           oss << str_array->Get(i)->ToModifiedUtf8();
         }
         std::string output_string = oss.str();
-        unsigned char* tmp;
-        jvmtiError ret = CopyString(env, output_string.c_str(), &tmp);
-        if (ret != ERR(NONE)) {
+        jvmtiError ret;
+        JvmtiUniquePtr<char[]> copy = CopyString(env, output_string.c_str(), &ret);
+        if (copy == nullptr) {
           return ret;
         }
-        *generic_ptr = reinterpret_cast<char*>(tmp);
+        *generic_ptr = copy.release();
       } else if (soa.Self()->IsExceptionPending()) {
         // TODO: Should we report an error here?
         soa.Self()->ClearException();
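The call sites above pin down the shape of the reworked CopyString helper (its declaration lives in art_jvmti.h, which this diff does not touch). A sketch of the inferred contract and the ownership idiom; the signature is inferred, not verbatim:

    // Inferred contract:
    //   JvmtiUniquePtr<char[]> CopyString(jvmtiEnv* env, const char* src,
    //                                     jvmtiError* error);
    // Returns nullptr and fills *error when allocation fails.
    jvmtiError ret;
    JvmtiUniquePtr<char[]> copy = CopyString(env, descriptor, &ret);
    if (copy == nullptr) {
      return ret;  // Nothing to clean up.
    }
    // Either keep the guard alive and publish copy.get(), so the buffer is
    // freed automatically if a later step fails, or transfer ownership to
    // the caller outright:
    *generic_ptr = copy.release();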
diff --git a/runtime/openjdkjvmti/ti_class_definition.h b/runtime/openjdkjvmti/ti_class_definition.h
index dbe5da2..3c251d4 100644
--- a/runtime/openjdkjvmti/ti_class_definition.h
+++ b/runtime/openjdkjvmti/ti_class_definition.h
@@ -46,7 +46,7 @@
   std::string name;
   jobject protection_domain;
   jint dex_len;
-  JvmtiUniquePtr dex_data;
+  JvmtiUniquePtr<unsigned char> dex_data;
   art::ArraySlice<const unsigned char> original_dex_file;
 
   ArtClassDefinition() = default;
diff --git a/runtime/openjdkjvmti/ti_class_loader.cc b/runtime/openjdkjvmti/ti_class_loader.cc
index d05f579..66357eb 100644
--- a/runtime/openjdkjvmti/ti_class_loader.cc
+++ b/runtime/openjdkjvmti/ti_class_loader.cc
@@ -105,7 +105,6 @@
   // mCookie is nulled out if the DexFile has been closed but mInternalCookie sticks around until
   // the object is finalized. Since they always point to the same array if mCookie is not null we
   // just use the mInternalCookie field. We will update one or both of these fields later.
-  // TODO Should I get the class from the classloader or directly?
   art::ArtField* internal_cookie_field = java_dex_file_obj->GetClass()->FindDeclaredInstanceField(
       "mInternalCookie", "Ljava/lang/Object;");
   // TODO Add check that mCookie is either null or same as mInternalCookie
@@ -113,7 +112,6 @@
   return internal_cookie_field->GetObject(java_dex_file_obj.Get())->AsLongArray();
 }
 
-// TODO Really wishing I had that mirror of java.lang.DexFile now.
 art::ObjPtr<art::mirror::LongArray> ClassLoaderHelper::AllocateNewDexFileCookie(
     art::Thread* self,
     art::Handle<art::mirror::LongArray> cookie,
@@ -128,8 +126,6 @@
     return nullptr;
   }
   // Copy the oat-dex field at the start.
-  // TODO Should I clear this field?
-  // TODO This is a really crappy thing here with the first element being different.
   new_cookie->SetWithoutChecks<false>(0, cookie->GetWithoutChecks(0));
   // This must match the casts in runtime/native/dalvik_system_DexFile.cc:ConvertDexFilesToJavaArray
   new_cookie->SetWithoutChecks<false>(
diff --git a/runtime/openjdkjvmti/ti_field.cc b/runtime/openjdkjvmti/ti_field.cc
index 131e6c3..1e5fbda 100644
--- a/runtime/openjdkjvmti/ti_field.cc
+++ b/runtime/openjdkjvmti/ti_field.cc
@@ -63,34 +63,31 @@
   art::ScopedObjectAccess soa(art::Thread::Current());
   art::ArtField* art_field = art::jni::DecodeArtField(field);
 
-  JvmtiUniquePtr name_copy;
+  JvmtiUniquePtr<char[]> name_copy;
   if (name_ptr != nullptr) {
     const char* field_name = art_field->GetName();
     if (field_name == nullptr) {
       field_name = "<error>";
     }
-    unsigned char* tmp;
-    jvmtiError ret = CopyString(env, field_name, &tmp);
-    if (ret != ERR(NONE)) {
+    jvmtiError ret;
+    name_copy = CopyString(env, field_name, &ret);
+    if (name_copy == nullptr) {
       return ret;
     }
-    name_copy = MakeJvmtiUniquePtr(env, tmp);
-    *name_ptr = reinterpret_cast<char*>(tmp);
+    *name_ptr = name_copy.get();
   }
 
-  JvmtiUniquePtr signature_copy;
+  JvmtiUniquePtr<char[]> signature_copy;
   if (signature_ptr != nullptr) {
     const char* sig = art_field->GetTypeDescriptor();
-    unsigned char* tmp;
-    jvmtiError ret = CopyString(env, sig, &tmp);
-    if (ret != ERR(NONE)) {
+    jvmtiError ret;
+    signature_copy = CopyString(env, sig, &ret);
+    if (signature_copy == nullptr) {
       return ret;
     }
-    signature_copy = MakeJvmtiUniquePtr(env, tmp);
-    *signature_ptr = reinterpret_cast<char*>(tmp);
+    *signature_ptr = signature_copy.get();
   }
 
-  // TODO: Support generic signature.
   if (generic_ptr != nullptr) {
     *generic_ptr = nullptr;
     if (!art_field->GetDeclaringClass()->IsProxyClass()) {
@@ -102,12 +99,12 @@
           oss << str_array->Get(i)->ToModifiedUtf8();
         }
         std::string output_string = oss.str();
-        unsigned char* tmp;
-        jvmtiError ret = CopyString(env, output_string.c_str(), &tmp);
-        if (ret != ERR(NONE)) {
+        jvmtiError ret;
+        JvmtiUniquePtr<char[]> copy = CopyString(env, output_string.c_str(), &ret);
+        if (copy == nullptr) {
           return ret;
         }
-        *generic_ptr = reinterpret_cast<char*>(tmp);
+        *generic_ptr = copy.release();
       } else if (soa.Self()->IsExceptionPending()) {
         // TODO: Should we report an error here?
         soa.Self()->ClearException();
diff --git a/runtime/openjdkjvmti/ti_heap.cc b/runtime/openjdkjvmti/ti_heap.cc
index fe3e52b..c2495e3 100644
--- a/runtime/openjdkjvmti/ti_heap.cc
+++ b/runtime/openjdkjvmti/ti_heap.cc
@@ -25,12 +25,14 @@
 #include "gc_root-inl.h"
 #include "jni_env_ext.h"
 #include "jni_internal.h"
+#include "jvmti_weak_table-inl.h"
 #include "mirror/class.h"
 #include "mirror/object-inl.h"
 #include "mirror/object_array-inl.h"
 #include "object_callbacks.h"
 #include "object_tagging.h"
 #include "obj_ptr-inl.h"
+#include "primitive.h"
 #include "runtime.h"
 #include "scoped_thread_state_change-inl.h"
 #include "thread-inl.h"
@@ -38,28 +40,584 @@
 
 namespace openjdkjvmti {
 
-struct IterateThroughHeapData {
-  IterateThroughHeapData(HeapUtil* _heap_util,
-                         jint heap_filter,
-                         art::ObjPtr<art::mirror::Class> klass,
-                         const jvmtiHeapCallbacks* _callbacks,
-                         const void* _user_data)
-      : heap_util(_heap_util),
-        filter_klass(klass),
-        callbacks(_callbacks),
-        user_data(_user_data),
-        filter_out_tagged((heap_filter & JVMTI_HEAP_FILTER_TAGGED) != 0),
+namespace {
+
+struct IndexCache {
+  // The number of fields contributed by the class's implemented interfaces. These occupy the
+  // first indices, before any fields declared in the class hierarchy.
+  size_t interface_fields;
+
+  // It would be nice to also cache the following, but it is complicated to wire up into the
+  // generic visit:
+  // The number of fields in interfaces and superclasses. This is the first index assigned to
+  // fields of the class.
+  // size_t superclass_fields;
+};
+using IndexCachingTable = JvmtiWeakTable<IndexCache>;
+
+static IndexCachingTable gIndexCachingTable;
+
+// Report the contents of a string, if a callback is set.
+jint ReportString(art::ObjPtr<art::mirror::Object> obj,
+                  jvmtiEnv* env,
+                  ObjectTagTable* tag_table,
+                  const jvmtiHeapCallbacks* cb,
+                  const void* user_data) REQUIRES_SHARED(art::Locks::mutator_lock_) {
+  if (UNLIKELY(cb->string_primitive_value_callback != nullptr) && obj->IsString()) {
+    art::ObjPtr<art::mirror::String> str = obj->AsString();
+    int32_t string_length = str->GetLength();
+    JvmtiUniquePtr<uint16_t[]> data;
+
+    if (string_length > 0) {
+      jvmtiError alloc_error;
+      data = AllocJvmtiUniquePtr<uint16_t[]>(env, string_length, &alloc_error);
+      if (data == nullptr) {
+        // TODO: Not really sure what to do here. Should we abort the iteration and go all the way
+        //       back? For now just warn.
+        LOG(WARNING) << "Unable to allocate buffer for string reporting! Silently dropping value."
+                     << " >" << str->ToModifiedUtf8() << "<";
+        return 0;
+      }
+
+      if (str->IsCompressed()) {
+        uint8_t* compressed_data = str->GetValueCompressed();
+        for (int32_t i = 0; i != string_length; ++i) {
+          data[i] = compressed_data[i];
+        }
+      } else {
+        // Can copy directly.
+        memcpy(data.get(), str->GetValue(), string_length * sizeof(uint16_t));
+      }
+    }
+
+    const jlong class_tag = tag_table->GetTagOrZero(obj->GetClass());
+    jlong string_tag = tag_table->GetTagOrZero(obj.Ptr());
+    const jlong saved_string_tag = string_tag;
+
+    jint result = cb->string_primitive_value_callback(class_tag,
+                                                      obj->SizeOf(),
+                                                      &string_tag,
+                                                      data.get(),
+                                                      string_length,
+                                                      const_cast<void*>(user_data));
+    if (string_tag != saved_string_tag) {
+      tag_table->Set(obj.Ptr(), string_tag);
+    }
+
+    return result;
+  }
+  return 0;
+}
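The call above fixes the callback's parameter order, matching jvmtiStringPrimitiveValueCallback from the JVMTI specification. An agent-side sketch of the receiving end; the callback name and the counting behavior are illustrative only:

    extern "C" jint StringValueCallback(jlong class_tag,
                                        jlong size,
                                        jlong* tag_ptr,
                                        const jchar* value,
                                        jint value_length,
                                        void* user_data) {
      // Sum the lengths of all reported strings; writing through tag_ptr
      // would (re)tag the string, as ReportString's write-back above shows.
      *static_cast<jlong*>(user_data) += value_length;
      return 0;  // No JVMTI_VISIT_ABORT bit: continue the iteration.
    }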
+
+// Report the contents of a primitive array, if a callback is set.
+jint ReportPrimitiveArray(art::ObjPtr<art::mirror::Object> obj,
+                          jvmtiEnv* env,
+                          ObjectTagTable* tag_table,
+                          const jvmtiHeapCallbacks* cb,
+                          const void* user_data) REQUIRES_SHARED(art::Locks::mutator_lock_) {
+  if (UNLIKELY(cb->array_primitive_value_callback != nullptr) &&
+      obj->IsArrayInstance() &&
+      !obj->IsObjectArray()) {
+    art::ObjPtr<art::mirror::Array> array = obj->AsArray();
+    int32_t array_length = array->GetLength();
+    size_t component_size = array->GetClass()->GetComponentSize();
+    art::Primitive::Type art_prim_type = array->GetClass()->GetComponentType()->GetPrimitiveType();
+    jvmtiPrimitiveType prim_type =
+        static_cast<jvmtiPrimitiveType>(art::Primitive::Descriptor(art_prim_type)[0]);
+    DCHECK(prim_type == JVMTI_PRIMITIVE_TYPE_BOOLEAN ||
+           prim_type == JVMTI_PRIMITIVE_TYPE_BYTE ||
+           prim_type == JVMTI_PRIMITIVE_TYPE_CHAR ||
+           prim_type == JVMTI_PRIMITIVE_TYPE_SHORT ||
+           prim_type == JVMTI_PRIMITIVE_TYPE_INT ||
+           prim_type == JVMTI_PRIMITIVE_TYPE_LONG ||
+           prim_type == JVMTI_PRIMITIVE_TYPE_FLOAT ||
+           prim_type == JVMTI_PRIMITIVE_TYPE_DOUBLE);
+
+    const jlong class_tag = tag_table->GetTagOrZero(obj->GetClass());
+    jlong array_tag = tag_table->GetTagOrZero(obj.Ptr());
+    const jlong saved_array_tag = array_tag;
+
+    jint result;
+    if (array_length == 0) {
+      result = cb->array_primitive_value_callback(class_tag,
+                                                  obj->SizeOf(),
+                                                  &array_tag,
+                                                  0,
+                                                  prim_type,
+                                                  nullptr,
+                                                  const_cast<void*>(user_data));
+    } else {
+      jvmtiError alloc_error;
+      JvmtiUniquePtr<char[]> data = AllocJvmtiUniquePtr<char[]>(env,
+                                                                array_length * component_size,
+                                                                &alloc_error);
+      if (data == nullptr) {
+        // TODO: Not really sure what to do here. Should we abort the iteration and go all the way
+        //       back? For now just warn.
+        LOG(WARNING) << "Unable to allocate buffer for array reporting! Silently dropping value.";
+        return 0;
+      }
+
+      memcpy(data.get(), array->GetRawData(component_size, 0), array_length * component_size);
+
+      result = cb->array_primitive_value_callback(class_tag,
+                                                  obj->SizeOf(),
+                                                  &array_tag,
+                                                  array_length,
+                                                  prim_type,
+                                                  data.get(),
+                                                  const_cast<void*>(user_data));
+    }
+
+    if (array_tag != saved_array_tag) {
+      tag_table->Set(obj.Ptr(), array_tag);
+    }
+
+    return result;
+  }
+  return 0;
+}
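The static_cast from Descriptor(...)[0] to jvmtiPrimitiveType above is sound because JVMTI defines each primitive-type constant as the code point of its JNI descriptor character. A compile-time spot check, using the constant values from the JVMTI specification:

    static_assert(JVMTI_PRIMITIVE_TYPE_BOOLEAN == 'Z', "descriptor mismatch");
    static_assert(JVMTI_PRIMITIVE_TYPE_BYTE    == 'B', "descriptor mismatch");
    static_assert(JVMTI_PRIMITIVE_TYPE_CHAR    == 'C', "descriptor mismatch");
    static_assert(JVMTI_PRIMITIVE_TYPE_SHORT   == 'S', "descriptor mismatch");
    static_assert(JVMTI_PRIMITIVE_TYPE_INT     == 'I', "descriptor mismatch");
    static_assert(JVMTI_PRIMITIVE_TYPE_LONG    == 'J', "descriptor mismatch");
    static_assert(JVMTI_PRIMITIVE_TYPE_FLOAT   == 'F', "descriptor mismatch");
    static_assert(JVMTI_PRIMITIVE_TYPE_DOUBLE  == 'D', "descriptor mismatch");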
+
+template <typename UserData>
+bool VisitorFalse(art::ObjPtr<art::mirror::Object> obj ATTRIBUTE_UNUSED,
+                  art::ObjPtr<art::mirror::Class> klass ATTRIBUTE_UNUSED,
+                  art::ArtField& field ATTRIBUTE_UNUSED,
+                  size_t field_index ATTRIBUTE_UNUSED,
+                  UserData* user_data ATTRIBUTE_UNUSED) {
+  return false;
+}
+
+template <typename UserData, bool kCallVisitorOnRecursion>
+class FieldVisitor {
+ public:
+  // Report the contents of the fields of the given object through the given visitors.
+  template <typename StaticPrimitiveVisitor,
+            typename StaticReferenceVisitor,
+            typename InstancePrimitiveVisitor,
+            typename InstanceReferenceVisitor>
+  static bool ReportFields(art::ObjPtr<art::mirror::Object> obj,
+                           UserData* user_data,
+                           StaticPrimitiveVisitor& static_prim_visitor,
+                           StaticReferenceVisitor& static_ref_visitor,
+                           InstancePrimitiveVisitor& instance_prim_visitor,
+                           InstanceReferenceVisitor& instance_ref_visitor)
+      REQUIRES_SHARED(art::Locks::mutator_lock_) {
+    FieldVisitor fv(user_data);
+
+    if (obj->IsClass()) {
+      // When visiting a class, we only visit the static fields of the given class. No field of
+      // superclasses is visited.
+      art::ObjPtr<art::mirror::Class> klass = obj->AsClass();
+      // Only report fields on resolved classes. We need valid field data.
+      if (!klass->IsResolved()) {
+        return false;
+      }
+      return fv.ReportFieldsImpl(nullptr,
+                                 obj->AsClass(),
+                                 obj->AsClass()->IsInterface(),
+                                 static_prim_visitor,
+                                 static_ref_visitor,
+                                 instance_prim_visitor,
+                                 instance_ref_visitor);
+    } else {
+      // See comment above. Just double-checking here, but an instance *should* mean the class was
+      // resolved.
+      DCHECK(obj->GetClass()->IsResolved() || obj->GetClass()->IsErroneousResolved());
+      return fv.ReportFieldsImpl(obj,
+                                 obj->GetClass(),
+                                 false,
+                                 static_prim_visitor,
+                                 static_ref_visitor,
+                                 instance_prim_visitor,
+                                 instance_ref_visitor);
+    }
+  }
+
+ private:
+  explicit FieldVisitor(UserData* user_data) : user_data_(user_data) {}
+
+  // Report the contents of fields of the given object. If obj is null, report the static fields,
+  // otherwise the instance fields.
+  template <typename StaticPrimitiveVisitor,
+            typename StaticReferenceVisitor,
+            typename InstancePrimitiveVisitor,
+            typename InstanceReferenceVisitor>
+  bool ReportFieldsImpl(art::ObjPtr<art::mirror::Object> obj,
+                        art::ObjPtr<art::mirror::Class> klass,
+                        bool skip_java_lang_object,
+                        StaticPrimitiveVisitor& static_prim_visitor,
+                        StaticReferenceVisitor& static_ref_visitor,
+                        InstancePrimitiveVisitor& instance_prim_visitor,
+                        InstanceReferenceVisitor& instance_ref_visitor)
+      REQUIRES_SHARED(art::Locks::mutator_lock_) {
+    // Compute the offset of field indices.
+    size_t interface_field_count = CountInterfaceFields(klass);
+
+    size_t tmp;
+    bool aborted = ReportFieldsRecursive(obj,
+                                         klass,
+                                         interface_field_count,
+                                         skip_java_lang_object,
+                                         static_prim_visitor,
+                                         static_ref_visitor,
+                                         instance_prim_visitor,
+                                         instance_ref_visitor,
+                                         &tmp);
+    return aborted;
+  }
+
+  // Visit the fields of klass and its superclasses recursively, assigning stable field indices.
+  // Return true if the visit was aborted.
+  template <typename StaticPrimitiveVisitor,
+            typename StaticReferenceVisitor,
+            typename InstancePrimitiveVisitor,
+            typename InstanceReferenceVisitor>
+  bool ReportFieldsRecursive(art::ObjPtr<art::mirror::Object> obj,
+                             art::ObjPtr<art::mirror::Class> klass,
+                             size_t interface_fields,
+                             bool skip_java_lang_object,
+                             StaticPrimitiveVisitor& static_prim_visitor,
+                             StaticReferenceVisitor& static_ref_visitor,
+                             InstancePrimitiveVisitor& instance_prim_visitor,
+                             InstanceReferenceVisitor& instance_ref_visitor,
+                             size_t* field_index_out)
+      REQUIRES_SHARED(art::Locks::mutator_lock_) {
+    DCHECK(klass != nullptr);
+    size_t field_index;
+    if (klass->GetSuperClass() == nullptr) {
+      // j.l.Object. Start with the fields from interfaces.
+      field_index = interface_fields;
+      if (skip_java_lang_object) {
+        *field_index_out = field_index;
+        return false;
+      }
+    } else {
+      // Report superclass fields.
+      if (kCallVisitorOnRecursion) {
+        if (ReportFieldsRecursive(obj,
+                                  klass->GetSuperClass(),
+                                  interface_fields,
+                                  skip_java_lang_object,
+                                  static_prim_visitor,
+                                  static_ref_visitor,
+                                  instance_prim_visitor,
+                                  instance_ref_visitor,
+                                  &field_index)) {
+          return true;
+        }
+      } else {
+        // Still call, but with empty visitor. This is required for correct counting.
+        ReportFieldsRecursive(obj,
+                              klass->GetSuperClass(),
+                              interface_fields,
+                              skip_java_lang_object,
+                              VisitorFalse<UserData>,
+                              VisitorFalse<UserData>,
+                              VisitorFalse<UserData>,
+                              VisitorFalse<UserData>,
+                              &field_index);
+      }
+    }
+
+    // Now visit fields for the current klass.
+
+    for (auto& static_field : klass->GetSFields()) {
+      if (static_field.IsPrimitiveType()) {
+        if (static_prim_visitor(obj,
+                                klass,
+                                static_field,
+                                field_index,
+                                user_data_)) {
+          return true;
+        }
+      } else {
+        if (static_ref_visitor(obj,
+                               klass,
+                               static_field,
+                               field_index,
+                               user_data_)) {
+          return true;
+        }
+      }
+      field_index++;
+    }
+
+    for (auto& instance_field : klass->GetIFields()) {
+      if (instance_field.IsPrimitiveType()) {
+        if (instance_prim_visitor(obj,
+                                  klass,
+                                  instance_field,
+                                  field_index,
+                                  user_data_)) {
+          return true;
+        }
+      } else {
+        if (instance_ref_visitor(obj,
+                                 klass,
+                                 instance_field,
+                                 field_index,
+                                 user_data_)) {
+          return true;
+        }
+      }
+      field_index++;
+    }
+
+    *field_index_out = field_index;
+    return false;
+  }
+
+  // Implements a visit of the implemented interfaces of a given class.
+  template <typename T>
+  struct RecursiveInterfaceVisit {
+    static void VisitStatic(art::Thread* self, art::ObjPtr<art::mirror::Class> klass, T& visitor)
+        REQUIRES_SHARED(art::Locks::mutator_lock_) {
+      RecursiveInterfaceVisit rv;
+      rv.Visit(self, klass, visitor);
+    }
+
+    void Visit(art::Thread* self, art::ObjPtr<art::mirror::Class> klass, T& visitor)
+        REQUIRES_SHARED(art::Locks::mutator_lock_) {
+      // First visit the parent, to get the order right.
+      // (We do this in preparation for actual visiting of interface fields.)
+      if (klass->GetSuperClass() != nullptr) {
+        Visit(self, klass->GetSuperClass(), visitor);
+      }
+      for (uint32_t i = 0; i != klass->NumDirectInterfaces(); ++i) {
+        art::ObjPtr<art::mirror::Class> inf_klass =
+            art::mirror::Class::GetDirectInterface(self, klass, i);
+        DCHECK(inf_klass != nullptr);
+        VisitInterface(self, inf_klass, visitor);
+      }
+    }
+
+    void VisitInterface(art::Thread* self, art::ObjPtr<art::mirror::Class> inf_klass, T& visitor)
+        REQUIRES_SHARED(art::Locks::mutator_lock_) {
+      auto it = visited_interfaces.find(inf_klass.Ptr());
+      if (it != visited_interfaces.end()) {
+        return;
+      }
+      visited_interfaces.insert(inf_klass.Ptr());
+
+      // Let the visitor know about this one. Note that this order is acceptable, as the ordering
+      // of these fields never matters for known visitors.
+      visitor(inf_klass);
+
+      // Now visit the superinterfaces.
+      for (uint32_t i = 0; i != inf_klass->NumDirectInterfaces(); ++i) {
+        art::ObjPtr<art::mirror::Class> super_inf_klass =
+            art::mirror::Class::GetDirectInterface(self, inf_klass, i);
+        DCHECK(super_inf_klass != nullptr);
+        VisitInterface(self, super_inf_klass, visitor);
+      }
+    }
+
+    std::unordered_set<art::mirror::Class*> visited_interfaces;
+  };
+
+  // Counting interface fields. Note that we cannot use the interface table, as that only contains
+  // "non-marker" interfaces (= interfaces with methods).
+  static size_t CountInterfaceFields(art::ObjPtr<art::mirror::Class> klass)
+      REQUIRES_SHARED(art::Locks::mutator_lock_) {
+    // Do we have a cached value?
+    IndexCache tmp;
+    if (gIndexCachingTable.GetTag(klass.Ptr(), &tmp)) {
+      return tmp.interface_fields;
+    }
+
+    size_t count = 0;
+    auto visitor = [&count](art::ObjPtr<art::mirror::Class> inf_klass)
+        REQUIRES_SHARED(art::Locks::mutator_lock_) {
+      DCHECK(inf_klass->IsInterface());
+      DCHECK_EQ(0u, inf_klass->NumInstanceFields());
+      count += inf_klass->NumStaticFields();
+    };
+    RecursiveInterfaceVisit<decltype(visitor)>::VisitStatic(art::Thread::Current(), klass, visitor);
+
+    // Store this into the cache.
+    tmp.interface_fields = count;
+    gIndexCachingTable.Set(klass.Ptr(), tmp);
+
+    return count;
+  }
+
+  UserData* user_data_;
+};
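A worked example of the index numbering the visitor above produces, with hypothetical classes and, for brevity, pretending java.lang.Object contributes no fields:

    //   interface I          { static int sI; }           // index 0
    //   class A implements I { static int sA; int iA; }   // indices 1, 2
    //   class B extends A    { int iB; }                  // index 3
    //
    // Interface statics come first, then each class from the root down,
    // statics before instance fields within a class. Visiting a B instance
    // reports iA at 2 and iB at 3; visiting the class object of A reports
    // only sA (at 1), since kCallVisitorOnRecursion is false for classes and
    // superclass fields are merely counted to keep the indices stable.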
+
+// Debug helper. Prints the structure of an object.
+template <bool kStatic, bool kRef>
+struct DumpVisitor {
+  static bool Callback(art::ObjPtr<art::mirror::Object> obj ATTRIBUTE_UNUSED,
+                       art::ObjPtr<art::mirror::Class> klass ATTRIBUTE_UNUSED,
+                       art::ArtField& field,
+                       size_t field_index,
+                       void* user_data ATTRIBUTE_UNUSED)
+      REQUIRES_SHARED(art::Locks::mutator_lock_) {
+    LOG(ERROR) << (kStatic ? "static " : "instance ")
+               << (kRef ? "ref " : "primitive ")
+               << field.PrettyField()
+               << " @ "
+               << field_index;
+    return false;
+  }
+};
+ATTRIBUTE_UNUSED
+void DumpObjectFields(art::ObjPtr<art::mirror::Object> obj)
+    REQUIRES_SHARED(art::Locks::mutator_lock_) {
+  if (obj->IsClass()) {
+    FieldVisitor<void, false>::ReportFields(obj,
+                                            nullptr,
+                                            DumpVisitor<true, false>::Callback,
+                                            DumpVisitor<true, true>::Callback,
+                                            DumpVisitor<false, false>::Callback,
+                                            DumpVisitor<false, true>::Callback);
+  } else {
+    FieldVisitor<void, true>::ReportFields(obj,
+                                           nullptr,
+                                           DumpVisitor<true, false>::Callback,
+                                           DumpVisitor<true, true>::Callback,
+                                           DumpVisitor<false, false>::Callback,
+                                           DumpVisitor<false, true>::Callback);
+  }
+}
+
+class ReportPrimitiveField {
+ public:
+  static bool Report(art::ObjPtr<art::mirror::Object> obj,
+                     ObjectTagTable* tag_table,
+                     const jvmtiHeapCallbacks* cb,
+                     const void* user_data)
+      REQUIRES_SHARED(art::Locks::mutator_lock_) {
+    if (UNLIKELY(cb->primitive_field_callback != nullptr)) {
+      jlong class_tag = tag_table->GetTagOrZero(obj->GetClass());
+      ReportPrimitiveField rpf(tag_table, class_tag, cb, user_data);
+      if (obj->IsClass()) {
+        return FieldVisitor<ReportPrimitiveField, false>::ReportFields(
+            obj,
+            &rpf,
+            ReportPrimitiveFieldCallback<true>,
+            VisitorFalse<ReportPrimitiveField>,
+            VisitorFalse<ReportPrimitiveField>,
+            VisitorFalse<ReportPrimitiveField>);
+      } else {
+        return FieldVisitor<ReportPrimitiveField, true>::ReportFields(
+            obj,
+            &rpf,
+            VisitorFalse<ReportPrimitiveField>,
+            VisitorFalse<ReportPrimitiveField>,
+            ReportPrimitiveFieldCallback<false>,
+            VisitorFalse<ReportPrimitiveField>);
+      }
+    }
+    return false;
+  }
+
+ private:
+  ReportPrimitiveField(ObjectTagTable* tag_table,
+                       jlong class_tag,
+                       const jvmtiHeapCallbacks* cb,
+                       const void* user_data)
+      : tag_table_(tag_table), class_tag_(class_tag), cb_(cb), user_data_(user_data) {}
+
+  template <bool kReportStatic>
+  static bool ReportPrimitiveFieldCallback(art::ObjPtr<art::mirror::Object> obj,
+                                           art::ObjPtr<art::mirror::Class> klass,
+                                           art::ArtField& field,
+                                           size_t field_index,
+                                           ReportPrimitiveField* user_data)
+      REQUIRES_SHARED(art::Locks::mutator_lock_) {
+    art::Primitive::Type art_prim_type = field.GetTypeAsPrimitiveType();
+    jvmtiPrimitiveType prim_type =
+        static_cast<jvmtiPrimitiveType>(art::Primitive::Descriptor(art_prim_type)[0]);
+    DCHECK(prim_type == JVMTI_PRIMITIVE_TYPE_BOOLEAN ||
+           prim_type == JVMTI_PRIMITIVE_TYPE_BYTE ||
+           prim_type == JVMTI_PRIMITIVE_TYPE_CHAR ||
+           prim_type == JVMTI_PRIMITIVE_TYPE_SHORT ||
+           prim_type == JVMTI_PRIMITIVE_TYPE_INT ||
+           prim_type == JVMTI_PRIMITIVE_TYPE_LONG ||
+           prim_type == JVMTI_PRIMITIVE_TYPE_FLOAT ||
+           prim_type == JVMTI_PRIMITIVE_TYPE_DOUBLE);
+    jvmtiHeapReferenceInfo info;
+    info.field.index = field_index;
+
+    jvalue value;
+    memset(&value, 0, sizeof(jvalue));
+    art::ObjPtr<art::mirror::Object> src = kReportStatic ? klass : obj;
+    switch (art_prim_type) {
+      case art::Primitive::Type::kPrimBoolean:
+        value.z = field.GetBoolean(src) == 0 ? JNI_FALSE : JNI_TRUE;
+        break;
+      case art::Primitive::Type::kPrimByte:
+        value.b = field.GetByte(src);
+        break;
+      case art::Primitive::Type::kPrimChar:
+        value.c = field.GetChar(src);
+        break;
+      case art::Primitive::Type::kPrimShort:
+        value.s = field.GetShort(src);
+        break;
+      case art::Primitive::Type::kPrimInt:
+        value.i = field.GetInt(src);
+        break;
+      case art::Primitive::Type::kPrimLong:
+        value.j = field.GetLong(src);
+        break;
+      case art::Primitive::Type::kPrimFloat:
+        value.f = field.GetFloat(src);
+        break;
+      case art::Primitive::Type::kPrimDouble:
+        value.d = field.GetDouble(src);
+        break;
+      case art::Primitive::Type::kPrimVoid:
+      case art::Primitive::Type::kPrimNot: {
+        LOG(FATAL) << "Should not reach here";
+        UNREACHABLE();
+      }
+    }
+
+    jlong obj_tag = user_data->tag_table_->GetTagOrZero(src.Ptr());
+    const jlong saved_obj_tag = obj_tag;
+
+    jint ret = user_data->cb_->primitive_field_callback(kReportStatic
+                                                            ? JVMTI_HEAP_REFERENCE_STATIC_FIELD
+                                                            : JVMTI_HEAP_REFERENCE_FIELD,
+                                                        &info,
+                                                        user_data->class_tag_,
+                                                        &obj_tag,
+                                                        value,
+                                                        prim_type,
+                                                        const_cast<void*>(user_data->user_data_));
+
+    if (saved_obj_tag != obj_tag) {
+      user_data->tag_table_->Set(src.Ptr(), obj_tag);
+    }
+
+    if ((ret & JVMTI_VISIT_ABORT) != 0) {
+      return true;
+    }
+
+    return false;
+  }
+
+  ObjectTagTable* tag_table_;
+  jlong class_tag_;
+  const jvmtiHeapCallbacks* cb_;
+  const void* user_data_;
+};
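The dispatch above fixes the callback's parameter order, matching jvmtiPrimitiveFieldCallback from the JVMTI specification. An agent-side sketch; the name and the counting behavior are illustrative:

    extern "C" jint PrimitiveFieldCallback(jvmtiHeapReferenceKind kind,
                                           const jvmtiHeapReferenceInfo* info,
                                           jlong object_class_tag,
                                           jlong* object_tag_ptr,
                                           jvalue value,
                                           jvmtiPrimitiveType value_type,
                                           void* user_data) {
      if (kind == JVMTI_HEAP_REFERENCE_STATIC_FIELD &&
          value_type == JVMTI_PRIMITIVE_TYPE_LONG) {
        // info->field.index uses the numbering scheme described above;
        // value.j carries the field's current value.
        ++*static_cast<size_t*>(user_data);
      }
      return 0;  // No JVMTI_VISIT_ABORT bit: keep iterating.
    }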
+
+struct HeapFilter {
+  explicit HeapFilter(jint heap_filter)
+      : filter_out_tagged((heap_filter & JVMTI_HEAP_FILTER_TAGGED) != 0),
         filter_out_untagged((heap_filter & JVMTI_HEAP_FILTER_UNTAGGED) != 0),
         filter_out_class_tagged((heap_filter & JVMTI_HEAP_FILTER_CLASS_TAGGED) != 0),
         filter_out_class_untagged((heap_filter & JVMTI_HEAP_FILTER_CLASS_UNTAGGED) != 0),
         any_filter(filter_out_tagged ||
                    filter_out_untagged ||
                    filter_out_class_tagged ||
-                   filter_out_class_untagged),
-        stop_reports(false) {
+                   filter_out_class_untagged) {
   }
 
-  bool ShouldReportByHeapFilter(jlong tag, jlong class_tag) {
+  bool ShouldReportByHeapFilter(jlong tag, jlong class_tag) const {
     if (!any_filter) {
       return true;
     }
@@ -76,15 +634,45 @@
     return true;
   }
 
-  HeapUtil* heap_util;
-  art::ObjPtr<art::mirror::Class> filter_klass;
-  const jvmtiHeapCallbacks* callbacks;
-  const void* user_data;
   const bool filter_out_tagged;
   const bool filter_out_untagged;
   const bool filter_out_class_tagged;
   const bool filter_out_class_untagged;
   const bool any_filter;
+};
+
+}  // namespace
+
+void HeapUtil::Register() {
+  art::Runtime::Current()->AddSystemWeakHolder(&gIndexCachingTable);
+}
+
+void HeapUtil::Unregister() {
+  art::Runtime::Current()->RemoveSystemWeakHolder(&gIndexCachingTable);
+}
+
+struct IterateThroughHeapData {
+  IterateThroughHeapData(HeapUtil* _heap_util,
+                         jvmtiEnv* _env,
+                         art::ObjPtr<art::mirror::Class> klass,
+                         jint _heap_filter,
+                         const jvmtiHeapCallbacks* _callbacks,
+                         const void* _user_data)
+      : heap_util(_heap_util),
+        heap_filter(_heap_filter),
+        filter_klass(klass),
+        env(_env),
+        callbacks(_callbacks),
+        user_data(_user_data),
+        stop_reports(false) {
+  }
+
+  HeapUtil* heap_util;
+  const HeapFilter heap_filter;
+  art::ObjPtr<art::mirror::Class> filter_klass;
+  jvmtiEnv* env;
+  const jvmtiHeapCallbacks* callbacks;
+  const void* user_data;
 
   bool stop_reports;
 };
@@ -107,12 +695,10 @@
   ithd->heap_util->GetTags()->GetTag(klass.Ptr(), &class_tag);
   // For simplicity, even if we find a tag = 0, assume 0 = not tagged.
 
-  if (!ithd->ShouldReportByHeapFilter(tag, class_tag)) {
+  if (!ithd->heap_filter.ShouldReportByHeapFilter(tag, class_tag)) {
     return;
   }
 
-  // TODO: Handle array_primitive_value_callback.
-
   if (ithd->filter_klass != nullptr) {
     if (ithd->filter_klass != klass) {
       return;
@@ -139,11 +725,33 @@
 
   ithd->stop_reports = (ret & JVMTI_VISIT_ABORT) != 0;
 
-  // TODO Implement array primitive and string primitive callback.
-  // TODO Implement primitive field callback.
+  if (!ithd->stop_reports) {
+    jint string_ret = ReportString(obj,
+                                   ithd->env,
+                                   ithd->heap_util->GetTags(),
+                                   ithd->callbacks,
+                                   ithd->user_data);
+    ithd->stop_reports = (string_ret & JVMTI_VISIT_ABORT) != 0;
+  }
+
+  if (!ithd->stop_reports) {
+    jint array_ret = ReportPrimitiveArray(obj,
+                                          ithd->env,
+                                          ithd->heap_util->GetTags(),
+                                          ithd->callbacks,
+                                          ithd->user_data);
+    ithd->stop_reports = (array_ret & JVMTI_VISIT_ABORT) != 0;
+  }
+
+  if (!ithd->stop_reports) {
+    ithd->stop_reports = ReportPrimitiveField::Report(obj,
+                                                      ithd->heap_util->GetTags(),
+                                                      ithd->callbacks,
+                                                      ithd->user_data);
+  }
 }
 
-jvmtiError HeapUtil::IterateThroughHeap(jvmtiEnv* env ATTRIBUTE_UNUSED,
+jvmtiError HeapUtil::IterateThroughHeap(jvmtiEnv* env,
                                         jint heap_filter,
                                         jclass klass,
                                         const jvmtiHeapCallbacks* callbacks,
@@ -152,17 +760,13 @@
     return ERR(NULL_POINTER);
   }
 
-  if (callbacks->array_primitive_value_callback != nullptr) {
-    // TODO: Implement.
-    return ERR(NOT_IMPLEMENTED);
-  }
-
   art::Thread* self = art::Thread::Current();
   art::ScopedObjectAccess soa(self);      // Now we know we have the shared lock.
 
   IterateThroughHeapData ithd(this,
-                              heap_filter,
+                              env,
                               soa.Decode<art::mirror::Class>(klass),
+                              heap_filter,
                               callbacks,
                               user_data);
 
@@ -174,12 +778,18 @@
 class FollowReferencesHelper FINAL {
  public:
   FollowReferencesHelper(HeapUtil* h,
+                         jvmtiEnv* jvmti_env,
                          art::ObjPtr<art::mirror::Object> initial_object,
                          const jvmtiHeapCallbacks* callbacks,
+                         art::ObjPtr<art::mirror::Class> class_filter,
+                         jint heap_filter,
                          const void* user_data)
-      : tag_table_(h->GetTags()),
+      : env(jvmti_env),
+        tag_table_(h->GetTags()),
         initial_object_(initial_object),
         callbacks_(callbacks),
+        class_filter_(class_filter),
+        heap_filter_(heap_filter),
         user_data_(user_data),
         start_(0),
         stop_reports_(false) {
@@ -414,59 +1024,49 @@
       return;
     }
 
-    // TODO: We'll probably have to rewrite this completely with our own visiting logic, if we
-    //       want to have a chance of getting the field indices computed halfway efficiently. For
-    //       now, ignore them altogether.
-
-    struct InstanceReferenceVisitor {
-      explicit InstanceReferenceVisitor(FollowReferencesHelper* helper_)
-          : helper(helper_), stop_reports(false) {}
-
-      void operator()(art::mirror::Object* src,
-                      art::MemberOffset field_offset,
-                      bool is_static ATTRIBUTE_UNUSED) const
-          REQUIRES_SHARED(art::Locks::mutator_lock_)
-          REQUIRES(!*helper->tag_table_->GetAllowDisallowLock()) {
-        if (stop_reports) {
-          return;
-        }
-
-        art::mirror::Object* trg = src->GetFieldObjectReferenceAddr(field_offset)->AsMirrorPtr();
+    // All instance fields.
+    auto report_instance_field = [&](art::ObjPtr<art::mirror::Object> src,
+                                     art::ObjPtr<art::mirror::Class> obj_klass ATTRIBUTE_UNUSED,
+                                     art::ArtField& field,
+                                     size_t field_index,
+                                     void* user_data ATTRIBUTE_UNUSED)
+        REQUIRES_SHARED(art::Locks::mutator_lock_)
+        REQUIRES(!*tag_table_->GetAllowDisallowLock()) {
+      art::ObjPtr<art::mirror::Object> field_value = field.GetObject(src);
+      if (field_value != nullptr) {
         jvmtiHeapReferenceInfo reference_info;
         memset(&reference_info, 0, sizeof(reference_info));
 
-        // TODO: Implement spec-compliant numbering.
-        reference_info.field.index = field_offset.Int32Value();
+        reference_info.field.index = field_index;
 
         jvmtiHeapReferenceKind kind =
-            field_offset.Int32Value() == art::mirror::Object::ClassOffset().Int32Value()
+            field.GetOffset().Int32Value() == art::mirror::Object::ClassOffset().Int32Value()
                 ? JVMTI_HEAP_REFERENCE_CLASS
                 : JVMTI_HEAP_REFERENCE_FIELD;
         const jvmtiHeapReferenceInfo* reference_info_ptr =
             kind == JVMTI_HEAP_REFERENCE_CLASS ? nullptr : &reference_info;
 
-        stop_reports = !helper->ReportReferenceMaybeEnqueue(kind, reference_info_ptr, src, trg);
+        return !ReportReferenceMaybeEnqueue(kind, reference_info_ptr, src.Ptr(), field_value.Ptr());
       }
-
-      void VisitRoot(art::mirror::CompressedReference<art::mirror::Object>* root ATTRIBUTE_UNUSED)
-          const {
-        LOG(FATAL) << "Unreachable";
-      }
-      void VisitRootIfNonNull(
-          art::mirror::CompressedReference<art::mirror::Object>* root ATTRIBUTE_UNUSED) const {
-        LOG(FATAL) << "Unreachable";
-      }
-
-      // "mutable" required by the visitor API.
-      mutable FollowReferencesHelper* helper;
-      mutable bool stop_reports;
+      return false;
     };
+    stop_reports_ = FieldVisitor<void, true>::ReportFields(obj,
+                                                           nullptr,
+                                                           VisitorFalse<void>,
+                                                           VisitorFalse<void>,
+                                                           VisitorFalse<void>,
+                                                           report_instance_field);
+    if (stop_reports_) {
+      return;
+    }
 
-    InstanceReferenceVisitor visitor(this);
-    // Visit references, not native roots.
-    obj->VisitReferences<false>(visitor, art::VoidFunctor());
+    jint string_ret = ReportString(obj, env, tag_table_, callbacks_, user_data_);
+    stop_reports_ = (string_ret & JVMTI_VISIT_ABORT) != 0;
+    if (stop_reports_) {
+      return;
+    }
 
-    stop_reports_ = visitor.stop_reports;
+    stop_reports_ = ReportPrimitiveField::Report(obj, tag_table_, callbacks_, user_data_);
   }
 
   void VisitArray(art::mirror::Object* array)
@@ -498,6 +1098,11 @@
           }
         }
       }
+    } else {
+      if (!stop_reports_) {
+        jint array_ret = ReportPrimitiveArray(array, env, tag_table_, callbacks_, user_data_);
+        stop_reports_ = (array_ret & JVMTI_VISIT_ABORT) != 0;
+      }
     }
   }
 
@@ -555,26 +1160,38 @@
     DCHECK_EQ(h_klass.Get(), klass);
 
     // Declared static fields.
-    for (auto& field : klass->GetSFields()) {
-      if (!field.IsPrimitiveType()) {
-        art::ObjPtr<art::mirror::Object> field_value = field.GetObject(klass);
-        if (field_value != nullptr) {
-          jvmtiHeapReferenceInfo reference_info;
-          memset(&reference_info, 0, sizeof(reference_info));
+    auto report_static_field = [&](art::ObjPtr<art::mirror::Object> obj ATTRIBUTE_UNUSED,
+                                   art::ObjPtr<art::mirror::Class> obj_klass,
+                                   art::ArtField& field,
+                                   size_t field_index,
+                                   void* user_data ATTRIBUTE_UNUSED)
+        REQUIRES_SHARED(art::Locks::mutator_lock_)
+        REQUIRES(!*tag_table_->GetAllowDisallowLock()) {
+      art::ObjPtr<art::mirror::Object> field_value = field.GetObject(obj_klass);
+      if (field_value != nullptr) {
+        jvmtiHeapReferenceInfo reference_info;
+        memset(&reference_info, 0, sizeof(reference_info));
 
-          // TODO: Implement spec-compliant numbering.
-          reference_info.field.index = field.GetOffset().Int32Value();
+        reference_info.field.index = static_cast<jint>(field_index);
 
-          stop_reports_ = !ReportReferenceMaybeEnqueue(JVMTI_HEAP_REFERENCE_STATIC_FIELD,
-                                                       &reference_info,
-                                                       klass,
-                                                       field_value.Ptr());
-          if (stop_reports_) {
-            return;
-          }
-        }
+        return !ReportReferenceMaybeEnqueue(JVMTI_HEAP_REFERENCE_STATIC_FIELD,
+                                            &reference_info,
+                                            obj_klass.Ptr(),
+                                            field_value.Ptr());
       }
+      return false;
+    };
+    stop_reports_ = FieldVisitor<void, false>::ReportFields(klass,
+                                                            nullptr,
+                                                            VisitorFalse<void>,
+                                                            report_static_field,
+                                                            VisitorFalse<void>,
+                                                            VisitorFalse<void>);
+    if (stop_reports_) {
+      return;
     }
+
+    stop_reports_ = ReportPrimitiveField::Report(klass, tag_table_, callbacks_, user_data_);
   }
 
   void MaybeEnqueue(art::mirror::Object* obj) REQUIRES_SHARED(art::Locks::mutator_lock_) {
@@ -611,11 +1228,20 @@
       return 0;
     }
 
+    if (UNLIKELY(class_filter_ != nullptr) && class_filter_ != referree->GetClass()) {
+      return JVMTI_VISIT_OBJECTS;
+    }
+
     const jlong class_tag = tag_table_->GetTagOrZero(referree->GetClass());
+    jlong tag = tag_table_->GetTagOrZero(referree);
+
+    if (!heap_filter_.ShouldReportByHeapFilter(tag, class_tag)) {
+      return JVMTI_VISIT_OBJECTS;
+    }
+
     const jlong referrer_class_tag =
         referrer == nullptr ? 0 : tag_table_->GetTagOrZero(referrer->GetClass());
     const jlong size = static_cast<jlong>(referree->SizeOf());
-    jlong tag = tag_table_->GetTagOrZero(referree);
     jlong saved_tag = tag;
     jlong referrer_tag = 0;
     jlong saved_referrer_tag = 0;
@@ -630,6 +1256,7 @@
         referrer_tag_ptr = &referrer_tag;
       }
     }
+
     jint length = -1;
     if (referree->IsArrayInstance()) {
       length = referree->AsArray()->GetLength();
@@ -655,9 +1282,12 @@
     return result;
   }
 
+  jvmtiEnv* env;
   ObjectTagTable* tag_table_;
   art::ObjPtr<art::mirror::Object> initial_object_;
   const jvmtiHeapCallbacks* callbacks_;
+  art::ObjPtr<art::mirror::Class> class_filter_;
+  const HeapFilter heap_filter_;
   const void* user_data_;
 
   std::vector<art::mirror::Object*> worklist_;
@@ -671,9 +1301,9 @@
   friend class CollectAndReportRootsVisitor;
 };
 
-jvmtiError HeapUtil::FollowReferences(jvmtiEnv* env ATTRIBUTE_UNUSED,
-                                      jint heap_filter ATTRIBUTE_UNUSED,
-                                      jclass klass ATTRIBUTE_UNUSED,
+jvmtiError HeapUtil::FollowReferences(jvmtiEnv* env,
+                                      jint heap_filter,
+                                      jclass klass,
                                       jobject initial_object,
                                       const jvmtiHeapCallbacks* callbacks,
                                       const void* user_data) {
@@ -681,11 +1311,6 @@
     return ERR(NULL_POINTER);
   }
 
-  if (callbacks->array_primitive_value_callback != nullptr) {
-    // TODO: Implement.
-    return ERR(NOT_IMPLEMENTED);
-  }
-
   art::Thread* self = art::Thread::Current();
 
   art::gc::Heap* heap = art::Runtime::Current()->GetHeap();
@@ -699,9 +1324,15 @@
     art::ScopedThreadSuspension sts(self, art::kWaitingForVisitObjects);
     art::ScopedSuspendAll ssa("FollowReferences");
 
+    art::ObjPtr<art::mirror::Class> class_filter = klass == nullptr
+        ? nullptr
+        : art::ObjPtr<art::mirror::Class>::DownCast(self->DecodeJObject(klass));
     FollowReferencesHelper frh(this,
+                               env,
                                self->DecodeJObject(initial_object),
                                callbacks,
+                               class_filter,
+                               heap_filter,
                                user_data);
     frh.Init();
     frh.Work();
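
The HeapFilter introduced above reduces the four JVMTI_HEAP_FILTER_* bits to a single ShouldReportByHeapFilter decision shared by IterateThroughHeap and FollowReferences. A minimal standalone sketch of the filter semantics, using only the standard jvmti.h constants (an illustration, not the ART implementation):

    #include <jvmti.h>

    // Decide whether an object with the given tags passes the heap filter.
    // Each set bit filters a category *out*; an object is reported only if
    // no applicable bit is set.
    static bool PassesHeapFilter(jint heap_filter, jlong tag, jlong class_tag) {
      if ((heap_filter & JVMTI_HEAP_FILTER_TAGGED) != 0 && tag != 0) {
        return false;  // Skip tagged objects.
      }
      if ((heap_filter & JVMTI_HEAP_FILTER_UNTAGGED) != 0 && tag == 0) {
        return false;  // Skip untagged objects.
      }
      if ((heap_filter & JVMTI_HEAP_FILTER_CLASS_TAGGED) != 0 && class_tag != 0) {
        return false;  // Skip objects whose class is tagged.
      }
      if ((heap_filter & JVMTI_HEAP_FILTER_CLASS_UNTAGGED) != 0 && class_tag == 0) {
        return false;  // Skip objects whose class is untagged.
      }
      return true;
    }
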
diff --git a/runtime/openjdkjvmti/ti_heap.h b/runtime/openjdkjvmti/ti_heap.h
index 72ee097..dccecb4 100644
--- a/runtime/openjdkjvmti/ti_heap.h
+++ b/runtime/openjdkjvmti/ti_heap.h
@@ -49,6 +49,9 @@
     return tags_;
   }
 
+  static void Register();
+  static void Unregister();
+
  private:
   ObjectTagTable* tags_;
 };
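
Register() and Unregister() expose the gIndexCachingTable system-weak holder to the runtime lifecycle. A hypothetical RAII pairing a caller might build on top of these two hooks, so an error path cannot leak the registration (sketch only; the class below is not part of this change):

    #include "ti_heap.h"  // Assumed include path.

    // Hypothetical guard: registers the index-caching weak table on entry and
    // unregisters it on scope exit, whatever the exit path.
    class ScopedHeapUtilRegistration {
     public:
      ScopedHeapUtilRegistration() { openjdkjvmti::HeapUtil::Register(); }
      ~ScopedHeapUtilRegistration() { openjdkjvmti::HeapUtil::Unregister(); }
      ScopedHeapUtilRegistration(const ScopedHeapUtilRegistration&) = delete;
      ScopedHeapUtilRegistration& operator=(const ScopedHeapUtilRegistration&) = delete;
    };
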
diff --git a/runtime/openjdkjvmti/ti_method.cc b/runtime/openjdkjvmti/ti_method.cc
index a6cfcc1..bc73029 100644
--- a/runtime/openjdkjvmti/ti_method.cc
+++ b/runtime/openjdkjvmti/ti_method.cc
@@ -110,35 +110,32 @@
   art::ArtMethod* art_method = art::jni::DecodeArtMethod(method);
   art_method = art_method->GetInterfaceMethodIfProxy(art::kRuntimePointerSize);
 
-  JvmtiUniquePtr name_copy;
+  JvmtiUniquePtr<char[]> name_copy;
   if (name_ptr != nullptr) {
     const char* method_name = art_method->GetName();
     if (method_name == nullptr) {
       method_name = "<error>";
     }
-    unsigned char* tmp;
-    jvmtiError ret = CopyString(env, method_name, &tmp);
-    if (ret != ERR(NONE)) {
+    jvmtiError ret;
+    name_copy = CopyString(env, method_name, &ret);
+    if (name_copy == nullptr) {
       return ret;
     }
-    name_copy = MakeJvmtiUniquePtr(env, tmp);
-    *name_ptr = reinterpret_cast<char*>(tmp);
+    *name_ptr = name_copy.get();
   }
 
-  JvmtiUniquePtr signature_copy;
+  JvmtiUniquePtr<char[]> signature_copy;
   if (signature_ptr != nullptr) {
     const art::Signature sig = art_method->GetSignature();
     std::string str = sig.ToString();
-    unsigned char* tmp;
-    jvmtiError ret = CopyString(env, str.c_str(), &tmp);
-    if (ret != ERR(NONE)) {
+    jvmtiError ret;
+    signature_copy = CopyString(env, str.c_str(), &ret);
+    if (signature_copy == nullptr) {
       return ret;
     }
-    signature_copy = MakeJvmtiUniquePtr(env, tmp);
-    *signature_ptr = reinterpret_cast<char*>(tmp);
+    *signature_ptr = signature_copy.get();
   }
 
-  // TODO: Support generic signature.
   if (generic_ptr != nullptr) {
     *generic_ptr = nullptr;
     if (!art_method->GetDeclaringClass()->IsProxyClass()) {
@@ -150,12 +147,12 @@
           oss << str_array->Get(i)->ToModifiedUtf8();
         }
         std::string output_string = oss.str();
-        unsigned char* tmp;
-        jvmtiError ret = CopyString(env, output_string.c_str(), &tmp);
-        if (ret != ERR(NONE)) {
+        jvmtiError ret;
+        JvmtiUniquePtr<char[]> generic_copy = CopyString(env, output_string.c_str(), &ret);
+        if (generic_copy == nullptr) {
           return ret;
         }
-        *generic_ptr = reinterpret_cast<char*>(tmp);
+        *generic_ptr = generic_copy.release();
       } else if (soa.Self()->IsExceptionPending()) {
         // TODO: Should we report an error here?
         soa.Self()->ClearException();
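
The rewrite above replaces the raw CopyString-then-wrap dance with a CopyString that returns a typed JvmtiUniquePtr directly, so every early return frees the JVMTI allocation and release()/get() marks exactly where ownership leaves the function. A self-contained sketch of the pattern, assuming only the standard jvmtiEnv Allocate/Deallocate calls (the deleter and helper names are illustrative, not ART's):

    #include <jvmti.h>
    #include <cstring>
    #include <memory>

    // Deleter that returns memory to the owning jvmtiEnv.
    struct JvmtiDeleter {
      jvmtiEnv* env;
      void operator()(char* ptr) const {
        env->Deallocate(reinterpret_cast<unsigned char*>(ptr));
      }
    };
    using UniqueJvmtiString = std::unique_ptr<char[], JvmtiDeleter>;

    // Copy a C string into JVMTI-managed memory; on failure *err is set and
    // nullptr is returned, mirroring the CopyString(env, str, &ret) shape above.
    static UniqueJvmtiString CopyStringSketch(jvmtiEnv* env, const char* in, jvmtiError* err) {
      const size_t len = std::strlen(in) + 1;  // Include the terminating NUL.
      unsigned char* mem = nullptr;
      *err = env->Allocate(static_cast<jlong>(len), &mem);
      if (*err != JVMTI_ERROR_NONE) {
        return UniqueJvmtiString(nullptr, JvmtiDeleter{env});
      }
      std::memcpy(mem, in, len);
      return UniqueJvmtiString(reinterpret_cast<char*>(mem), JvmtiDeleter{env});
    }
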
diff --git a/runtime/openjdkjvmti/ti_phase.cc b/runtime/openjdkjvmti/ti_phase.cc
index 60371cf..e494cb6 100644
--- a/runtime/openjdkjvmti/ti_phase.cc
+++ b/runtime/openjdkjvmti/ti_phase.cc
@@ -56,7 +56,6 @@
   }
 
   void NextRuntimePhase(RuntimePhase phase) REQUIRES_SHARED(art::Locks::mutator_lock_) OVERRIDE {
-    // TODO: Events.
     switch (phase) {
       case RuntimePhase::kInitialAgents:
         PhaseUtil::current_phase_ = JVMTI_PHASE_PRIMORDIAL;
diff --git a/runtime/openjdkjvmti/ti_properties.cc b/runtime/openjdkjvmti/ti_properties.cc
index 46b9e71..4f4f013 100644
--- a/runtime/openjdkjvmti/ti_properties.cc
+++ b/runtime/openjdkjvmti/ti_properties.cc
@@ -82,71 +82,69 @@
 static constexpr const char* kPropertyLibraryPath = "java.library.path";
 static constexpr const char* kPropertyClassPath = "java.class.path";
 
-static jvmtiError Copy(jvmtiEnv* env, const char* in, char** out) {
-  unsigned char* data = nullptr;
-  jvmtiError result = CopyString(env, in, &data);
-  *out = reinterpret_cast<char*>(data);
-  return result;
-}
-
 jvmtiError PropertiesUtil::GetSystemProperties(jvmtiEnv* env,
                                                jint* count_ptr,
                                                char*** property_ptr) {
   if (count_ptr == nullptr || property_ptr == nullptr) {
     return ERR(NULL_POINTER);
   }
-  unsigned char* array_data;
-  jvmtiError array_alloc_result = env->Allocate((kPropertiesSize + 2) * sizeof(char*), &array_data);
-  if (array_alloc_result != ERR(NONE)) {
+  jvmtiError array_alloc_result;
+  JvmtiUniquePtr<char*[]> array_data_ptr = AllocJvmtiUniquePtr<char*[]>(env,
+                                                                        kPropertiesSize + 2,
+                                                                        &array_alloc_result);
+  if (array_data_ptr == nullptr) {
     return array_alloc_result;
   }
-  JvmtiUniquePtr array_data_ptr = MakeJvmtiUniquePtr(env, array_data);
-  char** array = reinterpret_cast<char**>(array_data);
 
-  std::vector<JvmtiUniquePtr> property_copies;
+  std::vector<JvmtiUniquePtr<char[]>> property_copies;
 
   {
-    char* libpath_data;
-    jvmtiError libpath_result = Copy(env, kPropertyLibraryPath, &libpath_data);
-    if (libpath_result != ERR(NONE)) {
+    jvmtiError libpath_result;
+    JvmtiUniquePtr<char[]> libpath_data = CopyString(env, kPropertyLibraryPath, &libpath_result);
+    if (libpath_data == nullptr) {
       return libpath_result;
     }
-    array[0] = libpath_data;
-    property_copies.push_back(MakeJvmtiUniquePtr(env, libpath_data));
+    array_data_ptr.get()[0] = libpath_data.get();
+    property_copies.push_back(std::move(libpath_data));
   }
 
   {
-    char* classpath_data;
-    jvmtiError classpath_result = Copy(env, kPropertyClassPath, &classpath_data);
-    if (classpath_result != ERR(NONE)) {
+    jvmtiError classpath_result;
+    JvmtiUniquePtr<char[]> classpath_data = CopyString(env, kPropertyClassPath, &classpath_result);
+    if (classpath_data == nullptr) {
       return classpath_result;
     }
-    array[1] = classpath_data;
-    property_copies.push_back(MakeJvmtiUniquePtr(env, classpath_data));
+    array_data_ptr.get()[1] = classpath_data.get();
+    property_copies.push_back(std::move(classpath_data));
   }
 
   for (size_t i = 0; i != kPropertiesSize; ++i) {
-    char* data;
-    jvmtiError data_result = Copy(env, kProperties[i][0], &data);
-    if (data_result != ERR(NONE)) {
+    jvmtiError data_result;
+    JvmtiUniquePtr<char[]> data = CopyString(env, kProperties[i][0], &data_result);
+    if (data == nullptr) {
       return data_result;
     }
-    array[i + 2] = data;
-    property_copies.push_back(MakeJvmtiUniquePtr(env, data));
+    array_data_ptr.get()[i + 2] = data.get();
+    property_copies.push_back(std::move(data));
   }
 
   // Everything is OK, release the data.
-  array_data_ptr.release();
+  *count_ptr = kPropertiesSize + 2;
+  *property_ptr = array_data_ptr.release();
   for (auto& uptr : property_copies) {
     uptr.release();
   }
 
-  *count_ptr = kPropertiesSize + 2;
-  *property_ptr = array;
-
   return ERR(NONE);
 }
 
+static jvmtiError Copy(jvmtiEnv* env, const char* in, char** out) {
+  jvmtiError result;
+  JvmtiUniquePtr<char[]> data = CopyString(env, in, &result);
+  *out = data.release();
+  return result;
+}
+
 jvmtiError PropertiesUtil::GetSystemProperty(jvmtiEnv* env,
                                              const char* property,
                                              char** value_ptr) {
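
GetSystemProperties now keeps every string owned by a smart pointer until the whole array is assembled, then releases the array and each element in one pass once nothing can fail. The same all-or-nothing shape in a minimal sketch, with plain new[] standing in for jvmtiEnv allocation (so a failure simply unwinds through the destructors):

    #include <cstring>
    #include <memory>
    #include <vector>

    // Build a char** array of copies; ownership reaches the caller only if
    // every allocation succeeded.
    static void BuildStringArray(const std::vector<const char*>& in,
                                 char*** out_array, int* out_count) {
      auto array = std::make_unique<char*[]>(in.size());
      std::vector<std::unique_ptr<char[]>> copies;
      copies.reserve(in.size());
      for (size_t i = 0; i < in.size(); ++i) {
        const size_t len = std::strlen(in[i]) + 1;
        auto copy = std::make_unique<char[]>(len);  // A throw here frees everything.
        std::memcpy(copy.get(), in[i], len);
        array[i] = copy.get();  // The array holds a view; 'copies' still owns.
        copies.push_back(std::move(copy));
      }
      // Nothing can fail past this point: hand everything to the caller.
      *out_count = static_cast<int>(in.size());
      *out_array = array.release();
      for (auto& c : copies) {
        c.release();
      }
    }
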
diff --git a/runtime/openjdkjvmti/ti_redefine.cc b/runtime/openjdkjvmti/ti_redefine.cc
index 8436045..9c1d6ef 100644
--- a/runtime/openjdkjvmti/ti_redefine.cc
+++ b/runtime/openjdkjvmti/ti_redefine.cc
@@ -56,6 +56,7 @@
 #include "mirror/class-inl.h"
 #include "mirror/class_ext.h"
 #include "mirror/object.h"
+#include "non_debuggable_classes.h"
 #include "object_lock.h"
 #include "runtime.h"
 #include "ScopedLocalRef.h"
@@ -170,10 +171,6 @@
       // We cannot ensure that the right dex file is used in inlined frames so we don't support
       // redefining them.
       DCHECK(!IsInInlinedFrame()) << "Inlined frames are not supported when using redefinition";
-      // TODO We should really support intrinsic obsolete methods.
-      // TODO We should really support redefining intrinsics.
-      // We don't support intrinsics so check for them here.
-      DCHECK(!old_method->IsIntrinsic());
       art::ArtMethod* new_obsolete_method = obsolete_maps_->FindObsoleteVersion(old_method);
       if (new_obsolete_method == nullptr) {
         // Create a new Obsolete Method and put it in the list.
@@ -181,7 +178,7 @@
         art::ClassLinker* cl = runtime->GetClassLinker();
         auto ptr_size = cl->GetImagePointerSize();
         const size_t method_size = art::ArtMethod::Size(ptr_size);
-        auto* method_storage = allocator_->Alloc(GetThread(), method_size);
+        auto* method_storage = allocator_->Alloc(art::Thread::Current(), method_size);
         CHECK(method_storage != nullptr) << "Unable to allocate storage for obsolete version of '"
                                          << old_method->PrettyMethod() << "'";
         new_obsolete_method = new (method_storage) art::ArtMethod();
@@ -241,6 +238,9 @@
   } else if (klass->IsInterface()) {
     *error_msg = "Modification of Interface classes is currently not supported";
     return ERR(UNMODIFIABLE_CLASS);
+  } else if (klass->IsStringClass()) {
+    *error_msg = "Modification of String class is not supported";
+    return ERR(UNMODIFIABLE_CLASS);
   } else if (klass->IsArrayClass()) {
     *error_msg = "Modification of Array classes is not supported";
     return ERR(UNMODIFIABLE_CLASS);
@@ -249,8 +249,13 @@
     return ERR(UNMODIFIABLE_CLASS);
   }
 
-  // TODO We should check if the class has non-obsoletable methods on the stack
-  LOG(WARNING) << "presence of non-obsoletable methods on stacks is not currently checked";
+  for (jclass c : art::NonDebuggableClasses::GetNonDebuggableClasses()) {
+    if (klass.Get() == art::Thread::Current()->DecodeJObject(c)->AsClass()) {
+      *error_msg = "Class might have stack frames that cannot be made obsolete";
+      return ERR(UNMODIFIABLE_CLASS);
+    }
+  }
+
   return OK;
 }
 
@@ -298,6 +303,7 @@
 }
 
 jvmtiError Redefiner::RedefineClasses(ArtJvmTiEnv* env,
+                                      EventHandler* event_handler,
                                       art::Runtime* runtime,
                                       art::Thread* self,
                                       jint class_count,
@@ -319,13 +325,19 @@
   std::vector<ArtClassDefinition> def_vector;
   def_vector.reserve(class_count);
   for (jint i = 0; i < class_count; i++) {
+    jboolean is_modifiable = JNI_FALSE;
+    jvmtiError res = env->IsModifiableClass(definitions[i].klass, &is_modifiable);
+    if (res != OK) {
+      return res;
+    } else if (!is_modifiable) {
+      return ERR(UNMODIFIABLE_CLASS);
+    }
     // We make a copy of the class_bytes to pass into the retransformation.
     // This makes cleanup easier (since we unambiguously own the bytes) and also is useful since we
     // will need to keep the original bytes around unaltered for subsequent RetransformClasses calls
     // to get the passed in bytes.
-    // TODO Implement saving the original bytes.
     unsigned char* class_bytes_copy = nullptr;
-    jvmtiError res = env->Allocate(definitions[i].class_byte_count, &class_bytes_copy);
+    res = env->Allocate(definitions[i].class_byte_count, &class_bytes_copy);
     if (res != OK) {
       return res;
     }
@@ -346,6 +358,7 @@
   }
   // Call all the transformation events.
   jvmtiError res = Transformer::RetransformClassesDirect(env,
+                                                         event_handler,
                                                          self,
                                                          &def_vector);
   if (res != OK) {
@@ -396,8 +409,8 @@
     *error_msg_ = "Unable to get class signature!";
     return ret;
   }
-  JvmtiUniquePtr generic_unique_ptr(MakeJvmtiUniquePtr(env, generic_ptr_unused));
-  JvmtiUniquePtr signature_unique_ptr(MakeJvmtiUniquePtr(env, signature_ptr));
+  JvmtiUniquePtr<char> generic_unique_ptr(MakeJvmtiUniquePtr(env, generic_ptr_unused));
+  JvmtiUniquePtr<char> signature_unique_ptr(MakeJvmtiUniquePtr(env, signature_ptr));
   std::unique_ptr<art::MemMap> map(MoveDataToMemMap(original_dex_location,
                                                     def.dex_len,
                                                     def.dex_data.get(),
@@ -518,6 +531,11 @@
   CallbackCtx ctx(&map, linker->GetAllocatorForClassLoader(art_klass->GetClassLoader()));
   // Add all the declared methods to the map
   for (auto& m : art_klass->GetDeclaredMethods(art::kRuntimePointerSize)) {
+    if (m.IsIntrinsic()) {
+      LOG(WARNING) << "Redefining intrinsic method " << m.PrettyMethod() << ". This may cause the "
+                   << "unexpected use of the original definition of " << m.PrettyMethod() << "in "
+                   << "methods that have already been compiled.";
+    }
     // It is possible to simply filter out some methods where they cannot really become obsolete,
     // such as native methods and keep their original (possibly optimized) implementations. We don't
     // do this, however, since we would need to mark these functions (still in the classes
@@ -526,8 +544,6 @@
     // error checking from the interpreter which ensure we don't try to start executing obsolete
     // methods.
     ctx.obsolete_methods.insert(&m);
-    // TODO Allow this or check in IsModifiableClass.
-    DCHECK(!m.IsIntrinsic());
   }
   {
     art::MutexLock mu(driver_->self_, *art::Locks::thread_list_lock_);
@@ -674,7 +690,6 @@
 }
 
 bool Redefiner::ClassRedefinition::CheckClass() {
-  // TODO Might just want to put it in a ObjPtr and NoSuspend assert.
   art::StackHandleScope<1> hs(driver_->self_);
   // Easy check that only 1 class def is present.
   if (dex_file_->NumClassDefs() != 1) {
@@ -750,7 +765,6 @@
   return true;
 }
 
-// TODO Move this to use IsRedefinable when that function is made.
 bool Redefiner::ClassRedefinition::CheckRedefinable() {
   std::string err;
   art::StackHandleScope<1> hs(driver_->self_);
@@ -772,6 +786,8 @@
       CheckSameMethods();
 }
 
+class RedefinitionDataIter;
+
 // A wrapper that lets us hold onto the arbitrary sized data needed for redefinitions in a
 // reasonably sane way. This adds no fields to the normal ObjectArray. By doing this we can avoid
 // having to deal with the fact that we need to hold an arbitrary number of references live.
@@ -795,13 +811,15 @@
   RedefinitionDataHolder(art::StackHandleScope<1>* hs,
                          art::Runtime* runtime,
                          art::Thread* self,
-                         int32_t num_redefinitions) REQUIRES_SHARED(art::Locks::mutator_lock_) :
+                         std::vector<Redefiner::ClassRedefinition>* redefinitions)
+      REQUIRES_SHARED(art::Locks::mutator_lock_) :
     arr_(
       hs->NewHandle(
         art::mirror::ObjectArray<art::mirror::Object>::Alloc(
             self,
             runtime->GetClassLinker()->GetClassRoot(art::ClassLinker::kObjectArrayClass),
-            num_redefinitions * kNumSlots))) {}
+            redefinitions->size() * kNumSlots))),
+    redefinitions_(redefinitions) {}
 
   bool IsNull() const REQUIRES_SHARED(art::Locks::mutator_lock_) {
     return arr_.IsNull();
@@ -863,8 +881,27 @@
     return arr_->GetLength() / kNumSlots;
   }
 
+  std::vector<Redefiner::ClassRedefinition>* GetRedefinitions()
+      REQUIRES_SHARED(art::Locks::mutator_lock_) {
+    return redefinitions_;
+  }
+
+  bool operator==(const RedefinitionDataHolder& other) const
+      REQUIRES_SHARED(art::Locks::mutator_lock_) {
+    return arr_.Get() == other.arr_.Get();
+  }
+
+  bool operator!=(const RedefinitionDataHolder& other) const
+      REQUIRES_SHARED(art::Locks::mutator_lock_) {
+    return !(*this == other);
+  }
+
+  RedefinitionDataIter begin() REQUIRES_SHARED(art::Locks::mutator_lock_);
+  RedefinitionDataIter end() REQUIRES_SHARED(art::Locks::mutator_lock_);
+
  private:
   mutable art::Handle<art::mirror::ObjectArray<art::mirror::Object>> arr_;
+  std::vector<Redefiner::ClassRedefinition>* redefinitions_;
 
   art::mirror::Object* GetSlot(jint klass_index,
                                DataSlot slot) const REQUIRES_SHARED(art::Locks::mutator_lock_) {
@@ -883,9 +920,115 @@
   DISALLOW_COPY_AND_ASSIGN(RedefinitionDataHolder);
 };
 
-// TODO Stash and update soft failure state
-bool Redefiner::ClassRedefinition::CheckVerification(int32_t klass_index,
-                                                     const RedefinitionDataHolder& holder) {
+class RedefinitionDataIter {
+ public:
+  RedefinitionDataIter(int32_t idx, RedefinitionDataHolder& holder) : idx_(idx), holder_(holder) {}
+
+  RedefinitionDataIter(const RedefinitionDataIter&) = default;
+  RedefinitionDataIter(RedefinitionDataIter&&) = default;
+  RedefinitionDataIter& operator=(const RedefinitionDataIter&) = default;
+  RedefinitionDataIter& operator=(RedefinitionDataIter&&) = default;
+
+  bool operator==(const RedefinitionDataIter& other) const
+      REQUIRES_SHARED(art::Locks::mutator_lock_) {
+    return idx_ == other.idx_ && holder_ == other.holder_;
+  }
+
+  bool operator!=(const RedefinitionDataIter& other) const
+      REQUIRES_SHARED(art::Locks::mutator_lock_) {
+    return !(*this == other);
+  }
+
+  RedefinitionDataIter operator++() {  // Value after modification.
+    idx_++;
+    return *this;
+  }
+
+  RedefinitionDataIter operator++(int) {
+    RedefinitionDataIter temp = *this;
+    idx_++;
+    return temp;
+  }
+
+  RedefinitionDataIter operator+(ssize_t delta) const {
+    RedefinitionDataIter temp = *this;
+    temp += delta;
+    return temp;
+  }
+
+  RedefinitionDataIter& operator+=(ssize_t delta) {
+    idx_ += delta;
+    return *this;
+  }
+
+  Redefiner::ClassRedefinition& GetRedefinition() REQUIRES_SHARED(art::Locks::mutator_lock_) {
+    return (*holder_.GetRedefinitions())[idx_];
+  }
+
+  RedefinitionDataHolder& GetHolder() {
+    return holder_;
+  }
+
+  art::mirror::ClassLoader* GetSourceClassLoader() const
+      REQUIRES_SHARED(art::Locks::mutator_lock_) {
+    return holder_.GetSourceClassLoader(idx_);
+  }
+  art::mirror::Object* GetJavaDexFile() const REQUIRES_SHARED(art::Locks::mutator_lock_) {
+    return holder_.GetJavaDexFile(idx_);
+  }
+  art::mirror::LongArray* GetNewDexFileCookie() const REQUIRES_SHARED(art::Locks::mutator_lock_) {
+    return holder_.GetNewDexFileCookie(idx_);
+  }
+  art::mirror::DexCache* GetNewDexCache() const REQUIRES_SHARED(art::Locks::mutator_lock_) {
+    return holder_.GetNewDexCache(idx_);
+  }
+  art::mirror::Class* GetMirrorClass() const REQUIRES_SHARED(art::Locks::mutator_lock_) {
+    return holder_.GetMirrorClass(idx_);
+  }
+  art::mirror::ByteArray* GetOriginalDexFileBytes() const
+      REQUIRES_SHARED(art::Locks::mutator_lock_) {
+    return holder_.GetOriginalDexFileBytes(idx_);
+  }
+  int32_t GetIndex() const {
+    return idx_;
+  }
+
+  void SetSourceClassLoader(art::mirror::ClassLoader* loader)
+      REQUIRES_SHARED(art::Locks::mutator_lock_) {
+    holder_.SetSourceClassLoader(idx_, loader);
+  }
+  void SetJavaDexFile(art::mirror::Object* dexfile) REQUIRES_SHARED(art::Locks::mutator_lock_) {
+    holder_.SetJavaDexFile(idx_, dexfile);
+  }
+  void SetNewDexFileCookie(art::mirror::LongArray* cookie)
+      REQUIRES_SHARED(art::Locks::mutator_lock_) {
+    holder_.SetNewDexFileCookie(idx_, cookie);
+  }
+  void SetNewDexCache(art::mirror::DexCache* cache) REQUIRES_SHARED(art::Locks::mutator_lock_) {
+    holder_.SetNewDexCache(idx_, cache);
+  }
+  void SetMirrorClass(art::mirror::Class* klass) REQUIRES_SHARED(art::Locks::mutator_lock_) {
+    holder_.SetMirrorClass(idx_, klass);
+  }
+  void SetOriginalDexFileBytes(art::mirror::ByteArray* bytes)
+      REQUIRES_SHARED(art::Locks::mutator_lock_) {
+    holder_.SetOriginalDexFileBytes(idx_, bytes);
+  }
+
+ private:
+  int32_t idx_;
+  RedefinitionDataHolder& holder_;
+};
+
+RedefinitionDataIter RedefinitionDataHolder::begin() {
+  return RedefinitionDataIter(0, *this);
+}
+
+RedefinitionDataIter RedefinitionDataHolder::end() {
+  return RedefinitionDataIter(Length(), *this);
+}
+
+bool Redefiner::ClassRedefinition::CheckVerification(const RedefinitionDataIter& iter) {
   DCHECK_EQ(dex_file_->NumClassDefs(), 1u);
   art::StackHandleScope<2> hs(driver_->self_);
   std::string error;
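
RedefinitionDataIter, added above, wraps an index into the holder's slot array so the per-class loops read as begin()/end() iteration instead of parallel manual counters. The essential shape in a stripped-down analogue (illustrative types, not the ART ones):

    #include <cstdint>

    class Holder;

    // A cheap (index, holder) pair; all data access goes through the holder.
    class Iter {
     public:
      Iter(int32_t idx, Holder& holder) : idx_(idx), holder_(holder) {}
      bool operator!=(const Iter& other) const { return idx_ != other.idx_; }
      Iter& operator++() { ++idx_; return *this; }
      int32_t index() const { return idx_; }
     private:
      int32_t idx_;
      Holder& holder_;
    };

    // The holder owns the slot-structured storage and vends iterators.
    class Holder {
     public:
      explicit Holder(int32_t length) : length_(length) {}
      Iter begin() { return Iter(0, *this); }
      Iter end() { return Iter(length_, *this); }
     private:
      int32_t length_;
    };
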
@@ -893,7 +1036,7 @@
   art::verifier::MethodVerifier::FailureKind failure =
       art::verifier::MethodVerifier::VerifyClass(driver_->self_,
                                                  dex_file_.get(),
-                                                 hs.NewHandle(holder.GetNewDexCache(klass_index)),
+                                                 hs.NewHandle(iter.GetNewDexCache()),
                                                  hs.NewHandle(GetClassLoader()),
                                                  dex_file_->GetClassDef(0), /*class_def*/
                                                  nullptr, /*compiler_callbacks*/
@@ -912,21 +1055,20 @@
 // dexfile. This is so that even if multiple classes with the same classloader are redefined at
 // once they are all added to the classloader.
 bool Redefiner::ClassRedefinition::AllocateAndRememberNewDexFileCookie(
-    int32_t klass_index,
     art::Handle<art::mirror::ClassLoader> source_class_loader,
     art::Handle<art::mirror::Object> dex_file_obj,
-    /*out*/RedefinitionDataHolder* holder) {
+    /*out*/RedefinitionDataIter* cur_data) {
   art::StackHandleScope<2> hs(driver_->self_);
   art::MutableHandle<art::mirror::LongArray> old_cookie(
       hs.NewHandle<art::mirror::LongArray>(nullptr));
   bool has_older_cookie = false;
   // See if we already have a cookie that a previous redefinition got from the same classloader.
-  for (int32_t i = 0; i < klass_index; i++) {
-    if (holder->GetSourceClassLoader(i) == source_class_loader.Get()) {
+  for (auto old_data = cur_data->GetHolder().begin(); old_data != *cur_data; ++old_data) {
+    if (old_data.GetSourceClassLoader() == source_class_loader.Get()) {
       // Since every instance of this classloader should have the same cookie associated with it we
       // can stop looking here.
       has_older_cookie = true;
-      old_cookie.Assign(holder->GetNewDexFileCookie(i));
+      old_cookie.Assign(old_data.GetNewDexFileCookie());
       break;
     }
   }
@@ -947,14 +1089,14 @@
   }
 
   // Save the cookie.
-  holder->SetNewDexFileCookie(klass_index, new_cookie.Get());
+  cur_data->SetNewDexFileCookie(new_cookie.Get());
   // If there are other copies of this same classloader we need to make sure that we all have the
   // same cookie.
   if (has_older_cookie) {
-    for (int32_t i = 0; i < klass_index; i++) {
+    for (auto old_data = cur_data->GetHolder().begin(); old_data != *cur_data; ++old_data) {
       // We will let the GC take care of the cookie we allocated for this one.
-      if (holder->GetSourceClassLoader(i) == source_class_loader.Get()) {
-        holder->SetNewDexFileCookie(i, new_cookie.Get());
+      if (old_data.GetSourceClassLoader() == source_class_loader.Get()) {
+        old_data.SetNewDexFileCookie(new_cookie.Get());
       }
     }
   }
@@ -963,33 +1105,32 @@
 }
 
 bool Redefiner::ClassRedefinition::FinishRemainingAllocations(
-    int32_t klass_index, /*out*/RedefinitionDataHolder* holder) {
+    /*out*/RedefinitionDataIter* cur_data) {
   art::ScopedObjectAccessUnchecked soa(driver_->self_);
   art::StackHandleScope<2> hs(driver_->self_);
-  holder->SetMirrorClass(klass_index, GetMirrorClass());
+  cur_data->SetMirrorClass(GetMirrorClass());
   // This shouldn't allocate
   art::Handle<art::mirror::ClassLoader> loader(hs.NewHandle(GetClassLoader()));
   // The bootclasspath is handled specially so it doesn't have a j.l.DexFile.
   if (!art::ClassLinker::IsBootClassLoader(soa, loader.Get())) {
-    holder->SetSourceClassLoader(klass_index, loader.Get());
+    cur_data->SetSourceClassLoader(loader.Get());
     art::Handle<art::mirror::Object> dex_file_obj(hs.NewHandle(
         ClassLoaderHelper::FindSourceDexFileObject(driver_->self_, loader)));
-    holder->SetJavaDexFile(klass_index, dex_file_obj.Get());
+    cur_data->SetJavaDexFile(dex_file_obj.Get());
     if (dex_file_obj == nullptr) {
-      // TODO Better error msg.
       RecordFailure(ERR(INTERNAL), "Unable to find dex file!");
       return false;
     }
     // Allocate the new dex file cookie.
-    if (!AllocateAndRememberNewDexFileCookie(klass_index, loader, dex_file_obj, holder)) {
+    if (!AllocateAndRememberNewDexFileCookie(loader, dex_file_obj, cur_data)) {
       driver_->self_->AssertPendingOOMException();
       driver_->self_->ClearException();
       RecordFailure(ERR(OUT_OF_MEMORY), "Unable to allocate dex file array for class loader");
       return false;
     }
   }
-  holder->SetNewDexCache(klass_index, CreateNewDexCache(loader));
-  if (holder->GetNewDexCache(klass_index) == nullptr) {
+  cur_data->SetNewDexCache(CreateNewDexCache(loader));
+  if (cur_data->GetNewDexCache() == nullptr) {
     driver_->self_->AssertPendingException();
     driver_->self_->ClearException();
     RecordFailure(ERR(OUT_OF_MEMORY), "Unable to allocate DexCache");
@@ -997,8 +1138,8 @@
   }
 
   // We won't always need to set this field.
-  holder->SetOriginalDexFileBytes(klass_index, AllocateOrGetOriginalDexFileBytes());
-  if (holder->GetOriginalDexFileBytes(klass_index) == nullptr) {
+  cur_data->SetOriginalDexFileBytes(AllocateOrGetOriginalDexFileBytes());
+  if (cur_data->GetOriginalDexFileBytes() == nullptr) {
     driver_->self_->AssertPendingOOMException();
     driver_->self_->ClearException();
     RecordFailure(ERR(OUT_OF_MEMORY), "Unable to allocate array for original dex file");
@@ -1043,13 +1184,11 @@
 }
 
 bool Redefiner::FinishAllRemainingAllocations(RedefinitionDataHolder& holder) {
-  int32_t cnt = 0;
-  for (Redefiner::ClassRedefinition& redef : redefinitions_) {
+  for (RedefinitionDataIter data = holder.begin(); data != holder.end(); ++data) {
     // Allocate the data this redefinition requires.
-    if (!redef.FinishRemainingAllocations(cnt, &holder)) {
+    if (!data.GetRedefinition().FinishRemainingAllocations(&data)) {
       return false;
     }
-    cnt++;
   }
   return true;
 }
@@ -1064,22 +1203,39 @@
   }
 }
 
-bool Redefiner::CheckAllClassesAreVerified(const RedefinitionDataHolder& holder) {
-  int32_t cnt = 0;
-  for (Redefiner::ClassRedefinition& redef : redefinitions_) {
-    if (!redef.CheckVerification(cnt, holder)) {
+bool Redefiner::CheckAllClassesAreVerified(RedefinitionDataHolder& holder) {
+  for (RedefinitionDataIter data = holder.begin(); data != holder.end(); ++data) {
+    if (!data.GetRedefinition().CheckVerification(data)) {
       return false;
     }
-    cnt++;
   }
   return true;
 }
 
+class ScopedDisableConcurrentAndMovingGc {
+ public:
+  ScopedDisableConcurrentAndMovingGc(art::gc::Heap* heap, art::Thread* self)
+      : heap_(heap), self_(self) {
+    if (heap_->IsGcConcurrentAndMoving()) {
+      heap_->IncrementDisableMovingGC(self_);
+    }
+  }
+
+  ~ScopedDisableConcurrentAndMovingGc() {
+    if (heap_->IsGcConcurrentAndMoving()) {
+      heap_->DecrementDisableMovingGC(self_);
+    }
+  }
+ private:
+  art::gc::Heap* heap_;
+  art::Thread* self_;
+};
+
 jvmtiError Redefiner::Run() {
   art::StackHandleScope<1> hs(self_);
   // Allocate an array to hold onto all java temporary objects associated with this redefinition.
   // We will let this be collected after the end of this function.
-  RedefinitionDataHolder holder(&hs, runtime_, self_, redefinitions_.size());
+  RedefinitionDataHolder holder(&hs, runtime_, self_, &redefinitions_);
   if (holder.IsNull()) {
     self_->AssertPendingOOMException();
     self_->ClearException();
@@ -1102,58 +1258,43 @@
     // cleaned up by the GC eventually.
     return result_;
   }
+
   // At this point we can no longer fail without corrupting the runtime state.
-  int32_t counter = 0;
-  for (Redefiner::ClassRedefinition& redef : redefinitions_) {
-    if (holder.GetSourceClassLoader(counter) == nullptr) {
-      runtime_->GetClassLinker()->AppendToBootClassPath(self_, redef.GetDexFile());
+  for (RedefinitionDataIter data = holder.begin(); data != holder.end(); ++data) {
+    if (data.GetSourceClassLoader() == nullptr) {
+      runtime_->GetClassLinker()->AppendToBootClassPath(self_, data.GetRedefinition().GetDexFile());
     }
-    counter++;
   }
   UnregisterAllBreakpoints();
+
   // Disable GC and wait for it to be done if we are a moving GC.  This is fine since we are done
   // allocating so no deadlocks.
-  art::gc::Heap* heap = runtime_->GetHeap();
-  if (heap->IsGcConcurrentAndMoving()) {
-    // GC moving objects can cause deadlocks as we are deoptimizing the stack.
-    heap->IncrementDisableMovingGC(self_);
-  }
+  ScopedDisableConcurrentAndMovingGc sdcamgc(runtime_->GetHeap(), self_);
+
   // Do transition to final suspension
   // TODO We might want to give this its own suspended state!
   // TODO This isn't right. We need to change state without any chance of suspend ideally!
-  self_->TransitionFromRunnableToSuspended(art::ThreadState::kNative);
-  runtime_->GetThreadList()->SuspendAll(
-      "Final installation of redefined Classes!", /*long_suspend*/true);
-  // TODO We need to invalidate all breakpoints in the redefined class with the debugger.
-  // TODO We need to deal with any instrumentation/debugger deoptimized_methods_.
-  // TODO We need to update all debugger MethodIDs so they note the method they point to is
-  // obsolete or implement some other well defined semantics.
-  // TODO We need to decide on & implement semantics for JNI jmethodids when we redefine methods.
-  counter = 0;
-  for (Redefiner::ClassRedefinition& redef : redefinitions_) {
+  art::ScopedThreadSuspension sts(self_, art::ThreadState::kNative);
+  art::ScopedSuspendAll ssa("Final installation of redefined Classes!", /*long_suspend*/true);
+  for (RedefinitionDataIter data = holder.begin(); data != holder.end(); ++data) {
     art::ScopedAssertNoThreadSuspension nts("Updating runtime objects for redefinition");
-    if (holder.GetSourceClassLoader(counter) != nullptr) {
-      ClassLoaderHelper::UpdateJavaDexFile(holder.GetJavaDexFile(counter),
-                                           holder.GetNewDexFileCookie(counter));
+    ClassRedefinition& redef = data.GetRedefinition();
+    if (data.GetSourceClassLoader() != nullptr) {
+      ClassLoaderHelper::UpdateJavaDexFile(data.GetJavaDexFile(), data.GetNewDexFileCookie());
     }
-    art::mirror::Class* klass = holder.GetMirrorClass(counter);
+    art::mirror::Class* klass = data.GetMirrorClass();
     // TODO Rewrite so we don't do a stack walk for each and every class.
     redef.FindAndAllocateObsoleteMethods(klass);
-    redef.UpdateClass(klass, holder.GetNewDexCache(counter),
-                      holder.GetOriginalDexFileBytes(counter));
-    counter++;
+    redef.UpdateClass(klass, data.GetNewDexCache(), data.GetOriginalDexFileBytes());
   }
+  // TODO We should check if any of the redefined methods are intrinsic methods here and, if any
+  // are, force a full-world deoptimization before finishing redefinition. If we don't do this then
+  // methods that have been jitted prior to the current redefinition being applied might continue
+  // to use the old versions of the intrinsics!
   // TODO Shrink the obsolete method maps if possible?
-  // TODO Put this into a scoped thing.
-  runtime_->GetThreadList()->ResumeAll();
-  // Get back shared mutator lock as expected for return.
-  self_->TransitionFromSuspendedToRunnable();
   // TODO Do the dex_file release at a more reasonable place. This works but it muddles who really
   // owns the DexFile and when ownership is transferred.
   ReleaseAllDexFiles();
-  if (heap->IsGcConcurrentAndMoving()) {
-    heap->DecrementDisableMovingGC(self_);
-  }
   return OK;
 }
 
@@ -1181,18 +1322,18 @@
     }
     const art::DexFile::ProtoId* proto_id = dex_file_->FindProtoId(method_return_idx,
                                                                    new_type_list);
-    // TODO Return false, cleanup.
     CHECK(proto_id != nullptr || old_type_list == nullptr);
     const art::DexFile::MethodId* method_id = dex_file_->FindMethodId(declaring_class_id,
                                                                       *new_name_id,
                                                                       *proto_id);
-    // TODO Return false, cleanup.
     CHECK(method_id != nullptr);
     uint32_t dex_method_idx = dex_file_->GetIndexForMethodId(*method_id);
     method.SetDexMethodIndex(dex_method_idx);
     linker->SetEntryPointsToInterpreter(&method);
     method.SetCodeItemOffset(dex_file_->FindCodeItemOffset(class_def, dex_method_idx));
     method.SetDexCacheResolvedMethods(new_dex_cache->GetResolvedMethods(), image_pointer_size);
+    // Clear all the intrinsics related flags.
+    method.ClearAccessFlags(art::kAccIntrinsic | (~art::kAccFlagsNotUsedByIntrinsic));
     // Notify the jit that this method is redefined.
     art::jit::Jit* jit = driver_->runtime_->GetJit();
     if (jit != nullptr) {
@@ -1210,7 +1351,6 @@
           dex_file_->FindTypeId(field.GetDeclaringClass()->GetDescriptor(&declaring_class_name));
       const art::DexFile::StringId* new_name_id = dex_file_->FindStringId(field.GetName());
       const art::DexFile::TypeId* new_type_id = dex_file_->FindTypeId(field.GetTypeDescriptor());
-      // TODO Handle error, cleanup.
       CHECK(new_name_id != nullptr && new_type_id != nullptr && new_declaring_id != nullptr);
       const art::DexFile::FieldId* new_field_id =
           dex_file_->FindFieldId(*new_declaring_id, *new_name_id, *new_type_id);
@@ -1256,8 +1396,6 @@
   art::Handle<art::mirror::ClassExt> ext(hs.NewHandle(klass->EnsureExtDataPresent(driver_->self_)));
   if (ext == nullptr) {
     // No memory. Clear exception (it's not useful) and return error.
-    // TODO This doesn't need to be fatal. We could just not support obsolete methods after hitting
-    // this case.
     driver_->self_->AssertPendingOOMException();
     driver_->self_->ClearException();
     RecordFailure(ERR(OUT_OF_MEMORY), "Could not allocate ClassExt");
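
The ClearAccessFlags call added above relies on the convention that an intrinsic stores its ordinal in access-flag bits nothing else uses, so un-marking must clear the kAccIntrinsic bit together with the whole reused payload. The mask arithmetic in isolation, with illustrative constants rather than ART's real values:

    #include <cstdint>

    constexpr uint32_t kAccIntrinsic = 0x80000000u;                // Assumed marker bit.
    constexpr uint32_t kAccFlagsNotUsedByIntrinsic = 0x007FFFFFu;  // Assumed untouched bits.

    // ~kAccFlagsNotUsedByIntrinsic selects every bit an intrinsic may have
    // overwritten; clearing it plus the marker restores a plain method.
    constexpr uint32_t ClearIntrinsicFlags(uint32_t access_flags) {
      return access_flags & ~(kAccIntrinsic | ~kAccFlagsNotUsedByIntrinsic);
    }

    static_assert(ClearIntrinsicFlags(kAccIntrinsic | 0x00000001u) == 0x00000001u,
                  "Bits outside the intrinsic payload survive the clear.");
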
diff --git a/runtime/openjdkjvmti/ti_redefine.h b/runtime/openjdkjvmti/ti_redefine.h
index 65ee291..4313a94 100644
--- a/runtime/openjdkjvmti/ti_redefine.h
+++ b/runtime/openjdkjvmti/ti_redefine.h
@@ -66,6 +66,7 @@
 namespace openjdkjvmti {
 
 class RedefinitionDataHolder;
+class RedefinitionDataIter;
 
 // Class that can redefine a single class's methods.
 // TODO We should really make this be driven by an outside class so we can do multiple classes at
@@ -88,6 +89,7 @@
   // The caller is responsible for freeing it. The runtime makes its own copy of the data.
   // TODO This function should call the transformation events.
   static jvmtiError RedefineClasses(ArtJvmTiEnv* env,
+                                    EventHandler* event_handler,
                                     art::Runtime* runtime,
                                     art::Thread* self,
                                     jint class_count,
@@ -142,14 +144,13 @@
       driver_->RecordFailure(e, class_sig_, err);
     }
 
-    bool FinishRemainingAllocations(int32_t klass_index, /*out*/RedefinitionDataHolder* holder)
+    bool FinishRemainingAllocations(/*out*/RedefinitionDataIter* cur_data)
         REQUIRES_SHARED(art::Locks::mutator_lock_);
 
     bool AllocateAndRememberNewDexFileCookie(
-        int32_t klass_index,
         art::Handle<art::mirror::ClassLoader> source_class_loader,
         art::Handle<art::mirror::Object> dex_file_obj,
-        /*out*/RedefinitionDataHolder* holder)
+        /*out*/RedefinitionDataIter* cur_data)
           REQUIRES_SHARED(art::Locks::mutator_lock_);
 
     void FindAndAllocateObsoleteMethods(art::mirror::Class* art_klass)
@@ -160,8 +161,7 @@
     bool CheckClass() REQUIRES_SHARED(art::Locks::mutator_lock_);
 
     // Checks that the contained class can be successfully verified.
-    bool CheckVerification(int32_t klass_index,
-                           const RedefinitionDataHolder& holder)
+    bool CheckVerification(const RedefinitionDataIter& holder)
         REQUIRES_SHARED(art::Locks::mutator_lock_);
 
     // Preallocates all needed allocations in klass so that we can pause execution safely.
@@ -240,7 +240,7 @@
   jvmtiError Run() REQUIRES_SHARED(art::Locks::mutator_lock_);
 
   bool CheckAllRedefinitionAreValid() REQUIRES_SHARED(art::Locks::mutator_lock_);
-  bool CheckAllClassesAreVerified(const RedefinitionDataHolder& holder)
+  bool CheckAllClassesAreVerified(RedefinitionDataHolder& holder)
       REQUIRES_SHARED(art::Locks::mutator_lock_);
   bool EnsureAllClassAllocationsFinished() REQUIRES_SHARED(art::Locks::mutator_lock_);
   bool FinishAllRemainingAllocations(RedefinitionDataHolder& holder)
@@ -254,6 +254,8 @@
   }
 
   friend struct CallbackCtx;
+  friend class RedefinitionDataHolder;
+  friend class RedefinitionDataIter;
 };
 
 }  // namespace openjdkjvmti
diff --git a/runtime/openjdkjvmti/ti_search.cc b/runtime/openjdkjvmti/ti_search.cc
index df80f85..f51a98f 100644
--- a/runtime/openjdkjvmti/ti_search.cc
+++ b/runtime/openjdkjvmti/ti_search.cc
@@ -212,7 +212,6 @@
     return ERR(WRONG_PHASE);
   }
   if (current->GetClassLinker() == nullptr) {
-    // TODO: Support boot classpath change in OnLoad.
     return ERR(WRONG_PHASE);
   }
   if (segment == nullptr) {
diff --git a/runtime/openjdkjvmti/ti_thread.cc b/runtime/openjdkjvmti/ti_thread.cc
index f8f8fa6..788ac30 100644
--- a/runtime/openjdkjvmti/ti_thread.cc
+++ b/runtime/openjdkjvmti/ti_thread.cc
@@ -186,17 +186,17 @@
     return ERR(INVALID_THREAD);
   }
 
-  JvmtiUniquePtr name_uptr;
+  JvmtiUniquePtr<char[]> name_uptr;
   if (self != nullptr) {
     // Have a native thread object, this thread is alive.
     std::string name;
     self->GetThreadName(name);
-    jvmtiError name_result = CopyString(
-        env, name.c_str(), reinterpret_cast<unsigned char**>(&info_ptr->name));
-    if (name_result != ERR(NONE)) {
+    jvmtiError name_result;
+    name_uptr = CopyString(env, name.c_str(), &name_result);
+    if (name_uptr == nullptr) {
       return name_result;
     }
-    name_uptr = MakeJvmtiUniquePtr(env, info_ptr->name);
+    info_ptr->name = name_uptr.get();
 
     info_ptr->priority = self->GetNativePriority();
 
@@ -239,12 +239,12 @@
       } else {
         name_cstr = "";
       }
-      jvmtiError name_result = CopyString(
-          env, name_cstr, reinterpret_cast<unsigned char**>(&info_ptr->name));
-      if (name_result != ERR(NONE)) {
+      jvmtiError name_result;
+      name_uptr = CopyString(env, name_cstr, &name_result);
+      if (name_uptr == nullptr) {
         return name_result;
       }
-      name_uptr = MakeJvmtiUniquePtr(env, info_ptr->name);
+      info_ptr->name = name_uptr.get();
     }
 
     // Priority.
diff --git a/runtime/openjdkjvmti/ti_threadgroup.cc b/runtime/openjdkjvmti/ti_threadgroup.cc
index 1423874..df14333 100644
--- a/runtime/openjdkjvmti/ti_threadgroup.cc
+++ b/runtime/openjdkjvmti/ti_threadgroup.cc
@@ -116,11 +116,12 @@
       tmp_str = name_obj->ToModifiedUtf8();
       tmp_cstr = tmp_str.c_str();
     }
-    jvmtiError result =
-        CopyString(env, tmp_cstr, reinterpret_cast<unsigned char**>(&info_ptr->name));
-    if (result != ERR(NONE)) {
+    jvmtiError result;
+    JvmtiUniquePtr<char[]> copy = CopyString(env, tmp_cstr, &result);
+    if (copy == nullptr) {
       return result;
     }
+    info_ptr->name = copy.release();
   }
 
   // Parent.
@@ -239,45 +240,38 @@
   std::vector<art::ObjPtr<art::mirror::Object>> thread_groups;
   GetChildThreadGroups(thread_group, &thread_groups);
 
-  jthread* thread_data = nullptr;
-  JvmtiUniquePtr peers_uptr;
+  JvmtiUniquePtr<jthread[]> peers_uptr;
   if (!thread_peers.empty()) {
-    unsigned char* data;
-    jvmtiError res = env->Allocate(sizeof(jthread) * thread_peers.size(), &data);
-    if (res != ERR(NONE)) {
+    jvmtiError res;
+    peers_uptr = AllocJvmtiUniquePtr<jthread[]>(env, thread_peers.size(), &res);
+    if (peers_uptr == nullptr) {
       return res;
     }
-    thread_data = reinterpret_cast<jthread*>(data);
-    peers_uptr = MakeJvmtiUniquePtr(env, data);
   }
 
-  jthreadGroup* group_data = nullptr;
+  JvmtiUniquePtr<jthreadGroup[]> group_uptr;
   if (!thread_groups.empty()) {
-    unsigned char* data;
-    jvmtiError res = env->Allocate(sizeof(jthreadGroup) * thread_groups.size(), &data);
-    if (res != ERR(NONE)) {
+    jvmtiError res;
+    group_uptr = AllocJvmtiUniquePtr<jthreadGroup[]>(env, thread_groups.size(), &res);
+    if (group_uptr == nullptr) {
       return res;
     }
-    group_data = reinterpret_cast<jthreadGroup*>(data);
   }
 
   // Can't fail anymore from here on.
 
   // Copy data into out buffers.
   for (size_t i = 0; i != thread_peers.size(); ++i) {
-    thread_data[i] = soa.AddLocalReference<jthread>(thread_peers[i]);
+    peers_uptr[i] = soa.AddLocalReference<jthread>(thread_peers[i]);
   }
   for (size_t i = 0; i != thread_groups.size(); ++i) {
-    group_data[i] = soa.AddLocalReference<jthreadGroup>(thread_groups[i]);
+    group_uptr[i] = soa.AddLocalReference<jthreadGroup>(thread_groups[i]);
   }
 
   *thread_count_ptr = static_cast<jint>(thread_peers.size());
-  *threads_ptr = thread_data;
+  *threads_ptr = peers_uptr.release();
   *group_count_ptr = static_cast<jint>(thread_groups.size());
-  *groups_ptr = group_data;
-
-  // Everything's fine.
-  peers_uptr.release();
+  *groups_ptr = group_uptr.release();
 
   return ERR(NONE);
 }
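
AllocJvmtiUniquePtr<T[]>, used above for the thread and thread-group arrays, gives the same owning-until-release discipline for typed arrays. A sketch of what such a helper looks like over the standard env->Allocate call (names and factory shape are assumptions, not ART's exact helper):

    #include <jvmti.h>
    #include <memory>

    template <typename T>
    struct JvmtiArrayDeleter {
      jvmtiEnv* env;
      void operator()(T* ptr) const {
        env->Deallocate(reinterpret_cast<unsigned char*>(ptr));
      }
    };

    // Allocate 'count' elements of T from the jvmtiEnv; on failure *err is set
    // and an empty pointer is returned.
    template <typename T>
    std::unique_ptr<T[], JvmtiArrayDeleter<T>> AllocJvmtiArray(jvmtiEnv* env,
                                                               size_t count,
                                                               jvmtiError* err) {
      unsigned char* mem = nullptr;
      *err = env->Allocate(static_cast<jlong>(count * sizeof(T)), &mem);
      T* typed = (*err == JVMTI_ERROR_NONE) ? reinterpret_cast<T*>(mem) : nullptr;
      return std::unique_ptr<T[], JvmtiArrayDeleter<T>>(typed, JvmtiArrayDeleter<T>{env});
    }
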
diff --git a/runtime/openjdkjvmti/transform.cc b/runtime/openjdkjvmti/transform.cc
index 2fec631..bd52cbb 100644
--- a/runtime/openjdkjvmti/transform.cc
+++ b/runtime/openjdkjvmti/transform.cc
@@ -63,12 +63,13 @@
 
 jvmtiError Transformer::RetransformClassesDirect(
       ArtJvmTiEnv* env,
+      EventHandler* event_handler,
       art::Thread* self,
       /*in-out*/std::vector<ArtClassDefinition>* definitions) {
   for (ArtClassDefinition& def : *definitions) {
     jint new_len = -1;
     unsigned char* new_data = nullptr;
-    gEventHandler.DispatchEvent<ArtJvmtiEvent::kClassFileLoadHookRetransformable>(
+    event_handler->DispatchEvent<ArtJvmtiEvent::kClassFileLoadHookRetransformable>(
         self,
         GetJniEnv(env),
         def.klass,
@@ -85,6 +86,7 @@
 }
 
 jvmtiError Transformer::RetransformClasses(ArtJvmTiEnv* env,
+                                           EventHandler* event_handler,
                                            art::Runtime* runtime,
                                            art::Thread* self,
                                            jint class_count,
@@ -107,6 +109,13 @@
   std::vector<ArtClassDefinition> definitions;
   jvmtiError res = OK;
   for (jint i = 0; i < class_count; i++) {
+    jboolean is_modifiable = JNI_FALSE;
+    res = env->IsModifiableClass(classes[i], &is_modifiable);
+    if (res != OK) {
+      return res;
+    } else if (!is_modifiable) {
+      return ERR(UNMODIFIABLE_CLASS);
+    }
     ArtClassDefinition def;
     res = FillInTransformationData(env, classes[i], &def);
     if (res != OK) {
@@ -114,7 +123,7 @@
     }
     definitions.push_back(std::move(def));
   }
-  res = RetransformClassesDirect(env, self, &definitions);
+  res = RetransformClassesDirect(env, event_handler, self, &definitions);
   if (res != OK) {
     return res;
   }
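
The modifiability pre-check added above follows a validate-then-act shape: the whole request is rejected before any per-class state is built. Factored as a standalone helper it would look roughly like this (the helper itself is hypothetical; IsModifiableClass and the error codes are the ones used in the hunk):

    static jvmtiError CheckAllModifiable(ArtJvmTiEnv* env, jint count, const jclass* classes) {
      for (jint i = 0; i < count; ++i) {
        jboolean is_modifiable = JNI_FALSE;
        jvmtiError res = env->IsModifiableClass(classes[i], &is_modifiable);
        if (res != OK) {
          return res;  // Propagate lookup failures unchanged.
        }
        if (!is_modifiable) {
          return ERR(UNMODIFIABLE_CLASS);  // Fail before any definition is collected.
        }
      }
      return OK;
    }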
diff --git a/runtime/openjdkjvmti/transform.h b/runtime/openjdkjvmti/transform.h
index 65f2ae1..c6a36e8 100644
--- a/runtime/openjdkjvmti/transform.h
+++ b/runtime/openjdkjvmti/transform.h
@@ -42,14 +42,20 @@
 
 namespace openjdkjvmti {
 
+class EventHandler;
+
 jvmtiError GetClassLocation(ArtJvmTiEnv* env, jclass klass, /*out*/std::string* location);
 
 class Transformer {
  public:
   static jvmtiError RetransformClassesDirect(
-      ArtJvmTiEnv* env, art::Thread* self, /*in-out*/std::vector<ArtClassDefinition>* definitions);
+      ArtJvmTiEnv* env,
+      EventHandler* event_handler,
+      art::Thread* self,
+      /*in-out*/std::vector<ArtClassDefinition>* definitions);
 
   static jvmtiError RetransformClasses(ArtJvmTiEnv* env,
+                                       EventHandler* event_handler,
                                        art::Runtime* runtime,
                                        art::Thread* self,
                                        jint class_count,
diff --git a/runtime/quick/inline_method_analyser.cc b/runtime/quick/inline_method_analyser.cc
index b009b47..3347070 100644
--- a/runtime/quick/inline_method_analyser.cc
+++ b/runtime/quick/inline_method_analyser.cc
@@ -215,9 +215,8 @@
     REQUIRES_SHARED(Locks::mutator_lock_) {
   DCHECK(IsInstructionIPut(new_iput->Opcode()));
   uint32_t field_index = new_iput->VRegC_22c();
-  PointerSize pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
-  mirror::DexCache* dex_cache = method->GetDexCache();
-  ArtField* field = dex_cache->GetResolvedField(field_index, pointer_size);
+  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+  ArtField* field = class_linker->LookupResolvedField(field_index, method, /* is_static */ false);
   if (UNLIKELY(field == nullptr)) {
     return false;
   }
@@ -227,7 +226,9 @@
     if (iputs[old_pos].field_index == DexFile::kDexNoIndex16) {
       break;
     }
-    ArtField* f = dex_cache->GetResolvedField(iputs[old_pos].field_index, pointer_size);
+    ArtField* f = class_linker->LookupResolvedField(iputs[old_pos].field_index,
+                                                    method,
+                                                    /* is_static */ false);
     DCHECK(f != nullptr);
     if (f == field) {
       auto back_it = std::copy(iputs + old_pos + 1, iputs + arraysize(iputs), iputs + old_pos);
@@ -732,9 +733,9 @@
   if (method == nullptr) {
     return false;
   }
-  mirror::DexCache* dex_cache = method->GetDexCache();
-  PointerSize pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
-  ArtField* field = dex_cache->GetResolvedField(field_idx, pointer_size);
+  ObjPtr<mirror::DexCache> dex_cache = method->GetDexCache();
+  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+  ArtField* field = class_linker->LookupResolvedField(field_idx, method, /* is_static */ false);
   if (field == nullptr || field->IsStatic()) {
     return false;
   }
diff --git a/runtime/read_barrier-inl.h b/runtime/read_barrier-inl.h
index 37cf257..2b38b2e 100644
--- a/runtime/read_barrier-inl.h
+++ b/runtime/read_barrier-inl.h
@@ -198,7 +198,7 @@
 
 inline void ReadBarrier::AssertToSpaceInvariant(mirror::Object* obj, MemberOffset offset,
                                                 mirror::Object* ref) {
-  if (kEnableToSpaceInvariantChecks || kIsDebugBuild) {
+  if (kEnableToSpaceInvariantChecks) {
     if (ref == nullptr || IsDuringStartup()) {
       return;
     }
@@ -209,7 +209,7 @@
 
 inline void ReadBarrier::AssertToSpaceInvariant(GcRootSource* gc_root_source,
                                                 mirror::Object* ref) {
-  if (kEnableToSpaceInvariantChecks || kIsDebugBuild) {
+  if (kEnableToSpaceInvariantChecks) {
     if (ref == nullptr || IsDuringStartup()) {
       return;
     }
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 69dcfeb..9fd2c88 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -114,6 +114,7 @@
 #include "native/java_lang_Thread.h"
 #include "native/java_lang_Throwable.h"
 #include "native/java_lang_VMClassLoader.h"
+#include "native/java_lang_Void.h"
 #include "native/java_lang_invoke_MethodHandleImpl.h"
 #include "native/java_lang_ref_FinalizerReference.h"
 #include "native/java_lang_ref_Reference.h"
@@ -1556,6 +1557,7 @@
   register_java_lang_Thread(env);
   register_java_lang_Throwable(env);
   register_java_lang_VMClassLoader(env);
+  register_java_lang_Void(env);
   register_java_util_concurrent_atomic_AtomicLong(env);
   register_libcore_util_CharsetUtils(env);
   register_org_apache_harmony_dalvik_ddmc_DdmServer(env);
@@ -1961,9 +1963,7 @@
 }
 
 void Runtime::RegisterAppInfo(const std::vector<std::string>& code_paths,
-                              const std::string& profile_output_filename,
-                              const std::string& foreign_dex_profile_path,
-                              const std::string& app_dir) {
+                              const std::string& profile_output_filename) {
   if (jit_.get() == nullptr) {
     // We are not JITing. Nothing to do.
     return;
@@ -1985,18 +1985,7 @@
     return;
   }
 
-  jit_->StartProfileSaver(profile_output_filename,
-                          code_paths,
-                          foreign_dex_profile_path,
-                          app_dir);
-}
-
-void Runtime::NotifyDexLoaded(const std::string& dex_location) {
-  VLOG(profiler) << "Notify dex loaded: " << dex_location;
-  // We know that if the ProfileSaver is started then we can record profile information.
-  if (ProfileSaver::IsStarted()) {
-    ProfileSaver::NotifyDexUse(dex_location);
-  }
+  jit_->StartProfileSaver(profile_output_filename, code_paths);
 }
 
 // Transaction support.
diff --git a/runtime/runtime.h b/runtime/runtime.h
index 4a0169d..d244a9b 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -454,10 +454,7 @@
   }
 
   void RegisterAppInfo(const std::vector<std::string>& code_paths,
-                       const std::string& profile_output_filename,
-                       const std::string& foreign_dex_profile_path,
-                       const std::string& app_dir);
-  void NotifyDexLoaded(const std::string& dex_location);
+                       const std::string& profile_output_filename);
 
   // Transaction support.
   bool IsActiveTransaction() const {
diff --git a/runtime/scoped_thread_state_change-inl.h b/runtime/scoped_thread_state_change-inl.h
index 000da59..c817a9e 100644
--- a/runtime/scoped_thread_state_change-inl.h
+++ b/runtime/scoped_thread_state_change-inl.h
@@ -79,11 +79,11 @@
   return obj == nullptr ? nullptr : Env()->AddLocalReference<T>(obj);
 }
 
-template<typename T, bool kPoison>
-inline ObjPtr<T, kPoison> ScopedObjectAccessAlreadyRunnable::Decode(jobject obj) const {
+template<typename T>
+inline ObjPtr<T> ScopedObjectAccessAlreadyRunnable::Decode(jobject obj) const {
   Locks::mutator_lock_->AssertSharedHeld(Self());
   DCHECK(IsRunnable());  // Don't work with raw objects in non-runnable states.
-  return ObjPtr<T, kPoison>::DownCast(Self()->DecodeJObject(obj));
+  return ObjPtr<T>::DownCast(Self()->DecodeJObject(obj));
 }
 
 inline bool ScopedObjectAccessAlreadyRunnable::IsRunnable() const {
diff --git a/runtime/scoped_thread_state_change.h b/runtime/scoped_thread_state_change.h
index 24199f7..5f03741 100644
--- a/runtime/scoped_thread_state_change.h
+++ b/runtime/scoped_thread_state_change.h
@@ -27,7 +27,7 @@
 namespace art {
 
 struct JNIEnvExt;
-template<class MirrorType, bool kPoison> class ObjPtr;
+template<class MirrorType> class ObjPtr;
 
 // Scoped change into and out of a particular state. Handles Runnable transitions that require
 // more complicated suspension checking. The subclasses ScopedObjectAccessUnchecked and
@@ -91,8 +91,8 @@
   T AddLocalReference(ObjPtr<mirror::Object> obj) const
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  template<typename T, bool kPoison = kIsDebugBuild>
-  ObjPtr<T, kPoison> Decode(jobject obj) const REQUIRES_SHARED(Locks::mutator_lock_);
+  template<typename T>
+  ObjPtr<T> Decode(jobject obj) const REQUIRES_SHARED(Locks::mutator_lock_);
 
   ALWAYS_INLINE bool IsRunnable() const;
 
@@ -141,6 +141,8 @@
   ALWAYS_INLINE explicit ScopedObjectAccessUnchecked(Thread* self)
       REQUIRES(!Locks::thread_suspend_count_lock_);
 
+  ALWAYS_INLINE ~ScopedObjectAccessUnchecked() REQUIRES(!Locks::thread_suspend_count_lock_) {}
+
   // Used when we want a scoped JNI thread state but have no thread/JNIEnv. Consequently doesn't
   // change into Runnable or acquire a share on the mutator_lock_.
   explicit ScopedObjectAccessUnchecked(JavaVM* vm) ALWAYS_INLINE
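
With the kPoison parameter gone, Decode<> call sites shrink to a single template argument; poisoning becomes a global property (see the kObjPtrPoisoning change in thread-inl.h below). A usage sketch, where the JNI entry point itself is hypothetical:

    static void SomeJniEntry(JNIEnv* env, jobject jobj) {
      art::ScopedObjectAccess soa(env);  // Transitions to Runnable, holds the mutator lock.
      art::ObjPtr<art::mirror::Object> obj = soa.Decode<art::mirror::Object>(jobj);
      // ... use obj only while `soa` is in scope ...
    }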
diff --git a/runtime/stack_map.h b/runtime/stack_map.h
index 67f0b57..d936ce9 100644
--- a/runtime/stack_map.h
+++ b/runtime/stack_map.h
@@ -571,7 +571,7 @@
     }
   }
 
-  bool IsDexRegisterLive(uint16_t dex_register_number) const {
+  ALWAYS_INLINE bool IsDexRegisterLive(uint16_t dex_register_number) const {
     size_t live_bit_mask_offset_in_bits = GetLiveBitMaskOffset() * kBitsPerByte;
     return region_.LoadBit(live_bit_mask_offset_in_bits + dex_register_number);
   }
diff --git a/runtime/thread-inl.h b/runtime/thread-inl.h
index 8d94626..02a1e4d 100644
--- a/runtime/thread-inl.h
+++ b/runtime/thread-inl.h
@@ -29,6 +29,7 @@
 #include "base/mutex-inl.h"
 #include "gc/heap.h"
 #include "jni_env_ext.h"
+#include "obj_ptr.h"
 #include "runtime.h"
 #include "thread_pool.h"
 
@@ -93,9 +94,7 @@
           if (held_mutex != nullptr &&
               held_mutex != Locks::mutator_lock_ &&
               held_mutex != cond_var_mutex) {
-            std::vector<BaseMutex*>& expected_mutexes = Locks::expected_mutexes_on_weak_ref_access_;
-            CHECK(std::find(expected_mutexes.begin(), expected_mutexes.end(), held_mutex) !=
-                  expected_mutexes.end())
+            CHECK(Locks::IsExpectedOnWeakRefAccess(held_mutex))
                 << "Holding unexpected mutex " << held_mutex->GetName()
                 << " when accessing weak ref";
           }
@@ -355,7 +354,7 @@
 }
 
 inline void Thread::PoisonObjectPointersIfDebug() {
-  if (kIsDebugBuild) {
+  if (kObjPtrPoisoning) {
     Thread::Current()->PoisonObjectPointers();
   }
 }
diff --git a/runtime/thread.cc b/runtime/thread.cc
index ff66cc1..30a4046 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -1934,7 +1934,6 @@
   wait_cond_ = new ConditionVariable("a thread wait condition variable", *wait_mutex_);
   tlsPtr_.instrumentation_stack = new std::deque<instrumentation::InstrumentationStackFrame>;
   tlsPtr_.name = new std::string(kThreadNameDuringStartup);
-  tlsPtr_.nested_signal_state = static_cast<jmp_buf*>(malloc(sizeof(jmp_buf)));
 
   static_assert((sizeof(Thread) % 4) == 0U,
                 "art::Thread has a size which is not a multiple of 4.");
@@ -2118,7 +2117,6 @@
   delete tlsPtr_.instrumentation_stack;
   delete tlsPtr_.name;
   delete tlsPtr_.deps_or_stack_trace_sample.stack_trace_sample;
-  free(tlsPtr_.nested_signal_state);
 
   Runtime::Current()->GetHeap()->AssertThreadLocalBuffersAreRevoked(this);
 
diff --git a/runtime/thread.h b/runtime/thread.h
index d5fd9e9..de0b892 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -1115,21 +1115,12 @@
     return tlsPtr_.mterp_alt_ibase;
   }
 
-  // Notify that a signal is being handled. This is to protect us from doing recursive
-  // NPE handling after a SIGSEGV.
-  void NoteSignalBeingHandled() {
-    if (tls32_.handling_signal_) {
-      LOG(FATAL) << "Detected signal while processing a signal";
-    }
-    tls32_.handling_signal_ = true;
+  bool HandlingSignal() const {
+    return tls32_.handling_signal_;
   }
 
-  void NoteSignalHandlerDone() {
-    tls32_.handling_signal_ = false;
-  }
-
-  jmp_buf* GetNestedSignalState() {
-    return tlsPtr_.nested_signal_state;
+  void SetHandlingSignal(bool handling_signal) {
+    tls32_.handling_signal_ = handling_signal;
   }
 
   bool IsTransitioningToRunnable() const {
@@ -1460,7 +1451,7 @@
       thread_local_start(nullptr), thread_local_pos(nullptr), thread_local_end(nullptr),
       thread_local_objects(0), mterp_current_ibase(nullptr), mterp_default_ibase(nullptr),
       mterp_alt_ibase(nullptr), thread_local_alloc_stack_top(nullptr),
-      thread_local_alloc_stack_end(nullptr), nested_signal_state(nullptr),
+      thread_local_alloc_stack_end(nullptr),
       flip_function(nullptr), method_verifier(nullptr), thread_local_mark_stack(nullptr) {
       std::fill(held_mutexes, held_mutexes + kLockLevelCount, nullptr);
     }
@@ -1606,9 +1597,6 @@
     // Support for Mutex lock hierarchy bug detection.
     BaseMutex* held_mutexes[kLockLevelCount];
 
-    // Recorded thread state for nested signals.
-    jmp_buf* nested_signal_state;
-
     // The function used for thread flip.
     Closure* flip_function;
 
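The Note*/jmp_buf machinery is replaced by a plain get/set flag, leaving the recursion policy to the fault handler. Callers must now pair set and clear themselves; a scoped guard (hypothetical, not part of this patch) keeps that pairing safe across early exits:

    class ScopedHandlingSignal {
     public:
      explicit ScopedHandlingSignal(Thread* self)
          : self_(self), old_value_(self->HandlingSignal()) {
        self_->SetHandlingSignal(true);
      }
      ~ScopedHandlingSignal() { self_->SetHandlingSignal(old_value_); }

     private:
      Thread* const self_;
      const bool old_value_;  // Restore rather than force-clear, to allow nesting.
    };
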
diff --git a/runtime/type_lookup_table.h b/runtime/type_lookup_table.h
index 3f6f76f..fd68deb 100644
--- a/runtime/type_lookup_table.h
+++ b/runtime/type_lookup_table.h
@@ -148,7 +148,7 @@
     return mask_;
   }
 
-  // Attempt to set an entry on it's hash' slot. If there is alrady something there, return false.
+  // Attempt to set an entry on its hash's slot. If there is already something there, return false.
   // Otherwise return true.
   bool SetOnInitialPos(const Entry& entry, uint32_t hash);
 
diff --git a/runtime/utils/dex_cache_arrays_layout-inl.h b/runtime/utils/dex_cache_arrays_layout-inl.h
index f9a1405..95904af 100644
--- a/runtime/utils/dex_cache_arrays_layout-inl.h
+++ b/runtime/utils/dex_cache_arrays_layout-inl.h
@@ -51,7 +51,11 @@
     : DexCacheArraysLayout(pointer_size, dex_file->GetHeader(), dex_file->NumCallSiteIds()) {
 }
 
-constexpr size_t DexCacheArraysLayout::Alignment() {
+inline size_t DexCacheArraysLayout::Alignment() const {
+  return Alignment(pointer_size_);
+}
+
+inline constexpr size_t DexCacheArraysLayout::Alignment(PointerSize pointer_size) {
   // mirror::Type/String/MethodTypeDexCacheType alignment is 8,
   // i.e. higher than or equal to the pointer alignment.
   static_assert(alignof(mirror::TypeDexCacheType) == 8,
@@ -60,8 +64,8 @@
                 "Expecting alignof(StringDexCacheType) == 8");
   static_assert(alignof(mirror::MethodTypeDexCacheType) == 8,
                 "Expecting alignof(MethodTypeDexCacheType) == 8");
-  // This is the same as alignof(MethodTypeDexCacheType).
-  return alignof(mirror::StringDexCacheType);
+  // This is the same as alignof(FieldDexCacheType) for the given pointer size.
+  return 2u * static_cast<size_t>(pointer_size);
 }
 
 template <typename T>
@@ -100,8 +104,8 @@
 }
 
 inline size_t DexCacheArraysLayout::StringOffset(uint32_t string_idx) const {
-  return strings_offset_ + ElementOffset(PointerSize::k64,
-                                         string_idx % mirror::DexCache::kDexCacheStringCacheSize);
+  uint32_t string_hash = string_idx % mirror::DexCache::kDexCacheStringCacheSize;
+  return strings_offset_ + ElementOffset(PointerSize::k64, string_hash);
 }
 
 inline size_t DexCacheArraysLayout::StringsSize(size_t num_elements) const {
@@ -119,15 +123,20 @@
 }
 
 inline size_t DexCacheArraysLayout::FieldOffset(uint32_t field_idx) const {
-  return fields_offset_ + ElementOffset(pointer_size_, field_idx);
+  uint32_t field_hash = field_idx % mirror::DexCache::kDexCacheFieldCacheSize;
+  return fields_offset_ + 2u * static_cast<size_t>(pointer_size_) * field_hash;
 }
 
 inline size_t DexCacheArraysLayout::FieldsSize(size_t num_elements) const {
-  return ArraySize(pointer_size_, num_elements);
+  size_t cache_size = mirror::DexCache::kDexCacheFieldCacheSize;
+  if (num_elements < cache_size) {
+    cache_size = num_elements;
+  }
+  return 2u * static_cast<size_t>(pointer_size_) * cache_size;
 }
 
 inline size_t DexCacheArraysLayout::FieldsAlignment() const {
-  return static_cast<size_t>(pointer_size_);
+  return 2u * static_cast<size_t>(pointer_size_);
 }
 
 inline size_t DexCacheArraysLayout::MethodTypesSize(size_t num_elements) const {
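
The field cache now stores fixed-size pairs of (field pointer, dex index), so both the stride and the alignment are two pointer words, and the slot is picked by hashing the field index. A worked sketch of the addressing (kFieldCacheSize stands in for mirror::DexCache::kDexCacheFieldCacheSize, whose value is assumed here):

    #include <cstddef>
    #include <cstdint>

    constexpr size_t kPointerSize = 8u;          // PointerSize::k64
    constexpr uint32_t kFieldCacheSize = 1024u;  // assumed cache size

    constexpr size_t FieldSlotOffset(size_t fields_offset, uint32_t field_idx) {
      uint32_t hash = field_idx % kFieldCacheSize;      // slot index
      return fields_offset + 2u * kPointerSize * hash;  // pair stride = 2 words
    }

    // Two indices that differ by the cache size collide on the same slot.
    static_assert(FieldSlotOffset(0u, 1500u) == FieldSlotOffset(0u, 1500u + kFieldCacheSize), "");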
diff --git a/runtime/utils/dex_cache_arrays_layout.h b/runtime/utils/dex_cache_arrays_layout.h
index ed677ed..377a374 100644
--- a/runtime/utils/dex_cache_arrays_layout.h
+++ b/runtime/utils/dex_cache_arrays_layout.h
@@ -57,7 +57,9 @@
     return size_;
   }
 
-  static constexpr size_t Alignment();
+  size_t Alignment() const;
+
+  static constexpr size_t Alignment(PointerSize pointer_size);
 
   size_t TypesOffset() const {
     return types_offset_;
@@ -125,8 +127,6 @@
   const size_t call_sites_offset_;
   const size_t size_;
 
-  static size_t Alignment(PointerSize pointer_size);
-
   static size_t ElementOffset(PointerSize element_size, uint32_t idx);
 
   static size_t ArraySize(PointerSize element_size, uint32_t num_elements);
diff --git a/runtime/vdex_file.h b/runtime/vdex_file.h
index 7daf2f8..898d07d 100644
--- a/runtime/vdex_file.h
+++ b/runtime/vdex_file.h
@@ -61,7 +61,7 @@
 
    private:
     static constexpr uint8_t kVdexMagic[] = { 'v', 'd', 'e', 'x' };
-    static constexpr uint8_t kVdexVersion[] = { '0', '0', '3', '\0' };  // Remove verify-profile
+    static constexpr uint8_t kVdexVersion[] = { '0', '0', '5', '\0' };  // access flags
 
     uint8_t magic_[4];
     uint8_t version_[4];
diff --git a/runtime/verifier/reg_type.cc b/runtime/verifier/reg_type.cc
index 52f7e34..740b7dd 100644
--- a/runtime/verifier/reg_type.cc
+++ b/runtime/verifier/reg_type.cc
@@ -309,6 +309,7 @@
   // Note: no check for IsInstantiable() here. We may produce this in case an InstantiationError
   //       would be thrown at runtime, but we need to continue verification and *not* create a
   //       hard failure or abort.
+  CheckConstructorInvariants(this);
 }
 
 std::string UnresolvedMergedType::Dump() const {
@@ -789,7 +790,7 @@
   if (!klass_.IsNull()) {
     CHECK(!descriptor_.empty()) << *this;
     std::string temp;
-    CHECK_EQ(descriptor_.ToString(), klass_.Read()->GetDescriptor(&temp)) << *this;
+    CHECK_EQ(descriptor_, klass_.Read()->GetDescriptor(&temp)) << *this;
   }
 }
 
@@ -820,9 +821,7 @@
       reg_type_cache_(reg_type_cache),
       resolved_part_(resolved),
       unresolved_types_(unresolved, false, unresolved.GetAllocator()) {
-  if (kIsDebugBuild) {
-    CheckInvariants();
-  }
+  CheckConstructorInvariants(this);
 }
 void UnresolvedMergedType::CheckInvariants() const {
   CHECK(reg_type_cache_ != nullptr);
diff --git a/runtime/verifier/reg_type.h b/runtime/verifier/reg_type.h
index 472381d..dedf77f 100644
--- a/runtime/verifier/reg_type.h
+++ b/runtime/verifier/reg_type.h
@@ -274,14 +274,17 @@
           uint16_t cache_id) REQUIRES_SHARED(Locks::mutator_lock_)
       : descriptor_(descriptor),
         klass_(klass),
-        cache_id_(cache_id) {
+        cache_id_(cache_id) {}
+
+  template <typename Class>
+  void CheckConstructorInvariants(Class* this_ ATTRIBUTE_UNUSED) const
+      REQUIRES_SHARED(Locks::mutator_lock_) {
+    static_assert(std::is_final<Class>::value, "Class must be final.");
     if (kIsDebugBuild) {
       CheckInvariants();
     }
   }
 
-  void CheckInvariants() const REQUIRES_SHARED(Locks::mutator_lock_);
-
   const StringPiece descriptor_;
   mutable GcRoot<mirror::Class> klass_;  // Non-const only due to moving classes.
   const uint16_t cache_id_;
@@ -289,6 +292,8 @@
   friend class RegTypeCache;
 
  private:
+  virtual void CheckInvariants() const REQUIRES_SHARED(Locks::mutator_lock_);
+
   /*
    * A basic Join operation on classes. For a pair of types S and T the Join, written S v T = J, is
    * S <: J, T <: J and for-all U such that S <: U, T <: U then J <: U. That is J is the parent of
@@ -339,7 +344,9 @@
  private:
   ConflictType(mirror::Class* klass, const StringPiece& descriptor,
                uint16_t cache_id) REQUIRES_SHARED(Locks::mutator_lock_)
-      : RegType(klass, descriptor, cache_id) {}
+      : RegType(klass, descriptor, cache_id) {
+    CheckConstructorInvariants(this);
+  }
 
   static const ConflictType* instance_;
 };
@@ -368,7 +375,9 @@
  private:
   UndefinedType(mirror::Class* klass, const StringPiece& descriptor,
                 uint16_t cache_id) REQUIRES_SHARED(Locks::mutator_lock_)
-      : RegType(klass, descriptor, cache_id) {}
+      : RegType(klass, descriptor, cache_id) {
+    CheckConstructorInvariants(this);
+  }
 
   static const UndefinedType* instance_;
 };
@@ -387,7 +396,7 @@
            uint16_t cache_id) REQUIRES_SHARED(Locks::mutator_lock_);
 };
 
-class IntegerType : public Cat1Type {
+class IntegerType FINAL : public Cat1Type {
  public:
   bool IsInteger() const OVERRIDE { return true; }
   std::string Dump() const OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_);
@@ -401,7 +410,9 @@
  private:
   IntegerType(mirror::Class* klass, const StringPiece& descriptor,
               uint16_t cache_id) REQUIRES_SHARED(Locks::mutator_lock_)
-      : Cat1Type(klass, descriptor, cache_id) {}
+      : Cat1Type(klass, descriptor, cache_id) {
+    CheckConstructorInvariants(this);
+  }
   static const IntegerType* instance_;
 };
 
@@ -419,7 +430,9 @@
  private:
   BooleanType(mirror::Class* klass, const StringPiece& descriptor,
               uint16_t cache_id) REQUIRES_SHARED(Locks::mutator_lock_)
-      : Cat1Type(klass, descriptor, cache_id) {}
+      : Cat1Type(klass, descriptor, cache_id) {
+    CheckConstructorInvariants(this);
+  }
 
   static const BooleanType* instance_;
 };
@@ -438,7 +451,9 @@
  private:
   ByteType(mirror::Class* klass, const StringPiece& descriptor,
            uint16_t cache_id) REQUIRES_SHARED(Locks::mutator_lock_)
-      : Cat1Type(klass, descriptor, cache_id) {}
+      : Cat1Type(klass, descriptor, cache_id) {
+    CheckConstructorInvariants(this);
+  }
   static const ByteType* instance_;
 };
 
@@ -456,7 +471,9 @@
  private:
   ShortType(mirror::Class* klass, const StringPiece& descriptor,
             uint16_t cache_id) REQUIRES_SHARED(Locks::mutator_lock_)
-      : Cat1Type(klass, descriptor, cache_id) {}
+      : Cat1Type(klass, descriptor, cache_id) {
+    CheckConstructorInvariants(this);
+  }
   static const ShortType* instance_;
 };
 
@@ -474,7 +491,9 @@
  private:
   CharType(mirror::Class* klass, const StringPiece& descriptor,
            uint16_t cache_id) REQUIRES_SHARED(Locks::mutator_lock_)
-      : Cat1Type(klass, descriptor, cache_id) {}
+      : Cat1Type(klass, descriptor, cache_id) {
+    CheckConstructorInvariants(this);
+  }
   static const CharType* instance_;
 };
 
@@ -492,7 +511,9 @@
  private:
   FloatType(mirror::Class* klass, const StringPiece& descriptor,
             uint16_t cache_id) REQUIRES_SHARED(Locks::mutator_lock_)
-      : Cat1Type(klass, descriptor, cache_id) {}
+      : Cat1Type(klass, descriptor, cache_id) {
+    CheckConstructorInvariants(this);
+  }
   static const FloatType* instance_;
 };
 
@@ -517,7 +538,9 @@
  private:
   LongLoType(mirror::Class* klass, const StringPiece& descriptor,
              uint16_t cache_id) REQUIRES_SHARED(Locks::mutator_lock_)
-      : Cat2Type(klass, descriptor, cache_id) {}
+      : Cat2Type(klass, descriptor, cache_id) {
+    CheckConstructorInvariants(this);
+  }
   static const LongLoType* instance_;
 };
 
@@ -535,7 +558,9 @@
  private:
   LongHiType(mirror::Class* klass, const StringPiece& descriptor,
              uint16_t cache_id) REQUIRES_SHARED(Locks::mutator_lock_)
-      : Cat2Type(klass, descriptor, cache_id) {}
+      : Cat2Type(klass, descriptor, cache_id) {
+    CheckConstructorInvariants(this);
+  }
   static const LongHiType* instance_;
 };
 
@@ -554,7 +579,9 @@
  private:
   DoubleLoType(mirror::Class* klass, const StringPiece& descriptor,
                uint16_t cache_id) REQUIRES_SHARED(Locks::mutator_lock_)
-      : Cat2Type(klass, descriptor, cache_id) {}
+      : Cat2Type(klass, descriptor, cache_id) {
+    CheckConstructorInvariants(this);
+  }
   static const DoubleLoType* instance_;
 };
 
@@ -572,7 +599,9 @@
  private:
   DoubleHiType(mirror::Class* klass, const StringPiece& descriptor,
                uint16_t cache_id) REQUIRES_SHARED(Locks::mutator_lock_)
-      : Cat2Type(klass, descriptor, cache_id) {}
+      : Cat2Type(klass, descriptor, cache_id) {
+    CheckConstructorInvariants(this);
+  }
   static const DoubleHiType* instance_;
 };
 
@@ -637,7 +666,9 @@
  public:
   PreciseConstType(uint32_t constant, uint16_t cache_id)
       REQUIRES_SHARED(Locks::mutator_lock_)
-      : ConstantType(constant, cache_id) {}
+      : ConstantType(constant, cache_id) {
+    CheckConstructorInvariants(this);
+  }
 
   bool IsPreciseConstant() const OVERRIDE { return true; }
 
@@ -648,7 +679,9 @@
  public:
   PreciseConstLoType(uint32_t constant, uint16_t cache_id)
       REQUIRES_SHARED(Locks::mutator_lock_)
-      : ConstantType(constant, cache_id) {}
+      : ConstantType(constant, cache_id) {
+    CheckConstructorInvariants(this);
+  }
   bool IsPreciseConstantLo() const OVERRIDE { return true; }
   std::string Dump() const OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_);
 };
@@ -657,7 +690,9 @@
  public:
   PreciseConstHiType(uint32_t constant, uint16_t cache_id)
       REQUIRES_SHARED(Locks::mutator_lock_)
-      : ConstantType(constant, cache_id) {}
+      : ConstantType(constant, cache_id) {
+    CheckConstructorInvariants(this);
+  }
   bool IsPreciseConstantHi() const OVERRIDE { return true; }
   std::string Dump() const OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_);
 };
@@ -667,6 +702,7 @@
   ImpreciseConstType(uint32_t constant, uint16_t cache_id)
        REQUIRES_SHARED(Locks::mutator_lock_)
        : ConstantType(constant, cache_id) {
+    CheckConstructorInvariants(this);
   }
   bool IsImpreciseConstant() const OVERRIDE { return true; }
   std::string Dump() const OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_);
@@ -676,7 +712,9 @@
  public:
   ImpreciseConstLoType(uint32_t constant, uint16_t cache_id)
       REQUIRES_SHARED(Locks::mutator_lock_)
-      : ConstantType(constant, cache_id) {}
+      : ConstantType(constant, cache_id) {
+    CheckConstructorInvariants(this);
+  }
   bool IsImpreciseConstantLo() const OVERRIDE { return true; }
   std::string Dump() const OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_);
 };
@@ -685,7 +723,9 @@
  public:
   ImpreciseConstHiType(uint32_t constant, uint16_t cache_id)
       REQUIRES_SHARED(Locks::mutator_lock_)
-      : ConstantType(constant, cache_id) {}
+      : ConstantType(constant, cache_id) {
+    CheckConstructorInvariants(this);
+  }
   bool IsImpreciseConstantHi() const OVERRIDE { return true; }
   std::string Dump() const OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_);
 };
@@ -718,7 +758,9 @@
                              const StringPiece& descriptor,
                              uint32_t allocation_pc, uint16_t cache_id)
       REQUIRES_SHARED(Locks::mutator_lock_)
-      : UninitializedType(klass, descriptor, allocation_pc, cache_id) {}
+      : UninitializedType(klass, descriptor, allocation_pc, cache_id) {
+    CheckConstructorInvariants(this);
+  }
 
   bool IsUninitializedReference() const OVERRIDE { return true; }
 
@@ -735,9 +777,7 @@
                                  uint32_t allocation_pc, uint16_t cache_id)
       REQUIRES_SHARED(Locks::mutator_lock_)
       : UninitializedType(nullptr, descriptor, allocation_pc, cache_id) {
-    if (kIsDebugBuild) {
-      CheckInvariants();
-    }
+    CheckConstructorInvariants(this);
   }
 
   bool IsUnresolvedAndUninitializedReference() const OVERRIDE { return true; }
@@ -747,7 +787,7 @@
   std::string Dump() const OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_);
 
  private:
-  void CheckInvariants() const REQUIRES_SHARED(Locks::mutator_lock_);
+  void CheckInvariants() const REQUIRES_SHARED(Locks::mutator_lock_) OVERRIDE;
 };
 
 // Similar to UninitializedReferenceType but special case for the this argument
@@ -759,9 +799,7 @@
                                  uint16_t cache_id)
       REQUIRES_SHARED(Locks::mutator_lock_)
       : UninitializedType(klass, descriptor, 0, cache_id) {
-    if (kIsDebugBuild) {
-      CheckInvariants();
-    }
+    CheckConstructorInvariants(this);
   }
 
   virtual bool IsUninitializedThisReference() const OVERRIDE { return true; }
@@ -771,7 +809,7 @@
   std::string Dump() const OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_);
 
  private:
-  void CheckInvariants() const REQUIRES_SHARED(Locks::mutator_lock_);
+  void CheckInvariants() const REQUIRES_SHARED(Locks::mutator_lock_) OVERRIDE;
 };
 
 class UnresolvedUninitializedThisRefType FINAL : public UninitializedType {
@@ -780,9 +818,7 @@
                                      uint16_t cache_id)
       REQUIRES_SHARED(Locks::mutator_lock_)
       : UninitializedType(nullptr, descriptor, 0, cache_id) {
-    if (kIsDebugBuild) {
-      CheckInvariants();
-    }
+    CheckConstructorInvariants(this);
   }
 
   bool IsUnresolvedAndUninitializedThisReference() const OVERRIDE { return true; }
@@ -792,7 +828,7 @@
   std::string Dump() const OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_);
 
  private:
-  void CheckInvariants() const REQUIRES_SHARED(Locks::mutator_lock_);
+  void CheckInvariants() const REQUIRES_SHARED(Locks::mutator_lock_) OVERRIDE;
 };
 
 // A type of register holding a reference to an Object of type GetClass or a
@@ -801,7 +837,9 @@
  public:
   ReferenceType(mirror::Class* klass, const StringPiece& descriptor,
                 uint16_t cache_id) REQUIRES_SHARED(Locks::mutator_lock_)
-      : RegType(klass, descriptor, cache_id) {}
+      : RegType(klass, descriptor, cache_id) {
+    CheckConstructorInvariants(this);
+  }
 
   bool IsReference() const OVERRIDE { return true; }
 
@@ -848,9 +886,7 @@
   UnresolvedReferenceType(const StringPiece& descriptor, uint16_t cache_id)
       REQUIRES_SHARED(Locks::mutator_lock_)
       : UnresolvedType(descriptor, cache_id) {
-    if (kIsDebugBuild) {
-      CheckInvariants();
-    }
+    CheckConstructorInvariants(this);
   }
 
   bool IsUnresolvedReference() const OVERRIDE { return true; }
@@ -860,7 +896,7 @@
   std::string Dump() const OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_);
 
  private:
-  void CheckInvariants() const REQUIRES_SHARED(Locks::mutator_lock_);
+  void CheckInvariants() const REQUIRES_SHARED(Locks::mutator_lock_) OVERRIDE;
 };
 
 // Type representing the super-class of an unresolved type.
@@ -872,9 +908,7 @@
       : UnresolvedType("", cache_id),
         unresolved_child_id_(child_id),
         reg_type_cache_(reg_type_cache) {
-    if (kIsDebugBuild) {
-      CheckInvariants();
-    }
+    CheckConstructorInvariants(this);
   }
 
   bool IsUnresolvedSuperClass() const OVERRIDE { return true; }
@@ -889,7 +923,7 @@
   std::string Dump() const OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_);
 
  private:
-  void CheckInvariants() const REQUIRES_SHARED(Locks::mutator_lock_);
+  void CheckInvariants() const REQUIRES_SHARED(Locks::mutator_lock_) OVERRIDE;
 
   const uint16_t unresolved_child_id_;
   const RegTypeCache* const reg_type_cache_;
@@ -925,7 +959,7 @@
   std::string Dump() const OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_);
 
  private:
-  void CheckInvariants() const REQUIRES_SHARED(Locks::mutator_lock_);
+  void CheckInvariants() const REQUIRES_SHARED(Locks::mutator_lock_) OVERRIDE;
 
   const RegTypeCache* const reg_type_cache_;
 
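The CheckConstructorInvariants change above works around the rule that virtual calls made during a base-class constructor never reach derived overrides: each leaf constructor instead calls a template helper, which statically requires the leaf to be final, so the virtual CheckInvariants it then invokes is guaranteed to be the final override. A self-contained sketch of the pattern (all names illustrative; NDEBUG stands in for !kIsDebugBuild):

    #include <type_traits>

    class Base {
     protected:
      template <typename Class>
      void CheckConstructorInvariants(Class* /* this_ */) const {
        static_assert(std::is_final<Class>::value, "Class must be final.");
    #ifndef NDEBUG
        CheckInvariants();  // Dispatches to the (final) leaf override.
    #endif
      }

     private:
      virtual void CheckInvariants() const {}
    };

    class Leaf final : public Base {
     public:
      Leaf() { CheckConstructorInvariants(this); }

     private:
      void CheckInvariants() const override { /* leaf-specific checks */ }
    };
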
diff --git a/runtime/verifier/verifier_deps.cc b/runtime/verifier/verifier_deps.cc
index 000cf7c..d477ecd 100644
--- a/runtime/verifier/verifier_deps.cc
+++ b/runtime/verifier/verifier_deps.cc
@@ -68,13 +68,17 @@
   return (it == dex_deps_.end()) ? nullptr : it->second.get();
 }
 
+// Access flags that impact vdex verification.
+static constexpr uint32_t kAccVdexAccessFlags =
+    kAccPublic | kAccPrivate | kAccProtected | kAccStatic | kAccInterface;
+
 template <typename T>
 uint16_t VerifierDeps::GetAccessFlags(T* element) {
   static_assert(kAccJavaFlagsMask == 0xFFFF, "Unexpected value of a constant");
   if (element == nullptr) {
     return VerifierDeps::kUnresolvedMarker;
   } else {
-    uint16_t access_flags = Low16Bits(element->GetAccessFlags());
+    uint16_t access_flags = Low16Bits(element->GetAccessFlags()) & kAccVdexAccessFlags;
     CHECK_NE(access_flags, VerifierDeps::kUnresolvedMarker);
     return access_flags;
   }
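
Masking with kAccVdexAccessFlags means only flags that can affect a verification outcome are recorded in the deps, so flipping an unrelated flag in a dependency no longer spuriously invalidates the vdex. A small self-checking sketch (flag values are the standard dex access flags; kAccSynthetic is brought in only for the example):

    #include <cstdint>

    constexpr uint32_t kAccPublic    = 0x0001;
    constexpr uint32_t kAccPrivate   = 0x0002;
    constexpr uint32_t kAccProtected = 0x0004;
    constexpr uint32_t kAccStatic    = 0x0008;
    constexpr uint32_t kAccInterface = 0x0200;
    constexpr uint32_t kAccSynthetic = 0x1000;  // not relevant to verification

    constexpr uint32_t kAccVdexAccessFlags =
        kAccPublic | kAccPrivate | kAccProtected | kAccStatic | kAccInterface;

    // Adding the synthetic bit leaves the recorded value unchanged.
    static_assert(((kAccPublic | kAccSynthetic) & kAccVdexAccessFlags) ==
                  (kAccPublic & kAccVdexAccessFlags), "");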
diff --git a/test/021-string2/src/Main.java b/test/021-string2/src/Main.java
index 0dd82ab..194f4a1 100644
--- a/test/021-string2/src/Main.java
+++ b/test/021-string2/src/Main.java
@@ -127,6 +127,9 @@
         Assert.assertEquals("I", /* Small latin dotless i */ "\u0131".toUpperCase());
         Assert.assertEquals("abc", "a\u0131c".replace('\u0131', 'b'));
         Assert.assertEquals("a\u0131c", "abc".replace('b', '\u0131'));
+
+        // Regression test for scratch register exhaustion in String.equals() intrinsic on arm64.
+        Assert.assertFalse(result.equals("Very long constant string, so that the known constant count field cannot be embedded in a CMP immediate instruction on arm64. Since it can hold 12-bit values, optionally shifted left by 12, let's go somewhere over 2^12, i.e. 4096. That should trigger the bug with or without string compression. 0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefg
hijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/"));
     }
 
     public static void testCompareToAndEquals() {
diff --git a/test/100-reflect2/expected.txt b/test/100-reflect2/expected.txt
index dd89d64..e2a1001 100644
--- a/test/100-reflect2/expected.txt
+++ b/test/100-reflect2/expected.txt
@@ -33,7 +33,7 @@
 14 (class java.lang.Short)
 [java.lang.String(int,int,char[]), public java.lang.String(), public java.lang.String(byte[]), public java.lang.String(byte[],int), public java.lang.String(byte[],int,int), public java.lang.String(byte[],int,int,int), public java.lang.String(byte[],int,int,java.lang.String) throws java.io.UnsupportedEncodingException, public java.lang.String(byte[],int,int,java.nio.charset.Charset), public java.lang.String(byte[],java.lang.String) throws java.io.UnsupportedEncodingException, public java.lang.String(byte[],java.nio.charset.Charset), public java.lang.String(char[]), public java.lang.String(char[],int,int), public java.lang.String(int[],int,int), public java.lang.String(java.lang.String), public java.lang.String(java.lang.StringBuffer), public java.lang.String(java.lang.StringBuilder)]
 [private final int java.lang.String.count, private int java.lang.String.hash, private static final java.io.ObjectStreamField[] java.lang.String.serialPersistentFields, private static final long java.lang.String.serialVersionUID, public static final java.util.Comparator java.lang.String.CASE_INSENSITIVE_ORDER]
-[native void java.lang.String.getCharsNoCheck(int,int,char[],int), native void java.lang.String.setCharAt(int,char), private boolean java.lang.String.nonSyncContentEquals(java.lang.AbstractStringBuilder), private int java.lang.String.indexOfSupplementary(int,int), private int java.lang.String.lastIndexOfSupplementary(int,int), private native int java.lang.String.fastIndexOf(int,int), private native java.lang.String java.lang.String.fastSubstring(int,int), public boolean java.lang.String.contains(java.lang.CharSequence), public boolean java.lang.String.contentEquals(java.lang.CharSequence), public boolean java.lang.String.contentEquals(java.lang.StringBuffer), public boolean java.lang.String.endsWith(java.lang.String), public boolean java.lang.String.equals(java.lang.Object), public boolean java.lang.String.equalsIgnoreCase(java.lang.String), public boolean java.lang.String.isEmpty(), public boolean java.lang.String.matches(java.lang.String), public boolean java.lang.String.regionMatches(boolean,int,java.lang.String,int,int), public boolean java.lang.String.regionMatches(int,java.lang.String,int,int), public boolean java.lang.String.startsWith(java.lang.String), public boolean java.lang.String.startsWith(java.lang.String,int), public byte[] java.lang.String.getBytes(), public byte[] java.lang.String.getBytes(java.lang.String) throws java.io.UnsupportedEncodingException, public byte[] java.lang.String.getBytes(java.nio.charset.Charset), public int java.lang.String.codePointAt(int), public int java.lang.String.codePointBefore(int), public int java.lang.String.codePointCount(int,int), public int java.lang.String.compareTo(java.lang.Object), public int java.lang.String.compareToIgnoreCase(java.lang.String), public int java.lang.String.hashCode(), public int java.lang.String.indexOf(int), public int java.lang.String.indexOf(int,int), public int java.lang.String.indexOf(java.lang.String), public int java.lang.String.indexOf(java.lang.String,int), public int java.lang.String.lastIndexOf(int), public int java.lang.String.lastIndexOf(int,int), public int java.lang.String.lastIndexOf(java.lang.String), public int java.lang.String.lastIndexOf(java.lang.String,int), public int java.lang.String.length(), public int java.lang.String.offsetByCodePoints(int,int), public java.lang.CharSequence java.lang.String.subSequence(int,int), public java.lang.String java.lang.String.replace(char,char), public java.lang.String java.lang.String.replace(java.lang.CharSequence,java.lang.CharSequence), public java.lang.String java.lang.String.replaceAll(java.lang.String,java.lang.String), public java.lang.String java.lang.String.replaceFirst(java.lang.String,java.lang.String), public java.lang.String java.lang.String.substring(int), public java.lang.String java.lang.String.substring(int,int), public java.lang.String java.lang.String.toLowerCase(), public java.lang.String java.lang.String.toLowerCase(java.util.Locale), public java.lang.String java.lang.String.toString(), public java.lang.String java.lang.String.toUpperCase(), public java.lang.String java.lang.String.toUpperCase(java.util.Locale), public java.lang.String java.lang.String.trim(), public java.lang.String[] java.lang.String.split(java.lang.String), public java.lang.String[] java.lang.String.split(java.lang.String,int), public native char java.lang.String.charAt(int), public native char[] java.lang.String.toCharArray(), public native int java.lang.String.compareTo(java.lang.String), public native java.lang.String java.lang.String.concat(java.lang.String), 
public native java.lang.String java.lang.String.intern(), public static java.lang.String java.lang.String.copyValueOf(char[]), public static java.lang.String java.lang.String.copyValueOf(char[],int,int), public static java.lang.String java.lang.String.format(java.lang.String,java.lang.Object[]), public static java.lang.String java.lang.String.format(java.util.Locale,java.lang.String,java.lang.Object[]), public static java.lang.String java.lang.String.join(java.lang.CharSequence,java.lang.CharSequence[]), public static java.lang.String java.lang.String.join(java.lang.CharSequence,java.lang.Iterable), public static java.lang.String java.lang.String.valueOf(boolean), public static java.lang.String java.lang.String.valueOf(char), public static java.lang.String java.lang.String.valueOf(char[]), public static java.lang.String java.lang.String.valueOf(char[],int,int), public static java.lang.String java.lang.String.valueOf(double), public static java.lang.String java.lang.String.valueOf(float), public static java.lang.String java.lang.String.valueOf(int), public static java.lang.String java.lang.String.valueOf(java.lang.Object), public static java.lang.String java.lang.String.valueOf(long), public void java.lang.String.getBytes(int,int,byte[],int), public void java.lang.String.getChars(int,int,char[],int), static int java.lang.String.indexOf(char[],int,int,char[],int,int,int), static int java.lang.String.indexOf(java.lang.String,java.lang.String,int), static int java.lang.String.lastIndexOf(char[],int,int,char[],int,int,int), static int java.lang.String.lastIndexOf(java.lang.String,java.lang.String,int), void java.lang.String.getChars(char[],int)]
+[native void java.lang.String.getCharsNoCheck(int,int,char[],int), private boolean java.lang.String.nonSyncContentEquals(java.lang.AbstractStringBuilder), private int java.lang.String.indexOfSupplementary(int,int), private int java.lang.String.lastIndexOfSupplementary(int,int), private native int java.lang.String.fastIndexOf(int,int), private native java.lang.String java.lang.String.doReplace(char,char), private native java.lang.String java.lang.String.fastSubstring(int,int), public boolean java.lang.String.contains(java.lang.CharSequence), public boolean java.lang.String.contentEquals(java.lang.CharSequence), public boolean java.lang.String.contentEquals(java.lang.StringBuffer), public boolean java.lang.String.endsWith(java.lang.String), public boolean java.lang.String.equals(java.lang.Object), public boolean java.lang.String.equalsIgnoreCase(java.lang.String), public boolean java.lang.String.isEmpty(), public boolean java.lang.String.matches(java.lang.String), public boolean java.lang.String.regionMatches(boolean,int,java.lang.String,int,int), public boolean java.lang.String.regionMatches(int,java.lang.String,int,int), public boolean java.lang.String.startsWith(java.lang.String), public boolean java.lang.String.startsWith(java.lang.String,int), public byte[] java.lang.String.getBytes(), public byte[] java.lang.String.getBytes(java.lang.String) throws java.io.UnsupportedEncodingException, public byte[] java.lang.String.getBytes(java.nio.charset.Charset), public int java.lang.String.codePointAt(int), public int java.lang.String.codePointBefore(int), public int java.lang.String.codePointCount(int,int), public int java.lang.String.compareTo(java.lang.Object), public int java.lang.String.compareToIgnoreCase(java.lang.String), public int java.lang.String.hashCode(), public int java.lang.String.indexOf(int), public int java.lang.String.indexOf(int,int), public int java.lang.String.indexOf(java.lang.String), public int java.lang.String.indexOf(java.lang.String,int), public int java.lang.String.lastIndexOf(int), public int java.lang.String.lastIndexOf(int,int), public int java.lang.String.lastIndexOf(java.lang.String), public int java.lang.String.lastIndexOf(java.lang.String,int), public int java.lang.String.length(), public int java.lang.String.offsetByCodePoints(int,int), public java.lang.CharSequence java.lang.String.subSequence(int,int), public java.lang.String java.lang.String.replace(char,char), public java.lang.String java.lang.String.replace(java.lang.CharSequence,java.lang.CharSequence), public java.lang.String java.lang.String.replaceAll(java.lang.String,java.lang.String), public java.lang.String java.lang.String.replaceFirst(java.lang.String,java.lang.String), public java.lang.String java.lang.String.substring(int), public java.lang.String java.lang.String.substring(int,int), public java.lang.String java.lang.String.toLowerCase(), public java.lang.String java.lang.String.toLowerCase(java.util.Locale), public java.lang.String java.lang.String.toString(), public java.lang.String java.lang.String.toUpperCase(), public java.lang.String java.lang.String.toUpperCase(java.util.Locale), public java.lang.String java.lang.String.trim(), public java.lang.String[] java.lang.String.split(java.lang.String), public java.lang.String[] java.lang.String.split(java.lang.String,int), public native char java.lang.String.charAt(int), public native char[] java.lang.String.toCharArray(), public native int java.lang.String.compareTo(java.lang.String), public native java.lang.String 
java.lang.String.concat(java.lang.String), public native java.lang.String java.lang.String.intern(), public static java.lang.String java.lang.String.copyValueOf(char[]), public static java.lang.String java.lang.String.copyValueOf(char[],int,int), public static java.lang.String java.lang.String.format(java.lang.String,java.lang.Object[]), public static java.lang.String java.lang.String.format(java.util.Locale,java.lang.String,java.lang.Object[]), public static java.lang.String java.lang.String.join(java.lang.CharSequence,java.lang.CharSequence[]), public static java.lang.String java.lang.String.join(java.lang.CharSequence,java.lang.Iterable), public static java.lang.String java.lang.String.valueOf(boolean), public static java.lang.String java.lang.String.valueOf(char), public static java.lang.String java.lang.String.valueOf(char[]), public static java.lang.String java.lang.String.valueOf(char[],int,int), public static java.lang.String java.lang.String.valueOf(double), public static java.lang.String java.lang.String.valueOf(float), public static java.lang.String java.lang.String.valueOf(int), public static java.lang.String java.lang.String.valueOf(java.lang.Object), public static java.lang.String java.lang.String.valueOf(long), public void java.lang.String.getBytes(int,int,byte[],int), public void java.lang.String.getChars(int,int,char[],int), static int java.lang.String.indexOf(char[],int,int,char[],int,int,int), static int java.lang.String.indexOf(java.lang.String,java.lang.String,int), static int java.lang.String.lastIndexOf(char[],int,int,char[],int,int,int), static int java.lang.String.lastIndexOf(java.lang.String,java.lang.String,int), void java.lang.String.getChars(char[],int)]
 []
 [interface java.io.Serializable, interface java.lang.Comparable, interface java.lang.CharSequence]
 0
diff --git a/test/577-profile-foreign-dex/expected.txt b/test/152-dead-large-object/expected.txt
similarity index 100%
rename from test/577-profile-foreign-dex/expected.txt
rename to test/152-dead-large-object/expected.txt
diff --git a/test/152-dead-large-object/info.txt b/test/152-dead-large-object/info.txt
new file mode 100644
index 0000000..45023cd
--- /dev/null
+++ b/test/152-dead-large-object/info.txt
@@ -0,0 +1 @@
+Test that large objects are freed properly after a GC.
diff --git a/test/577-profile-foreign-dex/src-ex/OtherDex.java b/test/152-dead-large-object/src/Main.java
similarity index 65%
copy from test/577-profile-foreign-dex/src-ex/OtherDex.java
copy to test/152-dead-large-object/src/Main.java
index cba73b3..72fd25c 100644
--- a/test/577-profile-foreign-dex/src-ex/OtherDex.java
+++ b/test/152-dead-large-object/src/Main.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2016 The Android Open Source Project
+ * Copyright (C) 2017 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -13,5 +13,14 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-public class OtherDex {
+
+public class Main {
+    static volatile Object a[] = null;
+
+    public static void main(String[] args) {
+        for (int i = 0; i < 10; ++i) {
+            a = new Object[i * 300000];
+            Runtime.getRuntime().gc();
+        }
+    }
 }
diff --git a/test/154-gc-loop/src/Main.java b/test/154-gc-loop/src/Main.java
index 3a256c1..69015b6 100644
--- a/test/154-gc-loop/src/Main.java
+++ b/test/154-gc-loop/src/Main.java
@@ -38,7 +38,7 @@
         }
     } catch (Exception e) {}
     System.out.println("Finalize count too large: " +
-            ((finalizeCounter >= 10) ? Integer.toString(finalizeCounter) : "false"));
+            ((finalizeCounter >= 15) ? Integer.toString(finalizeCounter) : "false"));
   }
 
   private static native void backgroundProcessState();
diff --git a/test/155-java-set-resolved-type/src/Main.java b/test/155-java-set-resolved-type/src/Main.java
index 56b8c3e..8f79bd7 100644
--- a/test/155-java-set-resolved-type/src/Main.java
+++ b/test/155-java-set-resolved-type/src/Main.java
@@ -57,8 +57,8 @@
             // we need to find TestInterface.
             clearResolvedTypes(timpl);
 
-            // Force intialization of TestClass2. This expects the interface type to be
-            // resolved and found through simple lookup.
+            // Force initialization of TestImplementation. This expects the interface type
+            // to be resolved and found through simple lookup.
             timpl.newInstance();
         } catch (Throwable t) {
             t.printStackTrace();
diff --git a/test/157-void-class/expected.txt b/test/157-void-class/expected.txt
new file mode 100644
index 0000000..3f61c0b
--- /dev/null
+++ b/test/157-void-class/expected.txt
@@ -0,0 +1,2 @@
+JNI_OnLoad called
+void.class = void
diff --git a/test/577-profile-foreign-dex/expected.txt b/test/157-void-class/info.txt
similarity index 100%
copy from test/577-profile-foreign-dex/expected.txt
copy to test/157-void-class/info.txt
diff --git a/test/157-void-class/run b/test/157-void-class/run
new file mode 100755
index 0000000..59e852c
--- /dev/null
+++ b/test/157-void-class/run
@@ -0,0 +1,22 @@
+#!/bin/bash
+#
+# Copyright (C) 2017 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Let the test build its own core image with --no-image and use verify-profile,
+# so that the compiler does not try to initialize classes. This leaves the
+# java.lang.Void compile-time verified but uninitialized.
+./default-run "$@" --no-image \
+    --runtime-option -Ximage-compiler-option \
+    --runtime-option --compiler-filter=verify-profile
diff --git a/test/157-void-class/src/Main.java b/test/157-void-class/src/Main.java
new file mode 100644
index 0000000..322b705
--- /dev/null
+++ b/test/157-void-class/src/Main.java
@@ -0,0 +1,57 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import libcore.util.EmptyArray;
+
+public class Main {
+    public static void main(String[] args) {
+        try {
+            // Check if we're running dalvik or RI.
+            Class<?> class_loader_class = Class.forName("dalvik.system.PathClassLoader");
+            System.loadLibrary(args[0]);
+        } catch (ClassNotFoundException e) {
+            usingRI = true;
+            // Add expected JNI_OnLoad log line to match expected.txt.
+            System.out.println("JNI_OnLoad called");
+        }
+        try {
+            // Initialize all classes needed for old java.lang.Void.TYPE initialization.
+            Runnable.class.getMethod("run", EmptyArray.CLASS).getReturnType();
+        } catch (Exception e) {
+            throw new Error(e);
+        }
+        // Clear the resolved types of the ojluni dex file to make sure there is no entry
+        // for "V", i.e. void.
+        clearResolvedTypes(Integer.class);
+        // With java.lang.Void being compile-time verified but uninitialized, initialize
+        // it now. Previously, this would indirectly initialize TYPE with the current,
+        // i.e. zero-initialized, value of TYPE. The only thing that could prevent the
+        // series of calls leading to this was a cache hit in Class.getDexCacheType()
+        // which we have prevented by clearing the cache above.
+        Class<?> voidClass = void.class;
+        System.out.println("void.class = " + voidClass);
+    }
+
+    public static void clearResolvedTypes(Class<?> c) {
+        if (!usingRI) {
+            nativeClearResolvedTypes(c);
+        }
+    }
+
+    public static native void nativeClearResolvedTypes(Class<?> c);
+
+    static boolean usingRI = false;
+}
diff --git a/test/158-app-image-class-table/expected.txt b/test/158-app-image-class-table/expected.txt
new file mode 100644
index 0000000..6a5618e
--- /dev/null
+++ b/test/158-app-image-class-table/expected.txt
@@ -0,0 +1 @@
+JNI_OnLoad called
diff --git a/test/158-app-image-class-table/info.txt b/test/158-app-image-class-table/info.txt
new file mode 100644
index 0000000..c844c8e
--- /dev/null
+++ b/test/158-app-image-class-table/info.txt
@@ -0,0 +1,3 @@
+Regression test for the app image class table being erroneously omitted
+when it contains only boot image class loader classes even though dex
+caches were written with references to those classes.
diff --git a/test/577-profile-foreign-dex/expected.txt b/test/158-app-image-class-table/profile
similarity index 100%
copy from test/577-profile-foreign-dex/expected.txt
copy to test/158-app-image-class-table/profile
diff --git a/test/577-profile-foreign-dex/run b/test/158-app-image-class-table/run
similarity index 71%
copy from test/577-profile-foreign-dex/run
copy to test/158-app-image-class-table/run
index ad57d14..146e180 100644
--- a/test/577-profile-foreign-dex/run
+++ b/test/158-app-image-class-table/run
@@ -1,12 +1,12 @@
 #!/bin/bash
 #
-# Copyright 2016 The Android Open Source Project
+# Copyright (C) 2017 The Android Open Source Project
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
-#      http://www.apache.org/licenses/LICENSE-2.0
+#     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
@@ -14,7 +14,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-exec ${RUN} \
-  --runtime-option -Xjitsaveprofilinginfo \
-  --runtime-option -Xusejit:true \
-  "${@}"
+exec ${RUN} "$@" --profile -Xcompiler-option --compiler-filter=speed-profile
diff --git a/test/158-app-image-class-table/src/Main.java b/test/158-app-image-class-table/src/Main.java
new file mode 100644
index 0000000..804468f
--- /dev/null
+++ b/test/158-app-image-class-table/src/Main.java
@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+    public static String TEST_NAME = "158-app-image-class-table";
+
+    public static void main(String[] args) {
+        try {
+            Class<?> class_loader_class = Class.forName("dalvik.system.PathClassLoader");
+            System.loadLibrary(args[0]);
+        } catch (ClassNotFoundException e) {
+            usingRI = true;
+            // Add expected JNI_OnLoad log line to match expected.txt.
+            System.out.println("JNI_OnLoad called");
+        }
+        try {
+            // Resolve but do not initialize TestImplementation. During the resolution,
+            // we see the Cloneable entry in the dex cache, so we do not try to look it
+            // up or resolve it.
+            Class<?> timpl =
+                Class.forName("TestImplementation", false, Main.class.getClassLoader());
+            // Clear the dex cache resolved types to force a proper lookup the next time
+            // we need to find TestInterface.
+            clearResolvedTypes(timpl);
+            // Force initialization of TestImplementation. This expects the interface type
+            // to be resolved and found through simple lookup.
+            timpl.newInstance();
+        } catch (Throwable t) {
+            t.printStackTrace();
+        }
+    }
+
+    public static void clearResolvedTypes(Class<?> c) {
+        if (!usingRI) {
+            nativeClearResolvedTypes(c);
+        }
+    }
+
+    private static boolean usingRI = false;
+
+    public static native void nativeClearResolvedTypes(Class<?> c);
+}
diff --git a/test/577-profile-foreign-dex/src-ex/OtherDex.java b/test/158-app-image-class-table/src/TestImplementation.java
similarity index 74%
copy from test/577-profile-foreign-dex/src-ex/OtherDex.java
copy to test/158-app-image-class-table/src/TestImplementation.java
index cba73b3..558e587 100644
--- a/test/577-profile-foreign-dex/src-ex/OtherDex.java
+++ b/test/158-app-image-class-table/src/TestImplementation.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2016 The Android Open Source Project
+ * Copyright (C) 2017 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -13,5 +13,9 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-public class OtherDex {
+
+public class TestImplementation implements Cloneable {
+    public Object clone() {
+        return new TestImplementation();
+    }
 }
diff --git a/test/201-built-in-except-detail-messages/src/Main.java b/test/201-built-in-except-detail-messages/src/Main.java
index dc58819..c2976c8 100644
--- a/test/201-built-in-except-detail-messages/src/Main.java
+++ b/test/201-built-in-except-detail-messages/src/Main.java
@@ -411,7 +411,7 @@
       m.invoke("hello", "world"); // Wrong type.
       fail();
     } catch (IllegalArgumentException iae) {
-      assertEquals("method java.lang.String.charAt! argument 1 has type int, got java.lang.String",
+      assertEquals("method java.lang.String.charAt argument 1 has type int, got java.lang.String",
           iae.getMessage());
     }
     try {
@@ -419,7 +419,7 @@
       m.invoke("hello", (Object) null); // Null for a primitive argument.
       fail();
     } catch (IllegalArgumentException iae) {
-      assertEquals("method java.lang.String.charAt! argument 1 has type int, got null",
+      assertEquals("method java.lang.String.charAt argument 1 has type int, got null",
           iae.getMessage());
     }
     try {
diff --git a/test/530-checker-lse/src/Main.java b/test/530-checker-lse/src/Main.java
index 9f4be6c..6632503 100644
--- a/test/530-checker-lse/src/Main.java
+++ b/test/530-checker-lse/src/Main.java
@@ -747,6 +747,69 @@
     return 1.0f;
   }
 
+  /// CHECK-START: TestClass2 Main.testStoreStore() load_store_elimination (before)
+  /// CHECK: NewInstance
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldSet
+
+  /// CHECK-START: TestClass2 Main.testStoreStore() load_store_elimination (after)
+  /// CHECK: NewInstance
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldSet
+  /// CHECK-NOT: InstanceFieldSet
+
+  private static TestClass2 testStoreStore() {
+    TestClass2 obj = new TestClass2();
+    obj.i = 41;
+    obj.j = 42;
+    obj.i = 41;
+    obj.j = 43;
+    return obj;
+  }
+
+  /// CHECK-START: int Main.testStoreStoreWithDeoptimize(int[]) load_store_elimination (before)
+  /// CHECK: NewInstance
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldSet
+  /// CHECK: Deoptimize
+  /// CHECK: ArraySet
+  /// CHECK: ArraySet
+  /// CHECK: ArraySet
+  /// CHECK: ArraySet
+  /// CHECK: ArrayGet
+  /// CHECK: ArrayGet
+  /// CHECK: ArrayGet
+  /// CHECK: ArrayGet
+
+  /// CHECK-START: int Main.testStoreStoreWithDeoptimize(int[]) load_store_elimination (after)
+  /// CHECK: NewInstance
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldSet
+  /// CHECK-NOT: InstanceFieldSet
+  /// CHECK: Deoptimize
+  /// CHECK: ArraySet
+  /// CHECK: ArraySet
+  /// CHECK: ArraySet
+  /// CHECK: ArraySet
+  /// CHECK-NOT: ArrayGet
+
+  private static int testStoreStoreWithDeoptimize(int[] arr) {
+    TestClass2 obj = new TestClass2();
+    obj.i = 41;
+    obj.j = 42;
+    obj.i = 41;
+    obj.j = 43;
+    arr[0] = 1;  // One HDeoptimize here.
+    arr[1] = 1;
+    arr[2] = 1;
+    arr[3] = 1;
+    return arr[0] + arr[1] + arr[2] + arr[3];
+  }
+
   /// CHECK-START: double Main.getCircleArea(double, boolean) load_store_elimination (before)
   /// CHECK: NewInstance
 
@@ -785,6 +848,86 @@
     return new Circle(Math.PI).getArea();
   }
 
+  /// CHECK-START: int Main.testAllocationEliminationOfArray1() load_store_elimination (before)
+  /// CHECK: NewArray
+  /// CHECK: ArraySet
+  /// CHECK: ArraySet
+  /// CHECK: ArrayGet
+  /// CHECK: ArrayGet
+  /// CHECK: ArrayGet
+  /// CHECK: ArrayGet
+
+  /// CHECK-START: int Main.testAllocationEliminationOfArray1() load_store_elimination (after)
+  /// CHECK-NOT: NewArray
+  /// CHECK-NOT: ArraySet
+  /// CHECK-NOT: ArrayGet
+  private static int testAllocationEliminationOfArray1() {
+    int[] array = new int[4];
+    array[2] = 4;
+    array[3] = 7;
+    return array[0] + array[1] + array[2] + array[3];
+  }
+
+  /// CHECK-START: int Main.testAllocationEliminationOfArray2() load_store_elimination (before)
+  /// CHECK: NewArray
+  /// CHECK: ArraySet
+  /// CHECK: ArraySet
+  /// CHECK: ArrayGet
+
+  /// CHECK-START: int Main.testAllocationEliminationOfArray2() load_store_elimination (after)
+  /// CHECK: NewArray
+  /// CHECK: ArraySet
+  /// CHECK: ArraySet
+  /// CHECK: ArrayGet
+  private static int testAllocationEliminationOfArray2() {
+    // Cannot eliminate array allocation since array is accessed with non-constant
+    // index.
+    int[] array = new int[4];
+    array[2] = 4;
+    array[3] = 7;
+    int sum = 0;
+    for (int e : array) {
+      sum += e;
+    }
+    return sum;
+  }
+
+  /// CHECK-START: int Main.testAllocationEliminationOfArray3(int) load_store_elimination (before)
+  /// CHECK: NewArray
+  /// CHECK: ArraySet
+  /// CHECK: ArrayGet
+
+  /// CHECK-START: int Main.testAllocationEliminationOfArray3(int) load_store_elimination (after)
+  /// CHECK-NOT: NewArray
+  /// CHECK-NOT: ArraySet
+  /// CHECK-NOT: ArrayGet
+  private static int testAllocationEliminationOfArray3(int i) {
+    int[] array = new int[4];
+    array[i] = 4;
+    return array[i];
+  }
+
+  /// CHECK-START: int Main.testAllocationEliminationOfArray4(int) load_store_elimination (before)
+  /// CHECK: NewArray
+  /// CHECK: ArraySet
+  /// CHECK: ArraySet
+  /// CHECK: ArrayGet
+  /// CHECK: ArrayGet
+
+  /// CHECK-START: int Main.testAllocationEliminationOfArray4(int) load_store_elimination (after)
+  /// CHECK: NewArray
+  /// CHECK: ArraySet
+  /// CHECK: ArraySet
+  /// CHECK: ArrayGet
+  /// CHECK-NOT: ArrayGet
+  private static int testAllocationEliminationOfArray4(int i) {
+    // Cannot eliminate array allocation due to index aliasing between 1 and i.
+    int[] array = new int[4];
+    array[1] = 2;
+    array[i] = 4;
+    return array[1] + array[i];
+  }
+
   static void assertIntEquals(int result, int expected) {
     if (expected != result) {
       throw new Error("Expected: " + expected + ", found: " + result);
@@ -865,6 +1008,15 @@
     assertDoubleEquals(darray[0], Math.PI);
     assertDoubleEquals(darray[1], Math.PI);
     assertDoubleEquals(darray[2], Math.PI);
+
+    assertIntEquals(testAllocationEliminationOfArray1(), 11);
+    assertIntEquals(testAllocationEliminationOfArray2(), 11);
+    assertIntEquals(testAllocationEliminationOfArray3(2), 4);
+    assertIntEquals(testAllocationEliminationOfArray4(2), 6);
+
+    assertIntEquals(testStoreStore().i, 41);
+    assertIntEquals(testStoreStore().j, 43);
+    assertIntEquals(testStoreStoreWithDeoptimize(new int[4]), 4);
   }
 
   static boolean sFlag;
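Note: schematically, the new testStoreStore() coverage checks that load-store
elimination removes stores that are overwritten before the object escapes. A
hand-written equivalent of the expected "after" shape (sketch only):

    private static TestClass2 testStoreStoreAfterLSE() {
        TestClass2 obj = new TestClass2();
        obj.i = 41;  // kept: last write to i before the object escapes
        obj.j = 43;  // kept: last write to j; the earlier j = 42 is dead
        return obj;  // obj escapes here, so the two final stores must remain
    }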
diff --git a/test/532-checker-nonnull-arrayset/src/Main.java b/test/532-checker-nonnull-arrayset/src/Main.java
index 2c701bb..61c9e88 100644
--- a/test/532-checker-nonnull-arrayset/src/Main.java
+++ b/test/532-checker-nonnull-arrayset/src/Main.java
@@ -30,10 +30,14 @@
   /// CHECK:          ReturnVoid
   public static void test() {
     Object[] array = new Object[2];
+    // Store to a static field so the array escapes and LSE does not eliminate the accesses.
+    sArray = array;
     Object nonNull = array[0];
     nonNull.getClass(); // Ensure nonNull has an implicit null check.
     array[1] = nonNull;
   }
 
   public static void main(String[] args) {}
+
+  static Object[] sArray;
 }
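Note: the static store works because, roughly, LSE must treat anything
reachable from a static field as observable. A minimal sketch of the contrast
(illustrative only):

    static Object[] sEscaped;

    static void escapes() {
        Object[] array = new Object[2];
        sEscaped = array;  // array escapes; later stores into it must be kept
        array[1] = "x";
    }

    static void staysLocal() {
        Object[] array = new Object[2];
        array[1] = "x";    // store and allocation may both be eliminated
    }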
diff --git a/test/536-checker-intrinsic-optimization/src/Main.java b/test/536-checker-intrinsic-optimization/src/Main.java
index 52f3f84..e395e28 100644
--- a/test/536-checker-intrinsic-optimization/src/Main.java
+++ b/test/536-checker-intrinsic-optimization/src/Main.java
@@ -330,6 +330,21 @@
   // Terminate the scope for the CHECK-NOT search at the reference or length comparison,
   // whichever comes first.
   /// CHECK:          cmp {{w.*,}} {{w.*|#.*}}
+
+  /// CHECK-START-MIPS: boolean Main.stringArgumentNotNull(java.lang.Object) disassembly (after)
+  /// CHECK:          InvokeVirtual {{.*\.equals.*}} intrinsic:StringEquals
+  /// CHECK-NOT:      beq r0,
+  /// CHECK-NOT:      beqz
+  /// CHECK-NOT:      beqzc
+  // Terminate the scope for the CHECK-NOT search at the class field or length comparison,
+  // whichever comes first.
+  /// CHECK:          lw
+
+  /// CHECK-START-MIPS64: boolean Main.stringArgumentNotNull(java.lang.Object) disassembly (after)
+  /// CHECK:          InvokeVirtual {{.*\.equals.*}} intrinsic:StringEquals
+  /// CHECK-NOT:      beqzc
+  // Terminate the scope for the CHECK-NOT search at the reference comparison.
+  /// CHECK:          beqc
   public static boolean stringArgumentNotNull(Object obj) {
     obj.getClass();
     return "foo".equals(obj);
@@ -384,6 +399,22 @@
   /// CHECK-NOT:      ldr {{w\d+}}, [{{x\d+}}]
   /// CHECK-NOT:      ldr {{w\d+}}, [{{x\d+}}, #0]
   /// CHECK:          cmp {{w\d+}}, {{w\d+|#.*}}
+
+  // Test is brittle as it depends on the class offset being 0.
+  /// CHECK-START-MIPS: boolean Main.stringArgumentIsString() disassembly (after)
+  /// CHECK:          InvokeVirtual intrinsic:StringEquals
+  /// CHECK:          beq{{(zc)?}}
+  // Check that we don't try to compare the classes.
+  /// CHECK-NOT:      lw {{r\d+}}, +0({{r\d+}})
+  /// CHECK:          bne{{c?}}
+
+  // Test is brittle as it depends on the class offset being 0.
+  /// CHECK-START-MIPS64: boolean Main.stringArgumentIsString() disassembly (after)
+  /// CHECK:          InvokeVirtual intrinsic:StringEquals
+  /// CHECK:          beqzc
+  // Check that we don't try to compare the classes.
+  /// CHECK-NOT:      lw {{r\d+}}, +0({{r\d+}})
+  /// CHECK:          bnec
   public static boolean stringArgumentIsString() {
     return "foo".equals(myString);
   }
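Note: the MIPS/MIPS64 assertions above scan across the StringEquals intrinsic's
fast-path checks. Roughly, and with illustrative names only, the intrinsic
behaves like:

    static boolean stringEqualsSketch(String receiver, Object arg) {
        if (receiver == arg) return true;  // reference comparison
        if (arg == null) return false;     // null check (elided when arg is known non-null)
        if (!(arg instanceof String)) return false;  // class check (elided for known Strings)
        String other = (String) arg;
        if (receiver.length() != other.length()) return false;  // length comparison
        for (int i = 0; i < receiver.length(); ++i) {  // char-by-char compare
            if (receiver.charAt(i) != other.charAt(i)) return false;
        }
        return true;
    }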
diff --git a/test/538-checker-embed-constants/src/Main.java b/test/538-checker-embed-constants/src/Main.java
index 4f34ec9..94aad9d 100644
--- a/test/538-checker-embed-constants/src/Main.java
+++ b/test/538-checker-embed-constants/src/Main.java
@@ -37,13 +37,20 @@
   }
 
   /// CHECK-START-ARM: int Main.and511(int) disassembly (after)
-  /// CHECK:                mov {{r\d+}}, #511
-  /// CHECK:                and{{(\.w)?}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+  /// CHECK:                ubfx {{r\d+}}, {{r\d+}}, #0, #9
 
   public static int and511(int arg) {
     return arg & 511;
   }
 
+  /// CHECK-START-ARM: int Main.andF00D(int) disassembly (after)
+  /// CHECK:                mov {{r\d+}}, #61453
+  /// CHECK:                and{{(\.w)?}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+
+  public static int andF00D(int arg) {
+    return arg & 0xF00D;
+  }
+
   /// CHECK-START-ARM: int Main.andNot15(int) disassembly (after)
   /// CHECK-NOT:            mvn {{r\d+}}, #15
   /// CHECK:                bic {{r\d+}}, {{r\d+}}, #0xf
@@ -114,19 +121,31 @@
   }
 
   /// CHECK-START-ARM: long Main.and511(long) disassembly (after)
-  /// CHECK:                mov {{r\d+}}, #511
+  /// CHECK:                ubfx {{r\d+}}, {{r\d+}}, #0, #9
   /// CHECK-NEXT:           mov{{s?}} {{r\d+}}, #0
   /// CHECK-NOT:            and{{(\.w)?}}
   /// CHECK-NOT:            bic{{(\.w)?}}
-  /// CHECK:                and{{(\.w)?}} {{r\d+}}, {{r\d+}}, {{r\d+}}
-  /// CHECK-NEXT:           and{{(\.w)?}} {{r\d+}}, {{r\d+}}, {{r\d+}}
-  /// CHECK-NOT:            and{{(\.w)?}}
-  /// CHECK-NOT:            bic{{(\.w)?}}
 
   public static long and511(long arg) {
     return arg & 511L;
   }
 
+  /// CHECK-START-ARM: long Main.andF00D(long) disassembly (after)
+  /// CHECK:                mov {{r\d+}}, #61453
+  /// CHECK-NEXT:           mov{{s?}} {{r\d+}}, #0
+  /// CHECK-NOT:            and{{(\.w)?}}
+  /// CHECK-NOT:            bic{{(\.w)?}}
+  /// CHECK-NOT:            ubfx
+  /// CHECK:                and{{(\.w)?}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+  /// CHECK-NEXT:           and{{(\.w)?}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+  /// CHECK-NOT:            and{{(\.w)?}}
+  /// CHECK-NOT:            bic{{(\.w)?}}
+  /// CHECK-NOT:            ubfx
+
+  public static long andF00D(long arg) {
+    return arg & 0xF00DL;
+  }
+
   /// CHECK-START-ARM: long Main.andNot15(long) disassembly (after)
   /// CHECK-NOT:            mvn {{r\d+}}, #15
   /// CHECK-NOT:            and{{(\.w)?}}
@@ -631,6 +650,7 @@
     int arg = 0x87654321;
     assertIntEquals(and255(arg), 0x21);
     assertIntEquals(and511(arg), 0x121);
+    assertIntEquals(andF00D(arg), 0x4001);
     assertIntEquals(andNot15(arg), 0x87654320);
     assertIntEquals(or255(arg), 0x876543ff);
     assertIntEquals(or511(arg), 0x876543ff);
@@ -642,6 +662,7 @@
     long longArg = 0x1234567887654321L;
     assertLongEquals(and255(longArg), 0x21L);
     assertLongEquals(and511(longArg), 0x121L);
+    assertLongEquals(andF00D(longArg), 0x4001L);
     assertLongEquals(andNot15(longArg), 0x1234567887654320L);
     assertLongEquals(and0xfffffff00000000f(longArg), 0x1234567000000001L);
     assertLongEquals(or255(longArg), 0x12345678876543ffL);
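Note: a quick worked check of the new constants (plain Java, illustrative).
511 == 0x1ff is a contiguous low-bit mask, so the ARM backend can replace the
mov+and pair with a single ubfx extracting bits #0..#8, while 0xF00D is neither
a contiguous mask nor a Thumb-2 modified immediate, so it still needs mov+and:

    int arg = 0x87654321;
    // 0x321 & 0x1ff -> 0x121, matching assertIntEquals(and511(arg), 0x121)
    System.out.println(Integer.toHexString(arg & 511));
    // 0x4321 & 0xf00d -> 0x4001, matching assertIntEquals(andF00D(arg), 0x4001)
    System.out.println(Integer.toHexString(arg & 0xF00D));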
diff --git a/test/551-checker-shifter-operand/src/Main.java b/test/551-checker-shifter-operand/src/Main.java
index a4561b8..e967398 100644
--- a/test/551-checker-shifter-operand/src/Main.java
+++ b/test/551-checker-shifter-operand/src/Main.java
@@ -76,6 +76,25 @@
    * the shifter operand.
    */
 
+  /// CHECK-START-ARM: long Main.$opt$noinline$translate(long, byte) instruction_simplifier_arm (before)
+  /// CHECK-DAG:   <<l:j\d+>>           ParameterValue
+  /// CHECK-DAG:   <<b:b\d+>>           ParameterValue
+  /// CHECK:       <<tmp:j\d+>>         TypeConversion [<<b>>]
+  /// CHECK:                            Sub [<<l>>,<<tmp>>]
+
+  /// CHECK-START-ARM: long Main.$opt$noinline$translate(long, byte) instruction_simplifier_arm (after)
+  /// CHECK-DAG:   <<l:j\d+>>           ParameterValue
+  /// CHECK-DAG:   <<b:b\d+>>           ParameterValue
+  /// CHECK:                            DataProcWithShifterOp [<<l>>,<<b>>] kind:Sub+SXTB
+
+  /// CHECK-START-ARM: long Main.$opt$noinline$translate(long, byte) instruction_simplifier_arm (after)
+  /// CHECK-NOT:                        TypeConversion
+  /// CHECK-NOT:                        Sub
+
+  /// CHECK-START-ARM: long Main.$opt$noinline$translate(long, byte) disassembly (after)
+  /// CHECK:                            subs r{{\d+}}, r{{\d+}}, r{{\d+}}
+  /// CHECK:                            sbc r{{\d+}}, r{{\d+}}, r{{\d+}}, asr #31
+
   /// CHECK-START-ARM64: long Main.$opt$noinline$translate(long, byte) instruction_simplifier_arm64 (before)
   /// CHECK-DAG:   <<l:j\d+>>           ParameterValue
   /// CHECK-DAG:   <<b:b\d+>>           ParameterValue
@@ -85,7 +104,7 @@
   /// CHECK-START-ARM64: long Main.$opt$noinline$translate(long, byte) instruction_simplifier_arm64 (after)
   /// CHECK-DAG:   <<l:j\d+>>           ParameterValue
   /// CHECK-DAG:   <<b:b\d+>>           ParameterValue
-  /// CHECK:                            Arm64DataProcWithShifterOp [<<l>>,<<b>>] kind:Sub+SXTB
+  /// CHECK:                            DataProcWithShifterOp [<<l>>,<<b>>] kind:Sub+SXTB
 
   /// CHECK-START-ARM64: long Main.$opt$noinline$translate(long, byte) instruction_simplifier_arm64 (after)
   /// CHECK-NOT:                        TypeConversion
@@ -106,6 +125,21 @@
   * inputs are the same IR.
    */
 
+  /// CHECK-START-ARM: int Main.$opt$noinline$sameInput(int) instruction_simplifier_arm (before)
+  /// CHECK:       <<a:i\d+>>           ParameterValue
+  /// CHECK:       <<Const2:i\d+>>      IntConstant 2
+  /// CHECK:       <<tmp:i\d+>>         Shl [<<a>>,<<Const2>>]
+  /// CHECK:                            Add [<<tmp>>,<<tmp>>]
+
+  /// CHECK-START-ARM: int Main.$opt$noinline$sameInput(int) instruction_simplifier_arm (after)
+  /// CHECK-DAG:   <<a:i\d+>>           ParameterValue
+  /// CHECK-DAG:   <<Const2:i\d+>>      IntConstant 2
+  /// CHECK:       <<Shl:i\d+>>         Shl [<<a>>,<<Const2>>]
+  /// CHECK:                            Add [<<Shl>>,<<Shl>>]
+
+  /// CHECK-START-ARM: int Main.$opt$noinline$sameInput(int) instruction_simplifier_arm (after)
+  /// CHECK-NOT:                        DataProcWithShifterOp
+
   /// CHECK-START-ARM64: int Main.$opt$noinline$sameInput(int) instruction_simplifier_arm64 (before)
   /// CHECK:       <<a:i\d+>>           ParameterValue
   /// CHECK:       <<Const2:i\d+>>      IntConstant 2
@@ -119,7 +153,7 @@
   /// CHECK:                            Add [<<Shl>>,<<Shl>>]
 
   /// CHECK-START-ARM64: int Main.$opt$noinline$sameInput(int) instruction_simplifier_arm64 (after)
-  /// CHECK-NOT:                        Arm64DataProcWithShifterOp
+  /// CHECK-NOT:                        DataProcWithShifterOp
 
   public static int $opt$noinline$sameInput(int a) {
     if (doThrow) throw new Error();
@@ -131,6 +165,28 @@
    * Check that we perform the merge for multiple uses.
    */
 
+  /// CHECK-START-ARM: int Main.$opt$noinline$multipleUses(int) instruction_simplifier_arm (before)
+  /// CHECK:       <<arg:i\d+>>         ParameterValue
+  /// CHECK:       <<Const23:i\d+>>     IntConstant 23
+  /// CHECK:       <<tmp:i\d+>>         Shl [<<arg>>,<<Const23>>]
+  /// CHECK:                            Add [<<tmp>>,{{i\d+}}]
+  /// CHECK:                            Add [<<tmp>>,{{i\d+}}]
+  /// CHECK:                            Add [<<tmp>>,{{i\d+}}]
+  /// CHECK:                            Add [<<tmp>>,{{i\d+}}]
+  /// CHECK:                            Add [<<tmp>>,{{i\d+}}]
+
+  /// CHECK-START-ARM: int Main.$opt$noinline$multipleUses(int) instruction_simplifier_arm (after)
+  /// CHECK:       <<arg:i\d+>>         ParameterValue
+  /// CHECK:                            DataProcWithShifterOp [{{i\d+}},<<arg>>] kind:Add+LSL shift:23
+  /// CHECK:                            DataProcWithShifterOp [{{i\d+}},<<arg>>] kind:Add+LSL shift:23
+  /// CHECK:                            DataProcWithShifterOp [{{i\d+}},<<arg>>] kind:Add+LSL shift:23
+  /// CHECK:                            DataProcWithShifterOp [{{i\d+}},<<arg>>] kind:Add+LSL shift:23
+  /// CHECK:                            DataProcWithShifterOp [{{i\d+}},<<arg>>] kind:Add+LSL shift:23
+
+  /// CHECK-START-ARM: int Main.$opt$noinline$multipleUses(int) instruction_simplifier_arm (after)
+  /// CHECK-NOT:                        Shl
+  /// CHECK-NOT:                        Add
+
   /// CHECK-START-ARM64: int Main.$opt$noinline$multipleUses(int) instruction_simplifier_arm64 (before)
   /// CHECK:       <<arg:i\d+>>         ParameterValue
   /// CHECK:       <<Const23:i\d+>>     IntConstant 23
@@ -143,11 +199,11 @@
 
   /// CHECK-START-ARM64: int Main.$opt$noinline$multipleUses(int) instruction_simplifier_arm64 (after)
   /// CHECK:       <<arg:i\d+>>         ParameterValue
-  /// CHECK:                            Arm64DataProcWithShifterOp [{{i\d+}},<<arg>>] kind:Add+LSL shift:23
-  /// CHECK:                            Arm64DataProcWithShifterOp [{{i\d+}},<<arg>>] kind:Add+LSL shift:23
-  /// CHECK:                            Arm64DataProcWithShifterOp [{{i\d+}},<<arg>>] kind:Add+LSL shift:23
-  /// CHECK:                            Arm64DataProcWithShifterOp [{{i\d+}},<<arg>>] kind:Add+LSL shift:23
-  /// CHECK:                            Arm64DataProcWithShifterOp [{{i\d+}},<<arg>>] kind:Add+LSL shift:23
+  /// CHECK:                            DataProcWithShifterOp [{{i\d+}},<<arg>>] kind:Add+LSL shift:23
+  /// CHECK:                            DataProcWithShifterOp [{{i\d+}},<<arg>>] kind:Add+LSL shift:23
+  /// CHECK:                            DataProcWithShifterOp [{{i\d+}},<<arg>>] kind:Add+LSL shift:23
+  /// CHECK:                            DataProcWithShifterOp [{{i\d+}},<<arg>>] kind:Add+LSL shift:23
+  /// CHECK:                            DataProcWithShifterOp [{{i\d+}},<<arg>>] kind:Add+LSL shift:23
 
   /// CHECK-START-ARM64: int Main.$opt$noinline$multipleUses(int) instruction_simplifier_arm64 (after)
   /// CHECK-NOT:                        Shl
@@ -171,9 +227,19 @@
    * operand, so test that only the shifts are merged.
    */
 
+  /// CHECK-START-ARM: void Main.$opt$noinline$testAnd(long, long) instruction_simplifier_arm (after)
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK-NOT:                        DataProcWithShifterOp
+
+  /// CHECK-START-ARM: void Main.$opt$noinline$testAnd(long, long) disassembly (after)
+  /// CHECK:                            and lsl
+  /// CHECK:                            sbfx
+  /// CHECK:                            asr
+  /// CHECK:                            and
+
   /// CHECK-START-ARM64: void Main.$opt$noinline$testAnd(long, long) instruction_simplifier_arm64 (after)
-  /// CHECK:                            Arm64DataProcWithShifterOp
-  /// CHECK-NOT:                        Arm64DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK-NOT:                        DataProcWithShifterOp
 
   /// CHECK-START-ARM64: void Main.$opt$noinline$testAnd(long, long) disassembly (after)
   /// CHECK:                            and lsl
@@ -186,9 +252,18 @@
                      (a & (b << 5)) | (a & (byte)b));
   }
 
+  /// CHECK-START-ARM: void Main.$opt$noinline$testOr(int, int) instruction_simplifier_arm (after)
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK-NOT:                        DataProcWithShifterOp
+
+  /// CHECK-START-ARM: void Main.$opt$noinline$testOr(int, int) disassembly (after)
+  /// CHECK:                            orr asr
+  /// CHECK:                            ubfx
+  /// CHECK:                            orr
+
   /// CHECK-START-ARM64: void Main.$opt$noinline$testOr(int, int) instruction_simplifier_arm64 (after)
-  /// CHECK:                            Arm64DataProcWithShifterOp
-  /// CHECK-NOT:                        Arm64DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK-NOT:                        DataProcWithShifterOp
 
   /// CHECK-START-ARM64: void Main.$opt$noinline$testOr(int, int) disassembly (after)
   /// CHECK:                            orr asr
@@ -201,9 +276,19 @@
                     (a | (b >> 6)) | (a | (char)b));
   }
 
+  /// CHECK-START-ARM: void Main.$opt$noinline$testXor(long, long) instruction_simplifier_arm (after)
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK-NOT:                        DataProcWithShifterOp
+
+  /// CHECK-START-ARM: void Main.$opt$noinline$testXor(long, long) disassembly (after)
+  /// CHECK:                            eor lsr
+  /// CHECK:                            mov
+  /// CHECK:                            asr
+  /// CHECK:                            eor
+
   /// CHECK-START-ARM64: void Main.$opt$noinline$testXor(long, long) instruction_simplifier_arm64 (after)
-  /// CHECK:                            Arm64DataProcWithShifterOp
-  /// CHECK-NOT:                        Arm64DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK-NOT:                        DataProcWithShifterOp
 
   /// CHECK-START-ARM64: void Main.$opt$noinline$testXor(long, long) disassembly (after)
   /// CHECK:                            eor lsr
@@ -216,9 +301,12 @@
                      (a ^ (b >>> 7)) | (a ^ (int)b));
   }
 
+  /// CHECK-START-ARM: void Main.$opt$noinline$testNeg(int) instruction_simplifier_arm (after)
+  /// CHECK-NOT:                            DataProcWithShifterOp
+
   /// CHECK-START-ARM64: void Main.$opt$noinline$testNeg(int) instruction_simplifier_arm64 (after)
-  /// CHECK:                            Arm64DataProcWithShifterOp
-  /// CHECK-NOT:                        Arm64DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK-NOT:                        DataProcWithShifterOp
 
   /// CHECK-START-ARM64: void Main.$opt$noinline$testNeg(int) disassembly (after)
   /// CHECK:                            neg lsl
@@ -239,9 +327,12 @@
   * does occur on the right-hand side.
    */
 
+  /// CHECK-START-ARM: void Main.$opt$validateExtendByteInt1(int, byte) instruction_simplifier_arm (after)
+  /// CHECK-NOT:                        DataProcWithShifterOp
+
   /// CHECK-START-ARM64: void Main.$opt$validateExtendByteInt1(int, byte) instruction_simplifier_arm64 (after)
-  /// CHECK:                            Arm64DataProcWithShifterOp
-  /// CHECK-NOT:                        Arm64DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK-NOT:                        DataProcWithShifterOp
 
   /// CHECK-START-ARM64: void Main.$opt$validateExtendByteInt1(int, byte) instruction_simplifier_arm64 (after)
   /// CHECK-NOT:                        TypeConversion
@@ -252,9 +343,11 @@
     assertIntEquals(a + $noinline$byteToShort(b), a + (short)b);
   }
 
+  /// CHECK-START-ARM: void Main.$opt$validateExtendByteInt2(int, byte) instruction_simplifier_arm (after)
+  /// CHECK-NOT:                        DataProcWithShifterOp
+
   /// CHECK-START-ARM64: void Main.$opt$validateExtendByteInt2(int, byte) instruction_simplifier_arm64 (after)
-  /// CHECK-NOT:                        Arm64DataProcWithShifterOp
-  /// CHECK-NOT:                        Arm64DataProcWithShifterOp
+  /// CHECK-NOT:                        DataProcWithShifterOp
 
   public static void $opt$validateExtendByteInt2(int a, byte b) {
     // The conversion to `int` has been optimized away, so there is nothing to merge.
@@ -263,13 +356,25 @@
     assertLongEquals(a + $noinline$byteToLong(b), a + (long)b);
   }
 
+  /// CHECK-START-ARM: void Main.$opt$validateExtendByteLong(long, byte) instruction_simplifier_arm (after)
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK-NOT:                        DataProcWithShifterOp
+
+  /// CHECK-START-ARM: void Main.$opt$validateExtendByteLong(long, byte) instruction_simplifier_arm (after)
+  /// CHECK:                            TypeConversion
+  /// CHECK-NOT:                        TypeConversion
+
   /// CHECK-START-ARM64: void Main.$opt$validateExtendByteLong(long, byte) instruction_simplifier_arm64 (after)
-  /// CHECK:                            Arm64DataProcWithShifterOp
-  /// CHECK:                            Arm64DataProcWithShifterOp
-  /// CHECK:                            Arm64DataProcWithShifterOp
-  /// CHECK:                            Arm64DataProcWithShifterOp
-  /// CHECK:                            Arm64DataProcWithShifterOp
-  /// CHECK-NOT:                        Arm64DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK-NOT:                        DataProcWithShifterOp
 
   /// CHECK-START-ARM64: void Main.$opt$validateExtendByteLong(long, byte) instruction_simplifier_arm64 (after)
   /// CHECK:                            TypeConversion
@@ -294,9 +399,12 @@
     $opt$validateExtendByteLong(a, b);
   }
 
+  /// CHECK-START-ARM: void Main.$opt$validateExtendCharInt1(int, char) instruction_simplifier_arm (after)
+  /// CHECK-NOT:                        DataProcWithShifterOp
+
   /// CHECK-START-ARM64: void Main.$opt$validateExtendCharInt1(int, char) instruction_simplifier_arm64 (after)
-  /// CHECK:                            Arm64DataProcWithShifterOp
-  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
 
   /// CHECK-START-ARM64: void Main.$opt$validateExtendCharInt1(int, char) instruction_simplifier_arm64 (after)
   /// CHECK-NOT:                        TypeConversion
@@ -306,22 +414,41 @@
     assertIntEquals(a + $noinline$charToShort(b), a + (short)b);
   }
 
+  /// CHECK-START-ARM: void Main.$opt$validateExtendCharInt2(int, char) instruction_simplifier_arm (after)
+  /// CHECK-NOT:                        DataProcWithShifterOp
+
   /// CHECK-START-ARM64: void Main.$opt$validateExtendCharInt2(int, char) instruction_simplifier_arm64 (after)
-  /// CHECK-NOT:                        Arm64DataProcWithShifterOp
-  /// CHECK-NOT:                        Arm64DataProcWithShifterOp
+  /// CHECK-NOT:                        DataProcWithShifterOp
 
   public static void $opt$validateExtendCharInt2(int a, char b) {
     // The conversion to `int` has been optimized away, so there is nothing to merge.
     assertIntEquals (a + $noinline$charToInt (b), a +  (int)b);
-    // There is an environment use for `(long)b`, preventing the merge.
+    // There is an environment use for `(long)b` and the implicit `(long)a`, preventing the merge.
     assertLongEquals(a + $noinline$charToLong(b), a + (long)b);
   }
 
+  /// CHECK-START-ARM: void Main.$opt$validateExtendCharLong(long, char) instruction_simplifier_arm (after)
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK-NOT:                        DataProcWithShifterOp
+
+  /// CHECK-START-ARM: void Main.$opt$validateExtendCharLong(long, char) instruction_simplifier_arm (after)
+  /// CHECK:                            TypeConversion
+  /// CHECK:                            TypeConversion
+  /// CHECK-NOT:                        TypeConversion
+
   /// CHECK-START-ARM64: void Main.$opt$validateExtendCharLong(long, char) instruction_simplifier_arm64 (after)
-  /// CHECK:                            Arm64DataProcWithShifterOp
-  /// CHECK:                            Arm64DataProcWithShifterOp
-  /// CHECK:                            Arm64DataProcWithShifterOp
-  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK-NOT:                        DataProcWithShifterOp
 
   /// CHECK-START-ARM64: void Main.$opt$validateExtendCharLong(long, char) instruction_simplifier_arm64 (after)
   /// CHECK:                            TypeConversion
@@ -332,7 +459,7 @@
     // The first two tests have a type conversion.
     assertLongEquals(a + $noinline$charToByte (b), a +  (byte)b);
     assertLongEquals(a + $noinline$charToShort(b), a + (short)b);
-    // This test does not because the conversion to `int` is optimized away.
+    // On ARM64 this test does not have one, because the conversion to `int` is optimized away.
     assertLongEquals(a + $noinline$charToInt  (b), a +   (int)b);
   }
 
@@ -342,9 +469,12 @@
     $opt$validateExtendCharLong(a, b);
   }
 
+  /// CHECK-START-ARM: void Main.$opt$validateExtendShortInt1(int, short) instruction_simplifier_arm (after)
+  /// CHECK-NOT:                        DataProcWithShifterOp
+
   /// CHECK-START-ARM64: void Main.$opt$validateExtendShortInt1(int, short) instruction_simplifier_arm64 (after)
-  /// CHECK:                            Arm64DataProcWithShifterOp
-  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
 
   /// CHECK-START-ARM64: void Main.$opt$validateExtendShortInt1(int, short) instruction_simplifier_arm64 (after)
   /// CHECK-NOT:                        TypeConversion
@@ -354,21 +484,41 @@
     assertIntEquals(a + $noinline$shortToChar (b), a + (char)b);
   }
 
+  /// CHECK-START-ARM: void Main.$opt$validateExtendShortInt2(int, short) instruction_simplifier_arm (after)
+  /// CHECK-NOT:                        DataProcWithShifterOp
+
   /// CHECK-START-ARM64: void Main.$opt$validateExtendShortInt2(int, short) instruction_simplifier_arm64 (after)
-  /// CHECK-NOT:                        Arm64DataProcWithShifterOp
-  /// CHECK-NOT:                        Arm64DataProcWithShifterOp
+  /// CHECK-NOT:                        DataProcWithShifterOp
 
   public static void $opt$validateExtendShortInt2(int a, short b) {
     // The conversion to `int` has been optimized away, so there is nothing to merge.
     assertIntEquals (a + $noinline$shortToInt  (b), a +  (int)b);
-    // There is an environment use for `(long)b`, preventing the merge.
+    // There is an environment use for `(long)b` and the implicit `(long)a`, preventing the merge.
     assertLongEquals(a + $noinline$shortToLong (b), a + (long)b);
   }
 
+  /// CHECK-START-ARM: void Main.$opt$validateExtendShortLong(long, short) instruction_simplifier_arm (after)
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK-NOT:                        DataProcWithShifterOp
+
+  /// CHECK-START-ARM: void Main.$opt$validateExtendShortLong(long, short) instruction_simplifier_arm (after)
+  /// CHECK:                            TypeConversion
+  /// CHECK:                            TypeConversion
+  /// CHECK-NOT:                        TypeConversion
+
   /// CHECK-START-ARM64: void Main.$opt$validateExtendShortLong(long, short) instruction_simplifier_arm64 (after)
-  /// CHECK:                            Arm64DataProcWithShifterOp
-  /// CHECK:                            Arm64DataProcWithShifterOp
-  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK-NOT:                        DataProcWithShifterOp
 
   /// CHECK-START-ARM64: void Main.$opt$validateExtendShortLong(long, short) instruction_simplifier_arm64 (after)
   /// CHECK:                            TypeConversion
@@ -379,7 +529,7 @@
     // The first two tests have a type conversion.
     assertLongEquals(a + $noinline$shortToByte(b), a + (byte)b);
     assertLongEquals(a + $noinline$shortToChar(b), a + (char)b);
-    // This test does not because the conversion to `int` is optimized away.
+    // On ARM64 this test does not have one, because the conversion to `int` is optimized away.
     assertLongEquals(a + $noinline$shortToInt (b), a +  (int)b);
   }
 
@@ -389,11 +539,31 @@
     $opt$validateExtendShortLong(a, b);
   }
 
+  /// CHECK-START-ARM: void Main.$opt$validateExtendInt(long, int) instruction_simplifier_arm (after)
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK-NOT:                        DataProcWithShifterOp
+
+  /// CHECK-START-ARM: void Main.$opt$validateExtendInt(long, int) instruction_simplifier_arm (after)
+  /// CHECK:                            TypeConversion
+  /// CHECK:                            TypeConversion
+  /// CHECK:                            TypeConversion
+  /// CHECK-NOT:                        TypeConversion
+
   /// CHECK-START-ARM64: void Main.$opt$validateExtendInt(long, int) instruction_simplifier_arm64 (after)
-  /// CHECK:                            Arm64DataProcWithShifterOp
-  /// CHECK:                            Arm64DataProcWithShifterOp
-  /// CHECK:                            Arm64DataProcWithShifterOp
-  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK-NOT:                        DataProcWithShifterOp
 
   /// CHECK-START-ARM64: void Main.$opt$validateExtendInt(long, int) instruction_simplifier_arm64 (after)
   /// CHECK:                            TypeConversion
@@ -411,11 +581,34 @@
     assertLongEquals(a + $noinline$intToLong (b), a +  (long)b);
   }
 
+  /// CHECK-START-ARM: void Main.$opt$validateExtendLong(long, long) instruction_simplifier_arm (after)
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK-NOT:                        DataProcWithShifterOp
+
+  /// CHECK-START-ARM: void Main.$opt$validateExtendLong(long, long) instruction_simplifier_arm (after)
+  /// CHECK:                            TypeConversion
+  /// CHECK:                            TypeConversion
+  /// CHECK:                            TypeConversion
+  /// CHECK:                            TypeConversion
+  /// CHECK-NOT:                        TypeConversion
+
   /// CHECK-START-ARM64: void Main.$opt$validateExtendLong(long, long) instruction_simplifier_arm64 (after)
-  /// CHECK:                            Arm64DataProcWithShifterOp
-  /// CHECK:                            Arm64DataProcWithShifterOp
-  /// CHECK:                            Arm64DataProcWithShifterOp
-  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK-NOT:                        DataProcWithShifterOp
 
   /// CHECK-START-ARM64: void Main.$opt$validateExtendLong(long, long) instruction_simplifier_arm64 (after)
   /// CHECK:                            TypeConversion
@@ -449,40 +642,83 @@
 
 
   // Each test line below should see one merge.
+  /// CHECK-START-ARM: void Main.$opt$validateShiftInt(int, int) instruction_simplifier_arm (after)
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK-NOT:                        DataProcWithShifterOp
+  // Note: `b << 32`, `b >> 32` and `b >>> 32` are optimized away by the generic simplifier.
+
+  /// CHECK-START-ARM: void Main.$opt$validateShiftInt(int, int) instruction_simplifier_arm (after)
+  /// CHECK-NOT:                        Shl
+  /// CHECK-NOT:                        Shr
+  /// CHECK-NOT:                        UShr
+
   /// CHECK-START-ARM64: void Main.$opt$validateShiftInt(int, int) instruction_simplifier_arm64 (after)
-  /// CHECK:                            Arm64DataProcWithShifterOp
-  /// CHECK:                            Arm64DataProcWithShifterOp
-  /// CHECK:                            Arm64DataProcWithShifterOp
-  /// CHECK:                            Arm64DataProcWithShifterOp
-  /// CHECK:                            Arm64DataProcWithShifterOp
-  /// CHECK:                            Arm64DataProcWithShifterOp
-  /// CHECK:                            Arm64DataProcWithShifterOp
-  /// CHECK:                            Arm64DataProcWithShifterOp
-  /// CHECK:                            Arm64DataProcWithShifterOp
-  /// CHECK:                            Arm64DataProcWithShifterOp
-  /// CHECK:                            Arm64DataProcWithShifterOp
-  /// CHECK:                            Arm64DataProcWithShifterOp
-  /// CHECK:                            Arm64DataProcWithShifterOp
-  /// CHECK:                            Arm64DataProcWithShifterOp
-  /// CHECK:                            Arm64DataProcWithShifterOp
-  /// CHECK:                            Arm64DataProcWithShifterOp
-  /// CHECK:                            Arm64DataProcWithShifterOp
-  /// CHECK:                            Arm64DataProcWithShifterOp
-  /// CHECK:                            Arm64DataProcWithShifterOp
-  /// CHECK:                            Arm64DataProcWithShifterOp
-  /// CHECK:                            Arm64DataProcWithShifterOp
-  /// CHECK:                            Arm64DataProcWithShifterOp
-  /// CHECK:                            Arm64DataProcWithShifterOp
-  /// CHECK:                            Arm64DataProcWithShifterOp
-  /// CHECK:                            Arm64DataProcWithShifterOp
-  /// CHECK:                            Arm64DataProcWithShifterOp
-  /// CHECK:                            Arm64DataProcWithShifterOp
-  /// CHECK:                            Arm64DataProcWithShifterOp
-  /// CHECK:                            Arm64DataProcWithShifterOp
-  /// CHECK:                            Arm64DataProcWithShifterOp
-  /// CHECK:                            Arm64DataProcWithShifterOp
-  /// CHECK:                            Arm64DataProcWithShifterOp
-  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK-NOT:                        DataProcWithShifterOp
   // Note: `b << 32`, `b >> 32` and `b >>> 32` are optimized away by the generic simplifier.
 
   /// CHECK-START-ARM64: void Main.$opt$validateShiftInt(int, int) instruction_simplifier_arm64 (after)
@@ -552,43 +788,89 @@
   }
 
   // Each test line below should see one merge.
+  /// CHECK-START-ARM: void Main.$opt$validateShiftLong(long, long) instruction_simplifier_arm (after)
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK-NOT:                        DataProcWithShifterOp
+
+  // On ARM shifts by 1 are not merged.
+  /// CHECK-START-ARM: void Main.$opt$validateShiftLong(long, long) instruction_simplifier_arm (after)
+  /// CHECK:                            Shl
+  /// CHECK-NOT:                        Shl
+  /// CHECK:                            Shr
+  /// CHECK-NOT:                        Shr
+  /// CHECK:                            UShr
+  /// CHECK-NOT:                        UShr
+
   /// CHECK-START-ARM64: void Main.$opt$validateShiftLong(long, long) instruction_simplifier_arm64 (after)
-  /// CHECK:                            Arm64DataProcWithShifterOp
-  /// CHECK:                            Arm64DataProcWithShifterOp
-  /// CHECK:                            Arm64DataProcWithShifterOp
-  /// CHECK:                            Arm64DataProcWithShifterOp
-  /// CHECK:                            Arm64DataProcWithShifterOp
-  /// CHECK:                            Arm64DataProcWithShifterOp
-  /// CHECK:                            Arm64DataProcWithShifterOp
-  /// CHECK:                            Arm64DataProcWithShifterOp
-  /// CHECK:                            Arm64DataProcWithShifterOp
-  /// CHECK:                            Arm64DataProcWithShifterOp
-  /// CHECK:                            Arm64DataProcWithShifterOp
-  /// CHECK:                            Arm64DataProcWithShifterOp
-  /// CHECK:                            Arm64DataProcWithShifterOp
-  /// CHECK:                            Arm64DataProcWithShifterOp
-  /// CHECK:                            Arm64DataProcWithShifterOp
-  /// CHECK:                            Arm64DataProcWithShifterOp
-  /// CHECK:                            Arm64DataProcWithShifterOp
-  /// CHECK:                            Arm64DataProcWithShifterOp
-  /// CHECK:                            Arm64DataProcWithShifterOp
-  /// CHECK:                            Arm64DataProcWithShifterOp
-  /// CHECK:                            Arm64DataProcWithShifterOp
-  /// CHECK:                            Arm64DataProcWithShifterOp
-  /// CHECK:                            Arm64DataProcWithShifterOp
-  /// CHECK:                            Arm64DataProcWithShifterOp
-  /// CHECK:                            Arm64DataProcWithShifterOp
-  /// CHECK:                            Arm64DataProcWithShifterOp
-  /// CHECK:                            Arm64DataProcWithShifterOp
-  /// CHECK:                            Arm64DataProcWithShifterOp
-  /// CHECK:                            Arm64DataProcWithShifterOp
-  /// CHECK:                            Arm64DataProcWithShifterOp
-  /// CHECK:                            Arm64DataProcWithShifterOp
-  /// CHECK:                            Arm64DataProcWithShifterOp
-  /// CHECK:                            Arm64DataProcWithShifterOp
-  /// CHECK:                            Arm64DataProcWithShifterOp
-  /// CHECK:                            Arm64DataProcWithShifterOp
-  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK:                            DataProcWithShifterOp
+  /// CHECK-NOT:                        DataProcWithShifterOp
 
   /// CHECK-START-ARM64: void Main.$opt$validateShiftLong(long, long) instruction_simplifier_arm64 (after)
   /// CHECK-NOT:                        Shl
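
A minimal Java sketch of the merging these checks verify, with illustrative method names (not from the test): the instruction simplifier folds a shift whose only use feeds an arithmetic or logic operation into a single DataProcWithShifterOp, while ARM32 leaves shifts by 1 alone, hence the leftover Shl/Shr/UShr matched above.

    class ShifterOperandSketch {
      static int merged(int a, int b) {
        return a + (b << 5);   // Shl with a single use -> one DataProcWithShifterOp (ADD ..., LSL #5)
      }
      static int keptOnArm32(int a, int b) {
        return a + (b << 1);   // shift by 1: left as a separate Shl on ARM, still merged on ARM64
      }
    }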
diff --git a/test/570-checker-select/src/Main.java b/test/570-checker-select/src/Main.java
index e0a76ca..3ac6f89 100644
--- a/test/570-checker-select/src/Main.java
+++ b/test/570-checker-select/src/Main.java
@@ -371,6 +371,49 @@
     return a > b ? x : y;
   }
 
+  /// CHECK-START-ARM: long Main.$noinline$LongEqNonmatCond_LongVarVar(long, long, long, long) disassembly (after)
+  /// CHECK:               Select
+  /// CHECK-NEXT:            cmp {{r\d+}}, {{r\d+}}
+  /// CHECK-NEXT:            it eq
+  /// CHECK-NEXT:            cmpeq {{r\d+}}, {{r\d+}}
+  /// CHECK-NEXT:            it eq
+
+  public static long $noinline$LongEqNonmatCond_LongVarVar(long a, long b, long x, long y) {
+    return a == b ? x : y;
+  }
+
+  /// CHECK-START-ARM: long Main.$noinline$LongNonmatCondCst_LongVarVar(long, long, long) disassembly (after)
+  /// CHECK:               Select
+  /// CHECK-NEXT:            mov ip, #52720
+  /// CHECK-NEXT:            movt ip, #35243
+  /// CHECK-NEXT:            cmp {{r\d+}}, ip
+  /// CHECK-NEXT:            sbcs ip, {{r\d+}}, #{{\d+}}
+  /// CHECK-NEXT:            it ge
+
+  public static long $noinline$LongNonmatCondCst_LongVarVar(long a, long x, long y) {
+    return a > 0x89ABCDEFL ? x : y;
+  }
+
+  /// CHECK-START-ARM: long Main.$noinline$LongNonmatCondCst_LongVarVar2(long, long, long) disassembly (after)
+  /// CHECK:               Select
+  /// CHECK-NEXT:            mov ip, #{{\d+}}
+  /// CHECK-NEXT:            movt ip, #{{\d+}}
+  /// CHECK-NEXT:            cmp {{r\d+}}, ip
+
+  public static long $noinline$LongNonmatCondCst_LongVarVar2(long a, long x, long y) {
+    return a > 0x0123456789ABCDEFL ? x : y;
+  }
+
+  /// CHECK-START-ARM: long Main.$noinline$LongNonmatCondCst_LongVarVar3(long, long, long) disassembly (after)
+  /// CHECK:               Select
+  /// CHECK-NEXT:            cmp {{r\d+}}, {{r\d+}}
+  /// CHECK-NOT:             sbcs
+  /// CHECK-NOT:             cmp
+
+  public static long $noinline$LongNonmatCondCst_LongVarVar3(long a, long x, long y) {
+    return a > 0x7FFFFFFFFFFFFFFFL ? x : y;
+  }
+
   /// CHECK-START: long Main.LongMatCond_LongVarVar(long, long, long, long) register (after)
   /// CHECK:            <<Cond:z\d+>> LessThanOrEqual [{{j\d+}},{{j\d+}}]
   /// CHECK:            <<Sel1:j\d+>> Select [{{j\d+}},{{j\d+}},<<Cond>>]
@@ -612,6 +655,39 @@
     assertEqual(5, IntMatCond_IntVarVar(3, 2, 5, 7));
     assertEqual(8, IntMatCond_IntVarVar(2, 3, 5, 7));
 
+    assertEqual(0xAAAAAAAA55555555L,
+                LongNonmatCond_LongVarVar(3L, 2L, 0xAAAAAAAA55555555L, 0x8888888877777777L));
+    assertEqual(0x8888888877777777L,
+                LongNonmatCond_LongVarVar(2L, 2L, 0xAAAAAAAA55555555L, 0x8888888877777777L));
+    assertEqual(0x8888888877777777L,
+                LongNonmatCond_LongVarVar(2L, 3L, 0xAAAAAAAA55555555L, 0x8888888877777777L));
+    assertEqual(0xAAAAAAAA55555555L, LongNonmatCond_LongVarVar(0x0000000100000000L,
+                                                               0x00000000FFFFFFFFL,
+                                                               0xAAAAAAAA55555555L,
+                                                               0x8888888877777777L));
+    assertEqual(0x8888888877777777L, LongNonmatCond_LongVarVar(0x00000000FFFFFFFFL,
+                                                               0x0000000100000000L,
+                                                               0xAAAAAAAA55555555L,
+                                                               0x8888888877777777L));
+
+    assertEqual(0x8888888877777777L, $noinline$LongEqNonmatCond_LongVarVar(2L,
+                                                                           3L,
+                                                                           0xAAAAAAAA55555555L,
+                                                                           0x8888888877777777L));
+    assertEqual(0xAAAAAAAA55555555L, $noinline$LongEqNonmatCond_LongVarVar(2L,
+                                                                           2L,
+                                                                           0xAAAAAAAA55555555L,
+                                                                           0x8888888877777777L));
+    assertEqual(0x8888888877777777L, $noinline$LongEqNonmatCond_LongVarVar(0x10000000000L,
+                                                                           0L,
+                                                                           0xAAAAAAAA55555555L,
+                                                                           0x8888888877777777L));
+
+    assertEqual(5L, $noinline$LongNonmatCondCst_LongVarVar2(0x7FFFFFFFFFFFFFFFL, 5L, 7L));
+    assertEqual(7L, $noinline$LongNonmatCondCst_LongVarVar2(2L, 5L, 7L));
+
+    assertEqual(7L, $noinline$LongNonmatCondCst_LongVarVar3(2L, 5L, 7L));
+
     assertEqual(5, FloatLtNonmatCond_IntVarVar(3, 2, 5, 7));
     assertEqual(7, FloatLtNonmatCond_IntVarVar(2, 3, 5, 7));
     assertEqual(7, FloatLtNonmatCond_IntVarVar(Float.NaN, 2, 5, 7));
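
The ARM sequences checked above split a 64-bit compare across 32-bit registers: the equality case chains two cmps through an IT block, and the ordered case folds the second word into the flags with sbcs (subtract with carry), so no branch is needed before the conditional select. A rough Java equivalent of the two-halves comparison (decomposed differently from the cmp/sbcs form, but with the same result):

    class LongCompareSketch {
      static boolean greaterThan(long a, long b) {
        int aHi = (int) (a >>> 32), bHi = (int) (b >>> 32);
        if (aHi != bHi) {
          return aHi > bHi;                                    // signed compare of high halves
        }
        return Integer.compareUnsigned((int) a, (int) b) > 0;  // unsigned compare of low halves
      }
    }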
diff --git a/test/577-profile-foreign-dex/info.txt b/test/577-profile-foreign-dex/info.txt
deleted file mode 100644
index 090db3f..0000000
--- a/test/577-profile-foreign-dex/info.txt
+++ /dev/null
@@ -1 +0,0 @@
-Check that we record the use of foreign dex files when profiles are enabled.
diff --git a/test/577-profile-foreign-dex/src/Main.java b/test/577-profile-foreign-dex/src/Main.java
deleted file mode 100644
index ed7a625..0000000
--- a/test/577-profile-foreign-dex/src/Main.java
+++ /dev/null
@@ -1,175 +0,0 @@
-/*
- * Copyright (C) 2016 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.lang.reflect.Method;
-import java.lang.reflect.Constructor;
-import java.util.HashMap;
-
-public class Main {
-
-  private static final String PROFILE_NAME = "primary.prof";
-  private static final String APP_DIR_PREFIX = "app_dir_";
-  private static final String FOREIGN_DEX_PROFILE_DIR = "foreign-dex";
-  private static final String TEMP_FILE_NAME_PREFIX = "dummy";
-  private static final String TEMP_FILE_NAME_SUFFIX = "-file";
-
-  public static void main(String[] args) throws Exception {
-    File tmpFile = null;
-    File appDir = null;
-    File profileFile = null;
-    File foreignDexProfileDir = null;
-
-    try {
-      // Create the necessary files layout.
-      tmpFile = createTempFile();
-      appDir = new File(tmpFile.getParent(), APP_DIR_PREFIX + tmpFile.getName());
-      appDir.mkdir();
-      foreignDexProfileDir = new File(tmpFile.getParent(), FOREIGN_DEX_PROFILE_DIR);
-      foreignDexProfileDir.mkdir();
-      profileFile = createTempFile();
-
-      String codePath = System.getenv("DEX_LOCATION") + "/577-profile-foreign-dex.jar";
-
-      // Register the app with the runtime
-      VMRuntime.registerAppInfo(profileFile.getPath(), appDir.getPath(),
-             new String[] { codePath }, foreignDexProfileDir.getPath());
-
-      testMarkerForForeignDex(foreignDexProfileDir);
-      testMarkerForCodePath(foreignDexProfileDir);
-      testMarkerForApplicationDexFile(foreignDexProfileDir, appDir);
-    } finally {
-      if (tmpFile != null) {
-        tmpFile.delete();
-      }
-      if (profileFile != null) {
-        profileFile.delete();
-      }
-      if (foreignDexProfileDir != null) {
-        foreignDexProfileDir.delete();
-      }
-      if (appDir != null) {
-        appDir.delete();
-      }
-    }
-  }
-
-  // Verify we actually create a marker on disk for foreign dex files.
-  private static void testMarkerForForeignDex(File foreignDexProfileDir) throws Exception {
-    String foreignDex = System.getenv("DEX_LOCATION") + "/577-profile-foreign-dex-ex.jar";
-    loadDexFile(foreignDex);
-    checkMarker(foreignDexProfileDir, foreignDex, /* exists */ true);
-  }
-
-  // Verify we do not create a marker on disk for dex files path of the code path.
-  private static void testMarkerForCodePath(File foreignDexProfileDir) throws Exception {
-    String codePath = System.getenv("DEX_LOCATION") + "/577-profile-foreign-dex.jar";
-    loadDexFile(codePath);
-    checkMarker(foreignDexProfileDir, codePath, /* exists */ false);
-  }
-
-  private static void testMarkerForApplicationDexFile(File foreignDexProfileDir, File appDir)
-      throws Exception {
-    // Copy the -ex jar to the application directory and load it from there.
-    // This will record duplicate class conflicts but we don't care for this use case.
-    File foreignDex = new File(System.getenv("DEX_LOCATION") + "/577-profile-foreign-dex-ex.jar");
-    File appDex = new File(appDir, "appDex.jar");
-    try {
-      copyFile(foreignDex, appDex);
-
-      loadDexFile(appDex.getAbsolutePath());
-      checkMarker(foreignDexProfileDir, appDex.getAbsolutePath(), /* exists */ false);
-    } finally {
-      if (appDex != null) {
-        appDex.delete();
-      }
-    }
-  }
-
-  private static void checkMarker(File foreignDexProfileDir, String dexFile, boolean exists) {
-    File marker = new File(foreignDexProfileDir, dexFile.replace('/', '@'));
-    boolean result_ok = exists ? marker.exists() : !marker.exists();
-    if (!result_ok) {
-      throw new RuntimeException("Marker test failed for:" + marker.getPath());
-    }
-  }
-
-  private static void loadDexFile(String dexFile) throws Exception {
-    Class<?> pathClassLoader = Class.forName("dalvik.system.PathClassLoader");
-    if (pathClassLoader == null) {
-        throw new RuntimeException("Couldn't find path class loader class");
-    }
-    Constructor<?> constructor =
-        pathClassLoader.getDeclaredConstructor(String.class, ClassLoader.class);
-    constructor.newInstance(
-            dexFile, ClassLoader.getSystemClassLoader());
-  }
-
-  private static class VMRuntime {
-    private static final Method registerAppInfoMethod;
-    static {
-      try {
-        Class<?> c = Class.forName("dalvik.system.VMRuntime");
-        registerAppInfoMethod = c.getDeclaredMethod("registerAppInfo",
-            String.class, String.class, String[].class, String.class);
-      } catch (Exception e) {
-        throw new RuntimeException(e);
-      }
-    }
-
-    public static void registerAppInfo(String pkgName, String appDir,
-        String[] codePath, String foreignDexProfileDir) throws Exception {
-      registerAppInfoMethod.invoke(null, pkgName, appDir, codePath, foreignDexProfileDir);
-    }
-  }
-
-  private static void copyFile(File fromFile, File toFile) throws Exception {
-    FileInputStream in = new FileInputStream(fromFile);
-    FileOutputStream out = new FileOutputStream(toFile);
-    try {
-      byte[] buffer = new byte[4096];
-      int bytesRead;
-      while ((bytesRead = in.read(buffer)) >= 0) {
-          out.write(buffer, 0, bytesRead);
-      }
-    } finally {
-      out.flush();
-      try {
-          out.getFD().sync();
-      } catch (IOException e) {
-      }
-      out.close();
-      in.close();
-    }
-  }
-
-  private static File createTempFile() throws Exception {
-    try {
-      return File.createTempFile(TEMP_FILE_NAME_PREFIX, TEMP_FILE_NAME_SUFFIX);
-    } catch (IOException e) {
-      System.setProperty("java.io.tmpdir", "/data/local/tmp");
-      try {
-        return File.createTempFile(TEMP_FILE_NAME_PREFIX, TEMP_FILE_NAME_SUFFIX);
-      } catch (IOException e2) {
-        System.setProperty("java.io.tmpdir", "/sdcard");
-        return File.createTempFile(TEMP_FILE_NAME_PREFIX, TEMP_FILE_NAME_SUFFIX);
-      }
-    }
-  }
-}
diff --git a/test/593-checker-shift-and-simplifier/src/Main.java b/test/593-checker-shift-and-simplifier/src/Main.java
index 65e809a..c9826bc 100644
--- a/test/593-checker-shift-and-simplifier/src/Main.java
+++ b/test/593-checker-shift-and-simplifier/src/Main.java
@@ -21,6 +21,17 @@
   // A very particular set of operations that caused a double removal by the
   // ARM64 simplifier doing "forward" removals (b/27851582).
 
+  /// CHECK-START-ARM: int Main.operations() instruction_simplifier_arm (before)
+  /// CHECK-DAG: <<Get:i\d+>> ArrayGet
+  /// CHECK-DAG: <<Not:i\d+>> Not [<<Get>>]
+  /// CHECK-DAG: <<Shl:i\d+>> Shl [<<Get>>,i{{\d+}}]
+  /// CHECK-DAG:              And [<<Not>>,<<Shl>>]
+  //
+  /// CHECK-START-ARM: int Main.operations() instruction_simplifier_arm (after)
+  /// CHECK-DAG: <<Get:i\d+>> ArrayGet
+  /// CHECK-DAG: <<Not:i\d+>> Not [<<Get>>]
+  /// CHECK-DAG:              DataProcWithShifterOp [<<Not>>,<<Get>>] kind:And+LSL shift:2
+
   /// CHECK-START-ARM64: int Main.operations() instruction_simplifier_arm64 (before)
   /// CHECK-DAG: <<Get:i\d+>> ArrayGet
   /// CHECK-DAG: <<Not:i\d+>> Not [<<Get>>]
@@ -30,7 +41,7 @@
   /// CHECK-START-ARM64: int Main.operations() instruction_simplifier_arm64 (after)
   /// CHECK-DAG: <<Get:i\d+>> ArrayGet
   /// CHECK-DAG: <<Not:i\d+>> Not [<<Get>>]
-  /// CHECK-DAG:              Arm64DataProcWithShifterOp [<<Not>>,<<Get>>] kind:And+LSL shift:2
+  /// CHECK-DAG:              DataProcWithShifterOp [<<Not>>,<<Get>>] kind:And+LSL shift:2
   private static int operations() {
      int r = a[0];
      int n = ~r;
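
A compact sketch of the pattern these checks describe (the test's real body is longer): the And and the Shl of the same ArrayGet merge into one DataProcWithShifterOp with kind:And+LSL shift:2.

    class AndShlSketch {
      // Illustrative only: ~r and (r << 2) combine into one AND ..., LSL #2.
      static int andNotShl(int[] a) {
        int r = a[0];         // ArrayGet
        int n = ~r;           // Not
        return n & (r << 2);  // And [Not, Shl] -> DataProcWithShifterOp kind:And+LSL shift:2
      }
    }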
diff --git a/test/595-profile-saving/src/Main.java b/test/595-profile-saving/src/Main.java
index 039503f..faf94c4 100644
--- a/test/595-profile-saving/src/Main.java
+++ b/test/595-profile-saving/src/Main.java
@@ -29,9 +29,7 @@
       // String codePath = getDexBaseLocation();
       String codePath = System.getenv("DEX_LOCATION") + "/595-profile-saving.jar";
       VMRuntime.registerAppInfo(file.getPath(),
-                                System.getenv("DEX_LOCATION"),
-                                new String[] {codePath},
-                                /* foreignProfileDir */ null);
+                                new String[] {codePath});
 
       int methodIdx = $opt$noinline$testProfile();
       ensureProfileProcessing();
@@ -85,15 +83,15 @@
       try {
         Class<? extends Object> c = Class.forName("dalvik.system.VMRuntime");
         registerAppInfoMethod = c.getDeclaredMethod("registerAppInfo",
-            String.class, String.class, String[].class, String.class);
+            String.class, String[].class);
       } catch (Exception e) {
         throw new RuntimeException(e);
       }
     }
 
-    public static void registerAppInfo(String profile, String appDir,
-                                       String[] codePaths, String foreignDir) throws Exception {
-      registerAppInfoMethod.invoke(null, profile, appDir, codePaths, foreignDir);
+    public static void registerAppInfo(String profile, String[] codePaths)
+        throws Exception {
+      registerAppInfoMethod.invoke(null, profile, codePaths);
     }
   }
 }
diff --git a/test/618-checker-induction/src/Main.java b/test/618-checker-induction/src/Main.java
index ad3ff44..2d9daf1 100644
--- a/test/618-checker-induction/src/Main.java
+++ b/test/618-checker-induction/src/Main.java
@@ -21,6 +21,8 @@
 
   static int[] a = new int[10];
 
+  static int[] novec = new int[20];  // to prevent vectorization
+
   /// CHECK-START: void Main.deadSingleLoop() loop_optimization (before)
   /// CHECK-DAG: Phi loop:{{B\d+}} outer_loop:none
   //
@@ -132,16 +134,18 @@
   /// CHECK-START: void Main.deadInduction() loop_optimization (before)
   /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: Phi      loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START: void Main.deadInduction() loop_optimization (after)
   /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-NOT: Phi      loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   static void deadInduction() {
     int dead = 0;
     for (int i = 0; i < a.length; i++) {
-      a[i] = 1;
+      a[i] = novec[2 * i] + 1;
       dead += 5;
     }
   }
@@ -151,17 +155,19 @@
   /// CHECK-DAG: Phi      loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: Phi      loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: Phi      loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START: void Main.deadManyInduction() loop_optimization (after)
   /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-NOT: Phi      loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   static void deadManyInduction() {
     int dead1 = 0, dead2 = 1, dead3 = 3;
     for (int i = 0; i < a.length; i++) {
       dead1 += 5;
-      a[i] = 2;
+      a[i] = novec[2 * i] + 2;
       dead2 += 10;
       dead3 += 100;
     }
@@ -170,16 +176,18 @@
   /// CHECK-START: void Main.deadSequence() loop_optimization (before)
   /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: Phi      loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START: void Main.deadSequence() loop_optimization (after)
   /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-NOT: Phi      loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   static void deadSequence() {
     int dead = 0;
     for (int i = 0; i < a.length; i++) {
-      a[i] = 3;
+      a[i] = novec[2 * i] + 3;
       // Increment value defined inside loop,
       // but sequence itself not used anywhere.
       dead += i;
@@ -191,17 +199,19 @@
   /// CHECK-DAG: Phi      loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
   /// CHECK-NOT: BoundsCheck
   //
   /// CHECK-START: void Main.deadCycleWithException(int) loop_optimization (after)
   /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-NOT: Phi      loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
   /// CHECK-NOT: ArrayGet loop:<<Loop>>      outer_loop:none
   static void deadCycleWithException(int k) {
     int dead = 0;
     for (int i = 0; i < a.length; i++) {
-      a[i] = 4;
+      a[i] = novec[2 * i] + 4;
       // Increment value of dead cycle may throw exception. Dynamic
       // BCE takes care of the bounds check though, which enables
       // removing the ArrayGet after removing the dead cycle.
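
The new `novec[2 * i]` reads exist only to keep these loops scalar: a strided ArrayGet defeats the vectorizer, so loop_optimization still sees, and must remove, the dead induction cycles instead of having the whole loop rewritten first. A hedged before/after view of what the pass does to deadInduction():

    class DeadInductionSketch {
      // Before loop_optimization: 'dead' forms an induction cycle with no use.
      static void before(int[] a, int[] novec) {
        int dead = 0;
        for (int i = 0; i < a.length; i++) {
          a[i] = novec[2 * i] + 1;   // strided read: keeps the loop scalar
          dead += 5;                 // dead cycle (Phi + Add) -> removed
        }
      }

      // After: same stores, no dead accumulator.
      static void after(int[] a, int[] novec) {
        for (int i = 0; i < a.length; i++) {
          a[i] = novec[2 * i] + 1;
        }
      }
    }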
diff --git a/test/623-checker-loop-regressions/src/Main.java b/test/623-checker-loop-regressions/src/Main.java
index 7509d9b..eee90ab 100644
--- a/test/623-checker-loop-regressions/src/Main.java
+++ b/test/623-checker-loop-regressions/src/Main.java
@@ -213,6 +213,8 @@
   /// CHECK-START: long Main.geoLongDivLastValue(long) instruction_simplifier$after_bce (after)
   /// CHECK-DAG: <<Long:j\d+>> LongConstant 0    loop:none
   /// CHECK-DAG:               Return [<<Long>>] loop:none
+  //
+  // Tests overflow in the divisor (while updating intermediate result).
   static long geoLongDivLastValue(long x) {
     for (int i = 0; i < 10; i++) {
       x /= 1081788608;
@@ -220,6 +222,26 @@
     return x;
   }
 
+  /// CHECK-START: long Main.geoLongDivLastValue() loop_optimization (before)
+  /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: Phi loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START: long Main.geoLongDivLastValue() loop_optimization (after)
+  /// CHECK-NOT: Phi
+  //
+  /// CHECK-START: long Main.geoLongDivLastValue() instruction_simplifier$after_bce (after)
+  /// CHECK-DAG: <<Long:j\d+>> LongConstant 0    loop:none
+  /// CHECK-DAG:               Return [<<Long>>] loop:none
+  //
+  // Tests overflow in the divisor (while updating base).
+  static long geoLongDivLastValue() {
+    long x = -1;
+    for (int i2 = 0; i2 < 2; i2++) {
+      x /= (Long.MAX_VALUE);
+    }
+    return x;
+  }
+
   /// CHECK-START: long Main.geoLongMulLastValue(long) loop_optimization (before)
   /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: Phi loop:<<Loop>>      outer_loop:none
@@ -239,6 +261,15 @@
     return x;
   }
 
+  // If vectorized, the narrowing subscript should not cause
+  // type inconsistencies in the synthesized code.
+  static void narrowingSubscript(float[] a) {
+    float val = 2.0f;
+    for (long i = 0; i < a.length; i++) {
+      a[(int) i] += val;
+    }
+  }
+
   public static void main(String[] args) {
     expectEquals(10, earlyExitFirst(-1));
     for (int i = 0; i <= 10; i++) {
@@ -286,6 +317,8 @@
     expectEquals(0L, geoLongDivLastValue(9223372036854775807L));
     expectEquals(0L, geoLongDivLastValue(-9223372036854775808L));
 
+    expectEquals(0L, geoLongDivLastValue());
+
     expectEquals(                   0L, geoLongMulLastValue(0L));
     expectEquals(-8070450532247928832L, geoLongMulLastValue(1L));
     expectEquals( 2305843009213693952L, geoLongMulLastValue(2L));
@@ -296,6 +329,12 @@
     expectEquals( 8070450532247928832L, geoLongMulLastValue(9223372036854775807L));
     expectEquals(                   0L, geoLongMulLastValue(-9223372036854775808L));
 
+    float[] a = new float[16];
+    narrowingSubscript(a);
+    for (int i = 0; i < 16; i++) {
+      expectEquals(2.0f, a[i]);
+    }
+
     System.out.println("passed");
   }
 
@@ -310,4 +349,10 @@
       throw new Error("Expected: " + expected + ", found: " + result);
     }
   }
+
+  private static void expectEquals(float expected, float result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
 }
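
The no-argument geoLongDivLastValue() pins down the base-overflow case with simple arithmetic: starting from x = -1, the first iteration computes -1 / 9223372036854775807 == 0 under Java's truncating division, and 0 divided by anything stays 0, so induction analysis must fold the loop to the constant 0 checked above. A one-file check of that arithmetic:

    class GeoDivSketch {
      public static void main(String[] args) {
        long x = -1;
        for (int i = 0; i < 2; i++) {
          x /= Long.MAX_VALUE;   // -1 / MAX -> 0 (truncation toward zero), then 0 / MAX -> 0
        }
        System.out.println(x);   // prints 0, the constant the loop folds to
      }
    }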
diff --git a/test/626-checker-arm64-scratch-register/src/Main.java b/test/626-checker-arm64-scratch-register/src/Main.java
index 6dd4374..1394917 100644
--- a/test/626-checker-arm64-scratch-register/src/Main.java
+++ b/test/626-checker-arm64-scratch-register/src/Main.java
@@ -70,7 +70,7 @@
   /// CHECK:  end_block
   /// CHECK: begin_block
   /// CHECK:   name "<<ElseBlock>>"
-  /// CHECK:                      ParallelMove moves:[#100->d17,32(sp)->d1,36(sp)->d2,d17->d3,d3->d4,d4->d5,d5->d6,d6->d7,d7->d18,d18->d19,d19->d20,d20->d21,d21->d22,d22->d23,d23->d10,d10->d11,d11->d12,24(sp)->d13,28(sp)->d14,d14->16(sp),d12->20(sp),d13->24(sp),d1->28(sp),d2->32(sp),16(sp)->36(sp),20(sp)->40(sp)]
+  /// CHECK:                      ParallelMove moves:[40(sp)->d0,24(sp)->32(sp),28(sp)->36(sp),d0->d3,d3->d4,d2->d5,d4->d6,d5->d7,d6->d18,d7->d19,d18->d20,d19->d21,d20->d22,d21->d23,d22->d10,d23->d11,16(sp)->24(sp),20(sp)->28(sp),d10->d14,d11->d12,d12->d13,d13->d1,d14->d2,32(sp)->16(sp),36(sp)->20(sp)]
   /// CHECK: end_block
 
   /// CHECK-START-ARM64: void Main.test() disassembly (after)
@@ -85,7 +85,7 @@
   /// CHECK:  end_block
   /// CHECK: begin_block
   /// CHECK:   name "<<ElseBlock>>"
-  /// CHECK:                      ParallelMove moves:[invalid->invalid,invalid->invalid,invalid->invalid,invalid->invalid,invalid->invalid,invalid->invalid,invalid->invalid,invalid->invalid,invalid->invalid,invalid->invalid,invalid->invalid,invalid->invalid,invalid->invalid,invalid->invalid,invalid->invalid,invalid->invalid,invalid->invalid,invalid->invalid,invalid->invalid,invalid->invalid,invalid->invalid,invalid->invalid,invalid->invalid,invalid->invalid,invalid->invalid,invalid->invalid]
+  /// CHECK:                      ParallelMove moves:[invalid->invalid,invalid->invalid,invalid->invalid,invalid->invalid,invalid->invalid,invalid->invalid,invalid->invalid,invalid->invalid,invalid->invalid,invalid->invalid,invalid->invalid,invalid->invalid,invalid->invalid,invalid->invalid,invalid->invalid,invalid->invalid,invalid->invalid,invalid->invalid,invalid->invalid,invalid->invalid,invalid->invalid,invalid->invalid,invalid->invalid,invalid->invalid,invalid->invalid]
   /// CHECK:                        fmov d31, d2
   /// CHECK:                        ldr s2, [sp, #36]
   /// CHECK:                        ldr w16, [sp, #16]
@@ -111,11 +111,10 @@
   /// CHECK:                        fmov d6, d5
   /// CHECK:                        fmov d5, d4
   /// CHECK:                        fmov d4, d3
-  /// CHECK:                        fmov d3, d17
-  /// CHECK:                        fmov d17, d13
+  /// CHECK:                        fmov d3, d13
   /// CHECK:                        ldr s13, [sp, #24]
-  /// CHECK:                        str s17, [sp, #24]
-  /// CHECK:                        ldr s17, pc+{{\d+}} (addr {{0x[0-9a-f]+}}) (100)
+  /// CHECK:                        str s3, [sp, #24]
+  /// CHECK:                        ldr s3, pc+{{\d+}} (addr {{0x[0-9a-f]+}}) (100)
   /// CHECK: end_block
 
   public void test() {
diff --git a/test/577-profile-foreign-dex/expected.txt b/test/638-checker-inline-caches/expected.txt
similarity index 100%
copy from test/577-profile-foreign-dex/expected.txt
copy to test/638-checker-inline-caches/expected.txt
diff --git a/test/638-checker-inline-caches/info.txt b/test/638-checker-inline-caches/info.txt
new file mode 100644
index 0000000..1fac628
--- /dev/null
+++ b/test/638-checker-inline-caches/info.txt
@@ -0,0 +1 @@
+Verify the use of inline caches in AOT mode.
diff --git a/test/638-checker-inline-caches/multidex.jpp b/test/638-checker-inline-caches/multidex.jpp
new file mode 100644
index 0000000..69a2cc1
--- /dev/null
+++ b/test/638-checker-inline-caches/multidex.jpp
@@ -0,0 +1,12 @@
+Main:
+  @@com.android.jack.annotations.ForceInMainDex
+  class Main
+Super:
+  @@com.android.jack.annotations.ForceInMainDex
+  class Super
+SubA:
+  @@com.android.jack.annotations.ForceInMainDex
+  class SubA
+SubB:
+  @@com.android.jack.annotations.ForceInMainDex
+  class SubB
diff --git a/test/638-checker-inline-caches/profile b/test/638-checker-inline-caches/profile
new file mode 100644
index 0000000..1ca6d7b
--- /dev/null
+++ b/test/638-checker-inline-caches/profile
@@ -0,0 +1,6 @@
+LMain;->inlineMonomorphicSubA(LSuper;)I+LSubA;
+LMain;->inlinePolymophicSubASubB(LSuper;)I+LSubA;,LSubB;
+LMain;->inlinePolymophicCrossDexSubASubC(LSuper;)I+LSubA;,LSubC;
+LMain;->inlineMegamorphic(LSuper;)I+LSubA;,LSubB;,LSubC;,LSubD;,LSubE;
+LMain;->inlineMissingTypes(LSuper;)I+missing_types
+LMain;->noInlineCache(LSuper;)I
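
On the profile syntax above, as the entries themselves suggest: each line is a dex method descriptor, optionally followed by '+' and a comma-separated list of the classes recorded in that call site's inline cache; 'missing_types' marks a cache whose types could not be resolved, and a bare descriptor (noInlineCache) records the method with no cache data. A hypothetical parser, for illustration only (not an ART API):

    import java.util.Arrays;

    // Hypothetical helper showing how one profile line decomposes.
    public class ProfileLineSketch {
      public static void main(String[] args) {
        String line = "LMain;->inlinePolymophicSubASubB(LSuper;)I+LSubA;,LSubB;";
        int plus = line.indexOf('+');
        String method = plus < 0 ? line : line.substring(0, plus);
        String[] cache = plus < 0 ? new String[0] : line.substring(plus + 1).split(",");
        System.out.println(method);                  // LMain;->inlinePolymophicSubASubB(LSuper;)I
        System.out.println(Arrays.toString(cache));  // [LSubA;, LSubB;]
      }
    }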
diff --git a/test/577-profile-foreign-dex/run b/test/638-checker-inline-caches/run
similarity index 71%
copy from test/577-profile-foreign-dex/run
copy to test/638-checker-inline-caches/run
index ad57d14..146e180 100644
--- a/test/577-profile-foreign-dex/run
+++ b/test/638-checker-inline-caches/run
@@ -1,12 +1,12 @@
 #!/bin/bash
 #
-# Copyright 2016 The Android Open Source Project
+# Copyright (C) 2017 The Android Open Source Project
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
-#      http://www.apache.org/licenses/LICENSE-2.0
+#     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
@@ -14,7 +14,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-exec ${RUN} \
-  --runtime-option -Xjitsaveprofilinginfo \
-  --runtime-option -Xusejit:true \
-  "${@}"
+exec ${RUN} "$@" --profile -Xcompiler-option --compiler-filter=speed-profile
diff --git a/test/577-profile-foreign-dex/src-ex/OtherDex.java b/test/638-checker-inline-caches/src-multidex/SubC.java
similarity index 80%
copy from test/577-profile-foreign-dex/src-ex/OtherDex.java
copy to test/638-checker-inline-caches/src-multidex/SubC.java
index cba73b3..f7e3c08 100644
--- a/test/577-profile-foreign-dex/src-ex/OtherDex.java
+++ b/test/638-checker-inline-caches/src-multidex/SubC.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2016 The Android Open Source Project
+ * Copyright (C) 2017 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -13,5 +13,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-public class OtherDex {
+
+public class SubC extends Super {
+  public int getValue() { return 24; }
 }
diff --git a/test/638-checker-inline-caches/src/Main.java b/test/638-checker-inline-caches/src/Main.java
new file mode 100644
index 0000000..2cee47e
--- /dev/null
+++ b/test/638-checker-inline-caches/src/Main.java
@@ -0,0 +1,192 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+class SubA extends Super {
+  int getValue() { return 42; }
+}
+
+class SubB extends Super {
+  int getValue() { return 38; }
+}
+
+class SubD extends Super {
+  int getValue() { return 10; }
+}
+
+class SubE extends Super {
+  int getValue() { return -4; }
+}
+
+public class Main {
+
+  /// CHECK-START: int Main.inlineMonomorphicSubA(Super) inliner (before)
+  /// CHECK:       InvokeVirtual method_name:Super.getValue
+
+  /// CHECK-START: int Main.inlineMonomorphicSubA(Super) inliner (after)
+  /// CHECK-NOT:   InvokeVirtual method_name:Super.getValue
+
+  /// CHECK-START: int Main.inlineMonomorphicSubA(Super) inliner (after)
+  /// CHECK:  <<SubARet:i\d+>>      IntConstant 42
+  /// CHECK:  <<ObjClass:l\d+>>     InstanceFieldGet field_name:java.lang.Object.shadow$_klass_
+  /// CHECK:  <<InlineClass:l\d+>>  LoadClass class_name:SubA
+  /// CHECK:  <<Test:z\d+>>         NotEqual [<<InlineClass>>,<<ObjClass>>]
+  /// CHECK:                        Deoptimize [<<Test>>]
+  /// CHECK:                        Return [<<SubARet>>]
+  public static int inlineMonomorphicSubA(Super a) {
+    return a.getValue();
+  }
+
+  /// CHECK-START: int Main.inlinePolymophicSubASubB(Super) inliner (before)
+  /// CHECK:       InvokeVirtual method_name:Super.getValue
+
+  /// CHECK-START: int Main.inlinePolymophicSubASubB(Super) inliner (after)
+  /// CHECK-NOT:   InvokeVirtual method_name:Super.getValue
+
+  // Note that the order in which the types are added to the inline cache in the profile matters.
+
+  /// CHECK-START: int Main.inlinePolymophicSubASubB(Super) inliner (after)
+  /// CHECK-DAG:  <<SubARet:i\d+>>          IntConstant 42
+  /// CHECK-DAG:  <<SubBRet:i\d+>>          IntConstant 38
+  /// CHECK:      <<ObjClassSubA:l\d+>>     InstanceFieldGet field_name:java.lang.Object.shadow$_klass_
+  /// CHECK:      <<InlineClassSubA:l\d+>>  LoadClass class_name:SubA
+  /// CHECK:      <<TestSubA:z\d+>>         NotEqual [<<InlineClassSubA>>,<<ObjClassSubA>>]
+  /// CHECK:                                If [<<TestSubA>>]
+
+  /// CHECK:      <<ObjClassSubB:l\d+>>     InstanceFieldGet field_name:java.lang.Object.shadow$_klass_
+  /// CHECK:      <<InlineClassSubB:l\d+>>  LoadClass class_name:SubB
+  /// CHECK:      <<TestSubB:z\d+>>         NotEqual [<<InlineClassSubB>>,<<ObjClassSubB>>]
+  /// CHECK:                                Deoptimize [<<TestSubB>>]
+
+  /// CHECK:      <<Ret:i\d+>>              Phi [<<SubARet>>,<<SubBRet>>]
+  /// CHECK:                                Return [<<Ret>>]
+  public static int inlinePolymophicSubASubB(Super a) {
+    return a.getValue();
+  }
+
+  /// CHECK-START: int Main.inlinePolymophicCrossDexSubASubC(Super) inliner (before)
+  /// CHECK:       InvokeVirtual method_name:Super.getValue
+
+  /// CHECK-START: int Main.inlinePolymophicCrossDexSubASubC(Super) inliner (after)
+  /// CHECK-NOT:   InvokeVirtual method_name:Super.getValue
+
+  // Note that the order in which the types are added to the inline cache in the profile matters.
+
+  /// CHECK-START: int Main.inlinePolymophicCrossDexSubASubC(Super) inliner (after)
+  /// CHECK-DAG:  <<SubARet:i\d+>>          IntConstant 42
+  /// CHECK-DAG:  <<SubCRet:i\d+>>          IntConstant 24
+  /// CHECK:      <<ObjClassSubA:l\d+>>     InstanceFieldGet field_name:java.lang.Object.shadow$_klass_
+  /// CHECK:      <<InlineClassSubA:l\d+>>  LoadClass class_name:SubA
+  /// CHECK:      <<TestSubA:z\d+>>         NotEqual [<<InlineClassSubA>>,<<ObjClassSubA>>]
+  /// CHECK:                                If [<<TestSubA>>]
+
+  /// CHECK:      <<ObjClassSubC:l\d+>>     InstanceFieldGet field_name:java.lang.Object.shadow$_klass_
+  /// CHECK:      <<InlineClassSubC:l\d+>>  LoadClass class_name:SubC
+  /// CHECK:      <<TestSubC:z\d+>>         NotEqual [<<InlineClassSubC>>,<<ObjClassSubC>>]
+  /// CHECK:                                Deoptimize [<<TestSubC>>]
+
+  /// CHECK:      <<Ret:i\d+>>              Phi [<<SubARet>>,<<SubCRet>>]
+  /// CHECK:                                Return [<<Ret>>]
+  public static int inlinePolymophicCrossDexSubASubC(Super a) {
+    return a.getValue();
+  }
+
+  /// CHECK-START: int Main.inlineMegamorphic(Super) inliner (before)
+  /// CHECK:       InvokeVirtual method_name:Super.getValue
+
+  /// CHECK-START: int Main.inlineMegamorphic(Super) inliner (after)
+  /// CHECK:       InvokeVirtual method_name:Super.getValue
+  public static int inlineMegamorphic(Super a) {
+    return a.getValue();
+  }
+
+  /// CHECK-START: int Main.inlineMissingTypes(Super) inliner (before)
+  /// CHECK:       InvokeVirtual method_name:Super.getValue
+
+  /// CHECK-START: int Main.inlineMissingTypes(Super) inliner (after)
+  /// CHECK:       InvokeVirtual method_name:Super.getValue
+  public static int inlineMissingTypes(Super a) {
+    return a.getValue();
+  }
+
+  /// CHECK-START: int Main.noInlineCache(Super) inliner (before)
+  /// CHECK:       InvokeVirtual method_name:Super.getValue
+
+  /// CHECK-START: int Main.noInlineCache(Super) inliner (after)
+  /// CHECK:       InvokeVirtual method_name:Super.getValue
+  public static int noInlineCache(Super a) {
+    return a.getValue();
+  }
+
+  public static void testInlineMonomorphic() {
+    if (inlineMonomorphicSubA(new SubA()) != 42) {
+      throw new Error("Expected 42");
+    }
+
+    // Call with a different type than the one from the inline cache.
+    if (inlineMonomorphicSubA(new SubB()) != 38) {
+      throw new Error("Expected 38");
+    }
+  }
+
+  public static void testInlinePolymorphic() {
+    if (inlinePolymophicSubASubB(new SubA()) != 42) {
+      throw new Error("Expected 42");
+    }
+
+    if (inlinePolymophicSubASubB(new SubB()) != 38) {
+      throw new Error("Expected 38");
+    }
+
+    // Call with a different type than the one from the inline cache.
+    if (inlinePolymophicSubASubB(new SubC()) != 24) {
+      throw new Error("Expected 25");
+    }
+
+    if (inlinePolymophicCrossDexSubASubC(new SubA()) != 42) {
+      throw new Error("Expected 42");
+    }
+
+    if (inlinePolymophicCrossDexSubASubC(new SubC()) != 24) {
+      throw new Error("Expected 24");
+    }
+
+    // Call with a different type than the one from the inline cache.
+    if (inlinePolymophicCrossDexSubASubC(new SubB()) != 38) {
+      throw new Error("Expected 38");
+    }
+  }
+
+  public static void testInlineMegamorphic() {
+    if (inlineMegamorphic(new SubA()) != 42) {
+      throw new Error("Expected 42");
+    }
+  }
+
+
+  public static void testNoInlineCache() {
+    if (noInlineCache(new SubA()) != 42) {
+      throw new Error("Expected 42");
+    }
+  }
+
+  public static void main(String[] args) {
+    testInlineMonomorphic();
+    testInlinePolymorphic();
+    testInlineMegamorphic();
+    testNoInlineCache();
+  }
+
+}
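
A self-contained sketch of the guarded-inlining shape the monomorphic CHECK graph above encodes, with the runtime's Deoptimize modeled as a plain virtual call (names here are illustrative, not ART internals):

    class GuardedInliningSketch {
      static class Super { int getValue() { return 0; } }
      static class SubA extends Super { int getValue() { return 42; } }

      // What inlineMonomorphicSubA conceptually becomes after inlining:
      static int inlined(Super a) {
        if (a.getClass() != SubA.class) {
          return a.getValue();   // stands in for Deoptimize: bail out of the fast path
        }
        return 42;               // SubA.getValue() folded to its constant
      }

      public static void main(String[] args) {
        System.out.println(inlined(new SubA()));   // 42, via the inlined constant
        System.out.println(inlined(new Super()));  // 0, via the guard's slow path
      }
    }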
diff --git a/test/577-profile-foreign-dex/src-ex/OtherDex.java b/test/638-checker-inline-caches/src/Super.java
similarity index 82%
copy from test/577-profile-foreign-dex/src-ex/OtherDex.java
copy to test/638-checker-inline-caches/src/Super.java
index cba73b3..30cdf30 100644
--- a/test/577-profile-foreign-dex/src-ex/OtherDex.java
+++ b/test/638-checker-inline-caches/src/Super.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2016 The Android Open Source Project
+ * Copyright (C) 2017 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -13,5 +13,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-public class OtherDex {
+
+public abstract class Super {
+  abstract int getValue();
 }
diff --git a/test/639-checker-code-sinking/expected.txt b/test/639-checker-code-sinking/expected.txt
new file mode 100644
index 0000000..52e756c
--- /dev/null
+++ b/test/639-checker-code-sinking/expected.txt
@@ -0,0 +1,3 @@
+0
+class java.lang.Object
+43
diff --git a/test/639-checker-code-sinking/info.txt b/test/639-checker-code-sinking/info.txt
new file mode 100644
index 0000000..9722bdf
--- /dev/null
+++ b/test/639-checker-code-sinking/info.txt
@@ -0,0 +1 @@
+Checker tests for the code sinking optimization pass.
diff --git a/test/639-checker-code-sinking/src/Main.java b/test/639-checker-code-sinking/src/Main.java
new file mode 100644
index 0000000..1da19b6
--- /dev/null
+++ b/test/639-checker-code-sinking/src/Main.java
@@ -0,0 +1,355 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  public static void main(String[] args) {
+    testSimpleUse();
+    testTwoUses();
+    testFieldStores(doThrow);
+    testFieldStoreCycle();
+    testArrayStores();
+    testOnlyStoreUses();
+    testNoUse();
+    testPhiInput();
+    testVolatileStore();
+    doThrow = true;
+    try {
+      testInstanceSideEffects();
+    } catch (Error e) {
+      // expected
+      System.out.println(e.getMessage());
+    }
+    try {
+      testStaticSideEffects();
+    } catch (Error e) {
+      // expected
+      System.out.println(e.getMessage());
+    }
+
+    try {
+      testStoreStore(doThrow);
+    } catch (Error e) {
+      // expected
+      System.out.println(e.getMessage());
+    }
+  }
+
+  /// CHECK-START: void Main.testSimpleUse() code_sinking (before)
+  /// CHECK: <<LoadClass:l\d+>> LoadClass class_name:java.lang.Object
+  /// CHECK:                    NewInstance [<<LoadClass>>]
+  /// CHECK:                    If
+  /// CHECK:                    begin_block
+  /// CHECK:                    Throw
+
+  /// CHECK-START: void Main.testSimpleUse() code_sinking (after)
+  /// CHECK-NOT:                NewInstance
+  /// CHECK:                    If
+  /// CHECK:                    begin_block
+  /// CHECK: <<Error:l\d+>>     LoadClass class_name:java.lang.Error
+  /// CHECK: <<LoadClass:l\d+>> LoadClass class_name:java.lang.Object
+  /// CHECK-NOT:                begin_block
+  /// CHECK:                    NewInstance [<<LoadClass>>]
+  /// CHECK-NOT:                begin_block
+  /// CHECK:                    NewInstance [<<Error>>]
+  /// CHECK:                    Throw
+  public static void testSimpleUse() {
+    Object o = new Object();
+    if (doThrow) {
+      throw new Error(o.toString());
+    }
+  }
+
+  /// CHECK-START: void Main.testTwoUses() code_sinking (before)
+  /// CHECK: <<LoadClass:l\d+>> LoadClass class_name:java.lang.Object
+  /// CHECK:                    NewInstance [<<LoadClass>>]
+  /// CHECK:                    If
+  /// CHECK:                    begin_block
+  /// CHECK:                    Throw
+
+  /// CHECK-START: void Main.testTwoUses() code_sinking (after)
+  /// CHECK-NOT:                NewInstance
+  /// CHECK:                    If
+  /// CHECK:                    begin_block
+  /// CHECK: <<Error:l\d+>>     LoadClass class_name:java.lang.Error
+  /// CHECK: <<LoadClass:l\d+>> LoadClass class_name:java.lang.Object
+  /// CHECK-NOT:                begin_block
+  /// CHECK:                    NewInstance [<<LoadClass>>]
+  /// CHECK-NOT:                begin_block
+  /// CHECK:                    NewInstance [<<Error>>]
+  /// CHECK:                    Throw
+  public static void testTwoUses() {
+    Object o = new Object();
+    if (doThrow) {
+      throw new Error(o.toString() + o.toString());
+    }
+  }
+
+  /// CHECK-START: void Main.testFieldStores(boolean) code_sinking (before)
+  /// CHECK: <<Int42:i\d+>>       IntConstant 42
+  /// CHECK: <<LoadClass:l\d+>>   LoadClass class_name:Main
+  /// CHECK: <<NewInstance:l\d+>> NewInstance [<<LoadClass>>]
+  /// CHECK:                      InstanceFieldSet [<<NewInstance>>,<<Int42>>]
+  /// CHECK:                      If
+  /// CHECK:                      begin_block
+  /// CHECK:                      Throw
+
+  /// CHECK-START: void Main.testFieldStores(boolean) code_sinking (after)
+  /// CHECK: <<Int42:i\d+>>       IntConstant 42
+  /// CHECK-NOT:                  NewInstance
+  /// CHECK:                      If
+  /// CHECK:                      begin_block
+  /// CHECK: <<Error:l\d+>>       LoadClass class_name:java.lang.Error
+  /// CHECK: <<LoadClass:l\d+>>   LoadClass class_name:Main
+  /// CHECK-NOT:                  begin_block
+  /// CHECK: <<NewInstance:l\d+>> NewInstance [<<LoadClass>>]
+  /// CHECK-NOT:                  begin_block
+  /// CHECK:                      InstanceFieldSet [<<NewInstance>>,<<Int42>>]
+  /// CHECK-NOT:                  begin_block
+  /// CHECK:                      NewInstance [<<Error>>]
+  /// CHECK:                      Throw
+  public static void testFieldStores(boolean doThrow) {
+    Main m = new Main();
+    m.intField = 42;
+    if (doThrow) {
+      throw new Error(m.toString());
+    }
+  }
+
+  /// CHECK-START: void Main.testFieldStoreCycle() code_sinking (before)
+  /// CHECK: <<LoadClass:l\d+>>    LoadClass class_name:Main
+  /// CHECK: <<NewInstance1:l\d+>> NewInstance [<<LoadClass>>]
+  /// CHECK: <<NewInstance2:l\d+>> NewInstance [<<LoadClass>>]
+  /// CHECK:                       InstanceFieldSet [<<NewInstance1>>,<<NewInstance2>>]
+  /// CHECK:                       InstanceFieldSet [<<NewInstance2>>,<<NewInstance1>>]
+  /// CHECK:                       If
+  /// CHECK:                       begin_block
+  /// CHECK:                       Throw
+
+  // TODO(ngeoffray): Handle allocation/store cycles.
+  /// CHECK-START: void Main.testFieldStoreCycle() code_sinking (after)
+  /// CHECK: begin_block
+  /// CHECK: <<LoadClass:l\d+>>    LoadClass class_name:Main
+  /// CHECK: <<NewInstance1:l\d+>> NewInstance [<<LoadClass>>]
+  /// CHECK: <<NewInstance2:l\d+>> NewInstance [<<LoadClass>>]
+  /// CHECK:                       InstanceFieldSet [<<NewInstance1>>,<<NewInstance2>>]
+  /// CHECK:                       InstanceFieldSet [<<NewInstance2>>,<<NewInstance1>>]
+  /// CHECK:                       If
+  /// CHECK:                       begin_block
+  /// CHECK:                       Throw
+  public static void testFieldStoreCycle() {
+    Main m1 = new Main();
+    Main m2 = new Main();
+    m1.objectField = m2;
+    m2.objectField = m1;
+    if (doThrow) {
+      throw new Error(m1.toString() + m2.toString());
+    }
+  }
+
+  /// CHECK-START: void Main.testArrayStores() code_sinking (before)
+  /// CHECK: <<Int1:i\d+>>        IntConstant 1
+  /// CHECK: <<Int0:i\d+>>        IntConstant 0
+  /// CHECK: <<LoadClass:l\d+>>   LoadClass class_name:java.lang.Object[]
+  /// CHECK: <<NewArray:l\d+>>    NewArray [<<LoadClass>>,<<Int1>>]
+  /// CHECK:                      ArraySet [<<NewArray>>,<<Int0>>,<<NewArray>>]
+  /// CHECK:                      If
+  /// CHECK:                      begin_block
+  /// CHECK:                      Throw
+
+  /// CHECK-START: void Main.testArrayStores() code_sinking (after)
+  /// CHECK: <<Int1:i\d+>>        IntConstant 1
+  /// CHECK: <<Int0:i\d+>>        IntConstant 0
+  /// CHECK-NOT:                  NewArray
+  /// CHECK:                      If
+  /// CHECK:                      begin_block
+  /// CHECK: <<Error:l\d+>>       LoadClass class_name:java.lang.Error
+  /// CHECK: <<LoadClass:l\d+>>   LoadClass class_name:java.lang.Object[]
+  /// CHECK-NOT:                  begin_block
+  /// CHECK: <<NewArray:l\d+>>    NewArray [<<LoadClass>>,<<Int1>>]
+  /// CHECK-NOT:                  begin_block
+  /// CHECK:                      ArraySet [<<NewArray>>,<<Int0>>,<<NewArray>>]
+  /// CHECK-NOT:                  begin_block
+  /// CHECK:                      NewInstance [<<Error>>]
+  /// CHECK:                      Throw
+  public static void testArrayStores() {
+    Object[] o = new Object[1];
+    o[0] = o;
+    if (doThrow) {
+      throw new Error(o.toString());
+    }
+  }
+
+  // Make sure code sinking does not crash on dead allocations.
+  public static void testOnlyStoreUses() {
+    Main m = new Main();
+    Object[] o = new Object[1];  // dead allocation; should eventually be removed (b/35634932).
+    o[0] = m;
+    o = null;  // Avoid environment uses for the array allocation.
+    if (doThrow) {
+      throw new Error(m.toString());
+    }
+  }
+
+  // Make sure code sinking does not crash on dead code.
+  public static void testNoUse() {
+    Main m = new Main();
+    boolean load = Main.doLoop;  // dead code, not removed because of environment use.
+    // Ensure one environment use for the static field
+    $opt$noinline$foo();
+    load = false;
+    if (doThrow) {
+      throw new Error(m.toString());
+    }
+  }
+
+  // Make sure we can move code only used by a phi.
+  /// CHECK-START: void Main.testPhiInput() code_sinking (before)
+  /// CHECK: <<Null:l\d+>>        NullConstant
+  /// CHECK: <<LoadClass:l\d+>>   LoadClass class_name:java.lang.Object
+  /// CHECK: <<NewInstance:l\d+>> NewInstance [<<LoadClass>>]
+  /// CHECK:                      If
+  /// CHECK:                      begin_block
+  /// CHECK:                      Phi [<<Null>>,<<NewInstance>>]
+  /// CHECK:                      Throw
+
+  /// CHECK-START: void Main.testPhiInput() code_sinking (after)
+  /// CHECK: <<Null:l\d+>>        NullConstant
+  /// CHECK-NOT:                  NewInstance
+  /// CHECK:                      If
+  /// CHECK:                      begin_block
+  /// CHECK: <<LoadClass:l\d+>>   LoadClass class_name:java.lang.Object
+  /// CHECK: <<NewInstance:l\d+>> NewInstance [<<LoadClass>>]
+  /// CHECK:                      begin_block
+  /// CHECK:                      Phi [<<Null>>,<<NewInstance>>]
+  /// CHECK: <<Error:l\d+>>       LoadClass class_name:java.lang.Error
+  /// CHECK:                      NewInstance [<<Error>>]
+  /// CHECK:                      Throw
+  public static void testPhiInput() {
+    Object f = new Object();
+    if (doThrow) {
+      Object o = null;
+      int i = 2;
+      if (doLoop) {
+        o = f;
+        i = 42;
+      }
+      throw new Error(o.toString() + i);
+    }
+  }
+
+  static void $opt$noinline$foo() {}
+
+  // Check that we do not move volatile stores.
+  /// CHECK-START: void Main.testVolatileStore() code_sinking (before)
+  /// CHECK: <<Int42:i\d+>>        IntConstant 42
+  /// CHECK: <<LoadClass:l\d+>>    LoadClass class_name:Main
+  /// CHECK: <<NewInstance:l\d+>>  NewInstance [<<LoadClass>>]
+  /// CHECK:                       InstanceFieldSet [<<NewInstance>>,<<Int42>>]
+  /// CHECK:                       If
+  /// CHECK:                       begin_block
+  /// CHECK:                       Throw
+
+  /// CHECK-START: void Main.testVolatileStore() code_sinking (after)
+  /// CHECK: <<Int42:i\d+>>        IntConstant 42
+  /// CHECK: <<LoadClass:l\d+>>    LoadClass class_name:Main
+  /// CHECK: <<NewInstance:l\d+>>  NewInstance [<<LoadClass>>]
+  /// CHECK:                       InstanceFieldSet [<<NewInstance>>,<<Int42>>]
+  /// CHECK:                       If
+  /// CHECK:                       begin_block
+  /// CHECK:                       Throw
+  public static void testVolatileStore() {
+    Main m = new Main();
+    m.volatileField = 42;
+    if (doThrow) {
+      throw new Error(m.toString());
+    }
+  }
+
+  public static void testInstanceSideEffects() {
+    int a = mainField.intField;
+    $noinline$changeIntField();
+    if (doThrow) {
+      throw new Error("" + a);
+    }
+  }
+
+  static void $noinline$changeIntField() {
+    mainField.intField = 42;
+  }
+
+  public static void testStaticSideEffects() {
+    Object o = obj;
+    $noinline$changeStaticObjectField();
+    if (doThrow) {
+      throw new Error(o.getClass().toString());
+    }
+  }
+
+  static void $noinline$changeStaticObjectField() {
+    obj = new Main();
+  }
+
+  // Test that we preserve the order of stores.
+  /// CHECK-START: void Main.testStoreStore(boolean) code_sinking (before)
+  /// CHECK: <<Int42:i\d+>>       IntConstant 42
+  /// CHECK: <<Int43:i\d+>>       IntConstant 43
+  /// CHECK: <<LoadClass:l\d+>>   LoadClass class_name:Main
+  /// CHECK: <<NewInstance:l\d+>> NewInstance [<<LoadClass>>]
+  /// CHECK:                      InstanceFieldSet [<<NewInstance>>,<<Int42>>]
+  /// CHECK:                      InstanceFieldSet [<<NewInstance>>,<<Int43>>]
+  /// CHECK:                      If
+  /// CHECK:                      begin_block
+  /// CHECK:                      Throw
+
+  /// CHECK-START: void Main.testStoreStore(boolean) code_sinking (after)
+  /// CHECK: <<Int42:i\d+>>       IntConstant 42
+  /// CHECK: <<Int43:i\d+>>       IntConstant 43
+  /// CHECK-NOT:                  NewInstance
+  /// CHECK:                      If
+  /// CHECK:                      begin_block
+  /// CHECK: <<Error:l\d+>>       LoadClass class_name:java.lang.Error
+  /// CHECK: <<LoadClass:l\d+>>   LoadClass class_name:Main
+  /// CHECK-NOT:                  begin_block
+  /// CHECK: <<NewInstance:l\d+>> NewInstance [<<LoadClass>>]
+  /// CHECK-NOT:                  begin_block
+  /// CHECK:                      InstanceFieldSet [<<NewInstance>>,<<Int42>>]
+  /// CHECK-NOT:                  begin_block
+  /// CHECK:                      InstanceFieldSet [<<NewInstance>>,<<Int43>>]
+  /// CHECK-NOT:                  begin_block
+  /// CHECK:                      NewInstance [<<Error>>]
+  /// CHECK:                      Throw
+  public static void testStoreStore(boolean doThrow) {
+    Main m = new Main();
+    m.intField = 42;
+    m.intField = 43;
+    if (doThrow) {
+      throw new Error(m.$opt$noinline$toString());
+    }
+  }
+
+  public String $opt$noinline$toString() {
+    return "" + intField;
+  }
+
+  volatile int volatileField;
+  int intField;
+  Object objectField;
+  static boolean doThrow;
+  static boolean doLoop;
+  static Main mainField = new Main();
+  static Object obj = new Object();
+}
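
The pass these tests cover moves side-effect-free instructions (allocations and the stores into them) from the common path into the uncommon block that actually uses them. In source terms, testSimpleUse is reshaped roughly like this; a sketch only, since the pass operates on the HIR graph, not on Java:

    class CodeSinkingSketch {
      static boolean doThrow;

      static void before() {
        Object o = new Object();            // allocated on every call
        if (doThrow) {
          throw new Error(o.toString());
        }
      }

      static void after() {
        if (doThrow) {
          Object o = new Object();          // allocation sunk into the rare throw path
          throw new Error(o.toString());
        }
      }
    }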
diff --git a/test/577-profile-foreign-dex/expected.txt b/test/640-checker-integer-valueof/expected.txt
similarity index 100%
copy from test/577-profile-foreign-dex/expected.txt
copy to test/640-checker-integer-valueof/expected.txt
diff --git a/test/640-checker-integer-valueof/info.txt b/test/640-checker-integer-valueof/info.txt
new file mode 100644
index 0000000..51021a4
--- /dev/null
+++ b/test/640-checker-integer-valueof/info.txt
@@ -0,0 +1 @@
+Test for Integer.valueOf.
diff --git a/test/640-checker-integer-valueof/src/Main.java b/test/640-checker-integer-valueof/src/Main.java
new file mode 100644
index 0000000..0837fd1
--- /dev/null
+++ b/test/640-checker-integer-valueof/src/Main.java
@@ -0,0 +1,93 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  /// CHECK-START: java.lang.Integer Main.foo(int) disassembly (after)
+  /// CHECK: <<Integer:l\d+>>     InvokeStaticOrDirect method_name:java.lang.Integer.valueOf intrinsic:IntegerValueOf
+  /// CHECK:                      pAllocObjectInitialized
+  /// CHECK:                      Return [<<Integer>>]
+  public static Integer foo(int a) {
+    return Integer.valueOf(a);
+  }
+
+  /// CHECK-START: java.lang.Integer Main.foo2() disassembly (after)
+  /// CHECK: <<Integer:l\d+>>     InvokeStaticOrDirect method_name:java.lang.Integer.valueOf intrinsic:IntegerValueOf
+  /// CHECK-NOT:                  pAllocObjectInitialized
+  /// CHECK:                      Return [<<Integer>>]
+  public static Integer foo2() {
+    return Integer.valueOf(-42);
+  }
+
+  /// CHECK-START: java.lang.Integer Main.foo3() disassembly (after)
+  /// CHECK: <<Integer:l\d+>>     InvokeStaticOrDirect method_name:java.lang.Integer.valueOf intrinsic:IntegerValueOf
+  /// CHECK-NOT:                  pAllocObjectInitialized
+  /// CHECK:                      Return [<<Integer>>]
+  public static Integer foo3() {
+    return Integer.valueOf(42);
+  }
+
+  /// CHECK-START: java.lang.Integer Main.foo4() disassembly (after)
+  /// CHECK: <<Integer:l\d+>>     InvokeStaticOrDirect method_name:java.lang.Integer.valueOf intrinsic:IntegerValueOf
+  /// CHECK:                      pAllocObjectInitialized
+  /// CHECK:                      Return [<<Integer>>]
+  public static Integer foo4() {
+    return Integer.valueOf(55555);
+  }
+
+  public static void main(String[] args) {
+    assertEqual("42", foo(intField));
+    assertEqual(foo(intField), foo(intField2));
+    assertEqual("-42", foo2());
+    assertEqual("42", foo3());
+    assertEqual("55555", foo4());
+    assertEqual("55555", foo(intField3));
+    assertEqual("-129", foo(intFieldMinus129));
+    assertEqual("-128", foo(intFieldMinus128));
+    assertEqual(foo(intFieldMinus128), foo(intFieldMinus128));
+    assertEqual("-127", foo(intFieldMinus127));
+    assertEqual(foo(intFieldMinus127), foo(intFieldMinus127));
+    assertEqual("126", foo(intField126));
+    assertEqual(foo(intField126), foo(intField126));
+    assertEqual("127", foo(intField127));
+    assertEqual(foo(intField127), foo(intField127));
+    assertEqual("128", foo(intField128));
+  }
+
+  static void assertEqual(String a, Integer b) {
+    if (!a.equals(b.toString())) {
+      throw new Error("Expected " + a + ", got " + b);
+    }
+  }
+
+  static void assertEqual(Integer a, Integer b) {
+    if (a != b) {
+      throw new Error("Expected " + a + ", got " + b);
+    }
+  }
+
+  static int intField = 42;
+  static int intField2 = 42;
+  static int intField3 = 55555;
+
+  // Edge cases.
+  static int intFieldMinus129 = -129;
+  static int intFieldMinus128 = -128;
+  static int intFieldMinus127 = -127;
+  static int intField126 = 126;
+  static int intField127 = 127;
+  static int intField128 = 128;
+}
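Note: the pAllocObjectInitialized expectations above follow from java.lang.Integer's cache: valueOf is guaranteed to return the same boxed instance for values in [-128, 127], while values outside that range allocate a fresh Integer (with the default cache size). A minimal stand-alone illustration:

  public class ValueOfCacheDemo {
    public static void main(String[] args) {
      // Cached range: identical references.
      System.out.println(Integer.valueOf(127) == Integer.valueOf(127));  // true
      // Outside the cache: two distinct allocations.
      System.out.println(Integer.valueOf(128) == Integer.valueOf(128));  // false
    }
  }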
diff --git a/test/577-profile-foreign-dex/run b/test/641-checker-arraycopy/build
similarity index 70%
copy from test/577-profile-foreign-dex/run
copy to test/641-checker-arraycopy/build
index ad57d14..9abc618 100644
--- a/test/577-profile-foreign-dex/run
+++ b/test/641-checker-arraycopy/build
@@ -1,6 +1,6 @@
 #!/bin/bash
 #
-# Copyright 2016 The Android Open Source Project
+# Copyright 2017 The Android Open Source Project
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -14,7 +14,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-exec ${RUN} \
-  --runtime-option -Xjitsaveprofilinginfo \
-  --runtime-option -Xusejit:true \
-  "${@}"
+# Make us exit on a failure.
+set -e
+
+# Don't use Jack for this test, to ensure we don't use
+# the typed System.arraycopy versions directly.
+export USE_JACK=false
+
+./default-build
diff --git a/test/577-profile-foreign-dex/expected.txt b/test/641-checker-arraycopy/expected.txt
similarity index 100%
copy from test/577-profile-foreign-dex/expected.txt
copy to test/641-checker-arraycopy/expected.txt
diff --git a/test/641-checker-arraycopy/info.txt b/test/641-checker-arraycopy/info.txt
new file mode 100644
index 0000000..1a1111e
--- /dev/null
+++ b/test/641-checker-arraycopy/info.txt
@@ -0,0 +1,2 @@
+Checker test for the arraycopy optimization in
+the instruction simplifier.
diff --git a/test/641-checker-arraycopy/src/Main.java b/test/641-checker-arraycopy/src/Main.java
new file mode 100644
index 0000000..f0fcf28
--- /dev/null
+++ b/test/641-checker-arraycopy/src/Main.java
@@ -0,0 +1,83 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  // Note that this is testing that we haven't intrinsified the byte[] arraycopy version.
+  // Once we eventually do, we will need to adjust this test.
+
+  /// CHECK-START-X86: void Main.typedCopy(java.lang.Object, byte[]) disassembly (after)
+  /// CHECK: InvokeStaticOrDirect method_name:java.lang.System.arraycopy intrinsic:SystemArrayCopy
+  /// CHECK-NOT:    call
+  /// CHECK: InvokeStaticOrDirect method_name:java.lang.System.arraycopy intrinsic:SystemArrayCopy
+  /// CHECK:        call
+  /// CHECK: ReturnVoid
+  public static void typedCopy(Object o, byte[] foo) {
+    System.arraycopy(o, 1, o, 0, 1);
+    System.arraycopy(foo, 1, foo, 0, 1);
+  }
+
+  public static void untypedCopy(Object o, Object foo) {
+    System.arraycopy(o, 1, o, 0, 1);
+    System.arraycopy(foo, 1, foo, 0, 1);
+  }
+
+  // Test that we still do the optimization after inlining.
+
+  /// CHECK-START-X86: void Main.untypedCopyCaller(java.lang.Object, byte[]) disassembly (after)
+  /// CHECK: InvokeStaticOrDirect method_name:java.lang.System.arraycopy intrinsic:SystemArrayCopy
+  /// CHECK-NOT:    call
+  /// CHECK: InvokeStaticOrDirect method_name:java.lang.System.arraycopy intrinsic:SystemArrayCopy
+  /// CHECK:        call
+  /// CHECK: ReturnVoid
+  public static void untypedCopyCaller(Object o, byte[] array) {
+    untypedCopy(o, array);
+  }
+
+  public static void assertEquals(Object one, Object two) {
+    if (one != two) {
+      throw new Error("Expected " + one + ", got " + two);
+    }
+  }
+
+  public static void main(String[] args) {
+    // Simple sanity checks.
+    byte[] a = new byte[2];
+    Object[] o = new Object[2];
+
+    o[0] = a;
+    o[1] = o;
+    a[0] = 1;
+    a[1] = 2;
+
+    untypedCopyCaller(o, a);
+    assertEquals(o[0], o);
+    assertEquals(o[1], o);
+    assertEquals(a[0], (byte)2);
+    assertEquals(a[1], (byte)2);
+
+    o[0] = a;
+    o[1] = o;
+    a[0] = 1;
+    a[1] = 2;
+
+    typedCopy(o, a);
+    assertEquals(o[0], o);
+    assertEquals(o[1], o);
+    assertEquals(a[0], (byte)2);
+    assertEquals(a[1], (byte)2);
+  }
+}
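Note: the assertions in main() above rely on System.arraycopy's memmove-like contract for overlapping ranges: the copy behaves as if the source region were first copied to a temporary. A tiny stand-alone check of the same effect:

  byte[] a = { 1, 2 };
  System.arraycopy(a, 1, a, 0, 1);
  // a is now { 2, 2 }: a[1] was copied over a[0]; a[1] itself is untouched.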
diff --git a/test/641-irreducible-inline/expected.txt b/test/641-irreducible-inline/expected.txt
new file mode 100644
index 0000000..d81cc07
--- /dev/null
+++ b/test/641-irreducible-inline/expected.txt
@@ -0,0 +1 @@
+42
diff --git a/test/641-irreducible-inline/info.txt b/test/641-irreducible-inline/info.txt
new file mode 100644
index 0000000..ec6d0d2
--- /dev/null
+++ b/test/641-irreducible-inline/info.txt
@@ -0,0 +1,2 @@
+Regression test for the optimizing compiler when
+inlining a method that throws in an irreducible loop.
diff --git a/test/641-irreducible-inline/smali/IrreducibleLoop.smali b/test/641-irreducible-inline/smali/IrreducibleLoop.smali
new file mode 100644
index 0000000..3e6c1f1
--- /dev/null
+++ b/test/641-irreducible-inline/smali/IrreducibleLoop.smali
@@ -0,0 +1,54 @@
+# Copyright (C) 2017 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LIrreducibleLoop;
+
+.super Ljava/lang/Object;
+
+.method public static simpleLoop(I)I
+   .registers 3
+   const/16 v0, 42
+   if-eqz p0, :loop_entry
+   goto :other_loop_pre_entry
+
+   # The then part: beginning of the irreducible loop.
+   :loop_entry
+   if-nez p0, :exit
+   invoke-static {v0},LIrreducibleLoop;->foo(I)V
+   :other_loop_entry
+   goto :loop_entry
+
+   # The else part.
+   :other_loop_pre_entry
+   if-eqz p0, :other_loop_entry
+   invoke-static {v0},LIrreducibleLoop;->foo(I)V
+   goto :other_loop_entry
+
+   :exit
+   return v0
+.end method
+
+.method public static foo(I)V
+   .registers 3
+   const/16 v0, 0
+   sget-boolean v1,LIrreducibleLoop;->doThrow:Z
+   if-eqz v1, :exit
+   # Inlining a method that throws requires re-computing loop information
+   # which is unsupported when the caller has an irreducible loop.
+   throw v0
+   :exit
+   return-void
+.end method
+
+.field public static doThrow:Z
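Note: the test is written in smali because javac only emits reducible control flow; the loop above has two distinct entry points (:loop_entry, and :other_loop_entry reached via :other_loop_pre_entry), which is what makes it irreducible. A rough Java state-machine rendering of simpleLoop's CFG, purely for intuition:

  static int simpleLoopSketch(int p0) {
    int v0 = 42;
    // Two different predecessors jump into the loop body: that is the
    // irreducible part, modeled here with an explicit state variable.
    int state = (p0 == 0) ? 0 /* :loop_entry */ : 2 /* :other_loop_pre_entry */;
    while (true) {
      switch (state) {
        case 0:                      // :loop_entry
          if (p0 != 0) return v0;    // :exit
          foo(v0);
          state = 1; break;
        case 1:                      // :other_loop_entry
          state = 0; break;          // goto :loop_entry
        case 2:                      // :other_loop_pre_entry
          if (p0 != 0) foo(v0);
          state = 1; break;
      }
    }
  }

  static void foo(int i) { /* may throw when doThrow is set */ }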
diff --git a/test/641-irreducible-inline/src/Main.java b/test/641-irreducible-inline/src/Main.java
new file mode 100644
index 0000000..53244f7
--- /dev/null
+++ b/test/641-irreducible-inline/src/Main.java
@@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Method;
+
+public class Main {
+  // Workaround for b/18051191.
+  class InnerClass {}
+
+  public static void main(String[] args) throws Exception {
+    Class<?> c = Class.forName("IrreducibleLoop");
+    Method m = c.getMethod("simpleLoop", int.class);
+    Object[] arguments = { 42 };
+    System.out.println(m.invoke(null, arguments));
+  }
+}
diff --git a/test/641-iterations/expected.txt b/test/641-iterations/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/641-iterations/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/641-iterations/info.txt b/test/641-iterations/info.txt
new file mode 100644
index 0000000..fd80595
--- /dev/null
+++ b/test/641-iterations/info.txt
@@ -0,0 +1 @@
+Tests on varying trip counts (to validate vector/cleanup loops).
diff --git a/test/641-iterations/src/Main.java b/test/641-iterations/src/Main.java
new file mode 100644
index 0000000..6a27f80
--- /dev/null
+++ b/test/641-iterations/src/Main.java
@@ -0,0 +1,73 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Tests of varying trip counts. Focused on testing
+ * the core and cleanup loops after vectorization.
+ */
+public class Main {
+
+  static int[] sA;
+
+  static void init() {
+    for (int i = 0; i < sA.length; i++)
+      sA[i] = 100;
+  }
+
+  static void doitTo(int n) {
+    for (int i = 0; i < n; i++)
+      sA[i] += 1;
+  }
+
+  static void doitFrom(int n) {
+    for (int i = n; i < sA.length; i++)
+      sA[i] += 1;
+  }
+
+  static void verify(int n) {
+    for (int i = 0; i < n; i++)
+      if (sA[i] != 101)
+        throw new Error("failed inside loop");
+    for (int i = n; i < sA.length; i++)
+      if (sA[i] != 100)
+        throw new Error("failed outside loop");
+  }
+
+  static void verify() {
+    for (int i = 0; i < sA.length; i++)
+      if (sA[i] != 101)
+        throw new Error("failed inside loop");
+  }
+
+  static void driver() {
+    for (int n = 0; n <= sA.length; n++) {
+      init();
+      doitTo(n);
+      verify(n);
+      doitFrom(n);
+      verify();
+    }
+  }
+
+  public static void main(String[] args) {
+    sA = new int[17];
+    driver();
+    sA = new int[32];
+    driver();
+    System.out.println("passed");
+  }
+}
+
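Note: the varying trip counts matter because, after vectorization, each loop is conceptually split into a core loop that processes VLEN elements per iteration and a scalar cleanup loop for the remainder; every n in driver() exercises a different core/cleanup split. A hypothetical scalar rendering of that split (VLEN is illustrative, not ART's actual vector width):

  static void doitToSplit(int[] a, int n) {
    final int VLEN = 4;                   // illustrative vector width
    int i = 0;
    for (; i + VLEN <= n; i += VLEN) {    // core loop: one SIMD add per VLEN elements
      for (int k = 0; k < VLEN; k++) {
        a[i + k] += 1;
      }
    }
    for (; i < n; i++) {                  // cleanup loop: remaining n % VLEN elements
      a[i] += 1;
    }
  }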
diff --git a/test/642-fp-callees/expected.txt b/test/642-fp-callees/expected.txt
new file mode 100644
index 0000000..77a1486
--- /dev/null
+++ b/test/642-fp-callees/expected.txt
@@ -0,0 +1,2 @@
+JNI_OnLoad called
+Done
diff --git a/test/642-fp-callees/fp_callees.cc b/test/642-fp-callees/fp_callees.cc
new file mode 100644
index 0000000..600f969
--- /dev/null
+++ b/test/642-fp-callees/fp_callees.cc
@@ -0,0 +1,71 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "base/casts.h"
+#include "base/logging.h"
+#include "jni.h"
+
+namespace art {
+
+// Make the array volatile, which apparently makes the C compiler
+// use FP registers in the method below.
+volatile double array[] = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0 };
+
+extern "C" JNIEXPORT void JNICALL Java_Main_holdFpTemporaries(JNIEnv* env, jclass cls) {
+  jmethodID mid = env->GetStaticMethodID(cls, "caller", "(IIJ)V");
+  CHECK(mid != nullptr);
+  // Load values from the array; they will be held in callee-save FP registers.
+  double a = array[0];
+  double b = array[1];
+  double c = array[2];
+  double d = array[3];
+  double e = array[4];
+  double f = array[5];
+  double g = array[6];
+  double h = array[7];
+  double i = array[8];
+  double j = array[9];
+  double k = array[10];
+  double l = array[11];
+  env->CallStaticVoidMethod(cls, mid, 1, 1, 1L);
+  // Load into a temporary to please the C compiler with bit_cast.
+  double temp = array[0];
+  CHECK_EQ(bit_cast<int64_t>(a), bit_cast<int64_t>(temp));
+  temp = array[1];
+  CHECK_EQ(bit_cast<int64_t>(b), bit_cast<int64_t>(temp));
+  temp = array[2];
+  CHECK_EQ(bit_cast<int64_t>(c), bit_cast<int64_t>(temp));
+  temp = array[3];
+  CHECK_EQ(bit_cast<int64_t>(d), bit_cast<int64_t>(temp));
+  temp = array[4];
+  CHECK_EQ(bit_cast<int64_t>(e), bit_cast<int64_t>(temp));
+  temp = array[5];
+  CHECK_EQ(bit_cast<int64_t>(f), bit_cast<int64_t>(temp));
+  temp = array[6];
+  CHECK_EQ(bit_cast<int64_t>(g), bit_cast<int64_t>(temp));
+  temp = array[7];
+  CHECK_EQ(bit_cast<int64_t>(h), bit_cast<int64_t>(temp));
+  temp = array[8];
+  CHECK_EQ(bit_cast<int64_t>(i), bit_cast<int64_t>(temp));
+  temp = array[9];
+  CHECK_EQ(bit_cast<int64_t>(j), bit_cast<int64_t>(temp));
+  temp = array[10];
+  CHECK_EQ(bit_cast<int64_t>(k), bit_cast<int64_t>(temp));
+  temp = array[11];
+  CHECK_EQ(bit_cast<int64_t>(l), bit_cast<int64_t>(temp));
+}
+
+}  // namespace art
diff --git a/test/642-fp-callees/info.txt b/test/642-fp-callees/info.txt
new file mode 100644
index 0000000..d3e4bda
--- /dev/null
+++ b/test/642-fp-callees/info.txt
@@ -0,0 +1,2 @@
+Regression test for vixl32 backend, which used to incorrectly
+use D14 as a temporary register.
diff --git a/test/642-fp-callees/src/Main.java b/test/642-fp-callees/src/Main.java
new file mode 100644
index 0000000..fa57c93
--- /dev/null
+++ b/test/642-fp-callees/src/Main.java
@@ -0,0 +1,34 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+  public static void main(String[] args) {
+    System.loadLibrary(args[0]);
+    holdFpTemporaries();
+    System.out.println("Done");
+  }
+
+  public static void caller(int a, int b, long c) {
+    $noinline$callee(a, b, c);
+  }
+
+  // This method is deliberately not inlined, in order to generate the
+  // problematic floating point register use at the call site.
+  public static void $noinline$callee(int a, int b, long c) {
+  }
+
+  public native static void holdFpTemporaries();
+}
diff --git a/test/701-easy-div-rem/build b/test/701-easy-div-rem/build
index 666fe89..d83ee82 100644
--- a/test/701-easy-div-rem/build
+++ b/test/701-easy-div-rem/build
@@ -21,12 +21,4 @@
 mkdir src
 python ./genMain.py
 
-# Increase the file size limitation for classes.lst as the machine generated
-# source file contains a lot of methods and is quite large.
-
-# Jack generates big temp files so only apply ulimit for dx.
-if [ ${USE_JACK} = "false" ]; then
-  ulimit -S 4096
-fi
-
 ./default-build
diff --git a/test/901-hello-ti-agent/basics.cc b/test/901-hello-ti-agent/basics.cc
index 0b17656..9166277 100644
--- a/test/901-hello-ti-agent/basics.cc
+++ b/test/901-hello-ti-agent/basics.cc
@@ -20,7 +20,7 @@
 #include <stdio.h>
 #include <string.h>
 #include "base/macros.h"
-#include "openjdkjvmti/jvmti.h"
+#include "jvmti.h"
 
 #include "ti-agent/common_helper.h"
 #include "ti-agent/common_load.h"
diff --git a/test/903-hello-tagging/expected.txt b/test/903-hello-tagging/expected.txt
index 872b79b..acfdbd8 100644
--- a/test/903-hello-tagging/expected.txt
+++ b/test/903-hello-tagging/expected.txt
@@ -8,3 +8,4 @@
 [<null;1>, <null;1>, <null;2>, <null;2>, <null;3>, <null;3>, <null;4>, <null;4>, <null;5>, <null;5>, <null;6>, <null;6>, <null;7>, <null;7>, <null;8>, <null;8>, <null;9>, <null;9>]
 18
 [<1;0>, <2;0>, <3;0>, <4;0>, <5;0>, <6;0>, <7;0>, <8;0>, <9;0>, <11;0>, <12;0>, <13;0>, <14;0>, <15;0>, <16;0>, <17;0>, <18;0>, <19;0>]
+[100, 101, 102, 103, 104, 105, 106, 107, 108, 109]
diff --git a/test/903-hello-tagging/src/Main.java b/test/903-hello-tagging/src/Main.java
index 2f0365a..48896b2 100644
--- a/test/903-hello-tagging/src/Main.java
+++ b/test/903-hello-tagging/src/Main.java
@@ -22,6 +22,7 @@
   public static void main(String[] args) {
     doTest();
     testGetTaggedObjects();
+    testTags();
   }
 
   public static void doTest() {
@@ -35,6 +36,12 @@
     }
   }
 
+  public static void testTags() {
+    Object o = new Object();
+    long[] res = testTagsInDifferentEnvs(o, 100, 10);
+    System.out.println(Arrays.toString(res));
+  }
+
   private static WeakReference<Object> test() {
     Object o1 = new Object();
     setTag(o1, 1);
@@ -166,4 +173,5 @@
   private static native long getTag(Object o);
   private static native Object[] getTaggedObjects(long[] searchTags, boolean returnObjects,
       boolean returnTags);
+  private static native long[] testTagsInDifferentEnvs(Object o, long baseTag, int n);
 }
diff --git a/test/903-hello-tagging/tagging.cc b/test/903-hello-tagging/tagging.cc
index f74c1fc..b85ed48 100644
--- a/test/903-hello-tagging/tagging.cc
+++ b/test/903-hello-tagging/tagging.cc
@@ -25,7 +25,7 @@
 
 #include "art_method-inl.h"
 #include "base/logging.h"
-#include "openjdkjvmti/jvmti.h"
+#include "jvmti.h"
 #include "ti-agent/common_helper.h"
 #include "ti-agent/common_load.h"
 #include "utils.h"
@@ -139,6 +139,62 @@
   return resultArray;
 }
 
+static jvmtiEnv* CreateJvmtiEnv(JNIEnv* env) {
+  JavaVM* jvm;
+  CHECK_EQ(0, env->GetJavaVM(&jvm));
+
+  jvmtiEnv* new_jvmti_env;
+  CHECK_EQ(0, jvm->GetEnv(reinterpret_cast<void**>(&new_jvmti_env), JVMTI_VERSION_1_0));
+
+  jvmtiCapabilities capa;
+  memset(&capa, 0, sizeof(jvmtiCapabilities));
+  capa.can_tag_objects = 1;
+  jvmtiError error = new_jvmti_env->AddCapabilities(&capa);
+  CHECK_EQ(JVMTI_ERROR_NONE, error);
+
+  return new_jvmti_env;
+}
+
+static void SetTag(jvmtiEnv* env, jobject obj, jlong tag) {
+  jvmtiError ret = env->SetTag(obj, tag);
+  CHECK_EQ(JVMTI_ERROR_NONE, ret);
+}
+
+static jlong GetTag(jvmtiEnv* env, jobject obj) {
+  jlong tag;
+  jvmtiError ret = env->GetTag(obj, &tag);
+  CHECK_EQ(JVMTI_ERROR_NONE, ret);
+  return tag;
+}
+
+extern "C" JNIEXPORT jlongArray JNICALL Java_Main_testTagsInDifferentEnvs(
+    JNIEnv* env, jclass klass ATTRIBUTE_UNUSED, jobject obj, jlong base_tag, jint count) {
+  std::unique_ptr<jvmtiEnv*[]> envs = std::unique_ptr<jvmtiEnv*[]>(new jvmtiEnv*[count]);
+  envs[0] = jvmti_env;
+  for (int32_t i = 1; i != count; ++i) {
+    envs[i] = CreateJvmtiEnv(env);
+  }
+
+  for (int32_t i = 0; i != count; ++i) {
+    SetTag(envs[i], obj, base_tag + i);
+  }
+  std::unique_ptr<jlong[]> vals = std::unique_ptr<jlong[]>(new jlong[count]);
+  for (int32_t i = 0; i != count; ++i) {
+    vals[i] = GetTag(envs[i], obj);
+  }
+
+  for (int32_t i = 1; i != count; ++i) {
+    CHECK_EQ(JVMTI_ERROR_NONE, envs[i]->DisposeEnvironment());
+  }
+
+  jlongArray res = env->NewLongArray(count);
+  if (res == nullptr) {
+    return nullptr;
+  }
+  env->SetLongArrayRegion(res, 0, count, vals.get());
+  return res;
+}
+
 }  // namespace Test903HelloTagging
 }  // namespace art
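Note: the new testTagsInDifferentEnvs exercises a key JVMTI property: tags live in per-environment tag maps, so a SetTag through one jvmtiEnv never disturbs another environment's tag for the same object. In sketch form, using the native method declared in Main.java above:

  Object o = new Object();
  long[] tags = testTagsInDifferentEnvs(o, 100, 10);
  // Each of the 10 environments reads back only the tag it set itself:
  // tags == { 100, 101, ..., 109 }, matching the new expected.txt line.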
 
diff --git a/test/904-object-allocation/tracking.cc b/test/904-object-allocation/tracking.cc
index 95eab0c..cc6f681 100644
--- a/test/904-object-allocation/tracking.cc
+++ b/test/904-object-allocation/tracking.cc
@@ -21,7 +21,7 @@
 
 #include "base/logging.h"
 #include "jni.h"
-#include "openjdkjvmti/jvmti.h"
+#include "jvmti.h"
 #include "ScopedLocalRef.h"
 #include "ScopedUtfChars.h"
 #include "ti-agent/common_helper.h"
diff --git a/test/905-object-free/tracking_free.cc b/test/905-object-free/tracking_free.cc
index 7b26d79..5eed472 100644
--- a/test/905-object-free/tracking_free.cc
+++ b/test/905-object-free/tracking_free.cc
@@ -21,7 +21,7 @@
 
 #include "base/logging.h"
 #include "jni.h"
-#include "openjdkjvmti/jvmti.h"
+#include "jvmti.h"
 #include "ScopedLocalRef.h"
 #include "ScopedUtfChars.h"
 #include "ti-agent/common_helper.h"
diff --git a/test/906-iterate-heap/expected.txt b/test/906-iterate-heap/expected.txt
index 72cd47d..b6af843 100644
--- a/test/906-iterate-heap/expected.txt
+++ b/test/906-iterate-heap/expected.txt
@@ -1,2 +1,44 @@
-[{tag=1, class-tag=0, size=8, length=-1}, {tag=2, class-tag=100, size=8, length=-1}, {tag=3, class-tag=100, size=8, length=-1}, {tag=4, class-tag=0, size=32, length=5}, {tag=100, class-tag=0, size=<class>, length=-1}]
-[{tag=11, class-tag=0, size=8, length=-1}, {tag=12, class-tag=110, size=8, length=-1}, {tag=13, class-tag=110, size=8, length=-1}, {tag=14, class-tag=0, size=32, length=5}, {tag=110, class-tag=0, size=<class>, length=-1}]
+[{tag=1, class-tag=0, size=8, length=-1}, {tag=2, class-tag=100, size=8, length=-1}, {tag=3, class-tag=100, size=8, length=-1}, {tag=4, class-tag=0, size=32, length=5}, {tag=5, class-tag=0, size=32, length=-1}, {tag=100, class-tag=0, size=<class>, length=-1}]
+[{tag=11, class-tag=0, size=8, length=-1}, {tag=12, class-tag=110, size=8, length=-1}, {tag=13, class-tag=110, size=8, length=-1}, {tag=14, class-tag=0, size=32, length=5}, {tag=15, class-tag=0, size=32, length=-1}, {tag=110, class-tag=0, size=<class>, length=-1}]
+15@0 (32, 'Hello World')
+16
+1@0 (14, 2xZ '0001')
+2
+1@0 (15, 3xB '010203')
+2
+1@0 (16, 2xC '41005a00')
+2
+1@0 (18, 3xS '010002000300')
+2
+1@0 (24, 3xI '010000000200000003000000')
+2
+1@0 (20, 2xF '000000000000803f')
+2
+1@0 (40, 3xJ '010000000000000002000000000000000300000000000000')
+2
+1@0 (32, 2xD '0000000000000000000000000000f03f')
+2
+10000@0 (static, int, index=3) 0000000000000000
+10001
+10000@0 (static, int, index=11) 0000000000000000
+10001
+10000@0 (static, int, index=0) 0000000000000000
+10001
+10000@0 (static, int, index=1) 0000000000000000
+10001
+10000@0 (instance, int, index=2) 0000000000000000
+10001@0 (instance, byte, index=4) 0000000000000001
+10002@0 (instance, char, index=5) 0000000000000061
+10003@0 (instance, int, index=6) 0000000000000003
+10004@0 (instance, long, index=7) 0000000000000004
+10005@0 (instance, short, index=9) 0000000000000002
+10006
+10000@0 (instance, int, index=3) 0000000000000000
+10001@0 (instance, byte, index=5) 0000000000000001
+10002@0 (instance, char, index=6) 0000000000000061
+10003@0 (instance, int, index=7) 0000000000000003
+10004@0 (instance, long, index=8) 0000000000000004
+10005@0 (instance, short, index=10) 0000000000000002
+10006@0 (instance, double, index=12) 3ff3ae147ae147ae
+10007@0 (instance, float, index=13) 000000003f9d70a4
+10008
diff --git a/test/906-iterate-heap/iterate_heap.cc b/test/906-iterate-heap/iterate_heap.cc
index 1362d47..f2532de 100644
--- a/test/906-iterate-heap/iterate_heap.cc
+++ b/test/906-iterate-heap/iterate_heap.cc
@@ -14,17 +14,23 @@
  * limitations under the License.
  */
 
+#include "inttypes.h"
+
+#include <iomanip>
 #include <iostream>
 #include <pthread.h>
+#include <sstream>
 #include <stdio.h>
 #include <vector>
 
+#include "android-base/stringprintf.h"
 #include "base/logging.h"
 #include "jni.h"
-#include "openjdkjvmti/jvmti.h"
+#include "jvmti.h"
 #include "ScopedPrimitiveArray.h"
 #include "ti-agent/common_helper.h"
 #include "ti-agent/common_load.h"
+#include "utf.h"
 
 namespace art {
 namespace Test906IterateHeap {
@@ -172,5 +178,236 @@
   Run(heap_filter, klass_filter, &config);
 }
 
+extern "C" JNIEXPORT jstring JNICALL Java_Main_iterateThroughHeapString(
+    JNIEnv* env, jclass klass ATTRIBUTE_UNUSED, jlong tag) {
+  struct FindStringCallbacks {
+    explicit FindStringCallbacks(jlong t) : tag_to_find(t) {}
+
+    static jint JNICALL HeapIterationCallback(jlong class_tag ATTRIBUTE_UNUSED,
+                                              jlong size ATTRIBUTE_UNUSED,
+                                              jlong* tag_ptr ATTRIBUTE_UNUSED,
+                                              jint length ATTRIBUTE_UNUSED,
+                                              void* user_data ATTRIBUTE_UNUSED) {
+      return 0;
+    }
+
+    static jint JNICALL StringValueCallback(jlong class_tag,
+                                            jlong size,
+                                            jlong* tag_ptr,
+                                            const jchar* value,
+                                            jint value_length,
+                                            void* user_data) {
+      FindStringCallbacks* p = reinterpret_cast<FindStringCallbacks*>(user_data);
+      if (*tag_ptr == p->tag_to_find) {
+        size_t utf_byte_count = CountUtf8Bytes(value, value_length);
+        std::unique_ptr<char[]> mod_utf(new char[utf_byte_count + 1]);
+        memset(mod_utf.get(), 0, utf_byte_count + 1);
+        ConvertUtf16ToModifiedUtf8(mod_utf.get(), utf_byte_count, value, value_length);
+        if (!p->data.empty()) {
+          p->data += "\n";
+        }
+        p->data += android::base::StringPrintf("%" PRId64 "@%" PRId64 " (%" PRId64 ", '%s')",
+                                               *tag_ptr,
+                                               class_tag,
+                                               size,
+                                               mod_utf.get());
+        // Update the tag to test whether that works.
+        *tag_ptr = *tag_ptr + 1;
+      }
+      return 0;
+    }
+
+    std::string data;
+    const jlong tag_to_find;
+  };
+
+  jvmtiHeapCallbacks callbacks;
+  memset(&callbacks, 0, sizeof(jvmtiHeapCallbacks));
+  callbacks.heap_iteration_callback = FindStringCallbacks::HeapIterationCallback;
+  callbacks.string_primitive_value_callback = FindStringCallbacks::StringValueCallback;
+
+  FindStringCallbacks fsc(tag);
+  jvmtiError ret = jvmti_env->IterateThroughHeap(0, nullptr, &callbacks, &fsc);
+  if (JvmtiErrorToException(env, ret)) {
+    return nullptr;
+  }
+  return env->NewStringUTF(fsc.data.c_str());
+}
+
+extern "C" JNIEXPORT jstring JNICALL Java_Main_iterateThroughHeapPrimitiveArray(
+    JNIEnv* env, jclass klass ATTRIBUTE_UNUSED, jlong tag) {
+  struct FindArrayCallbacks {
+    explicit FindArrayCallbacks(jlong t) : tag_to_find(t) {}
+
+    static jint JNICALL HeapIterationCallback(jlong class_tag ATTRIBUTE_UNUSED,
+                                              jlong size ATTRIBUTE_UNUSED,
+                                              jlong* tag_ptr ATTRIBUTE_UNUSED,
+                                              jint length ATTRIBUTE_UNUSED,
+                                              void* user_data ATTRIBUTE_UNUSED) {
+      return 0;
+    }
+
+    static jint JNICALL ArrayValueCallback(jlong class_tag,
+                                           jlong size,
+                                           jlong* tag_ptr,
+                                           jint element_count,
+                                           jvmtiPrimitiveType element_type,
+                                           const void* elements,
+                                           void* user_data) {
+      FindArrayCallbacks* p = reinterpret_cast<FindArrayCallbacks*>(user_data);
+      if (*tag_ptr == p->tag_to_find) {
+        std::ostringstream oss;
+        oss << *tag_ptr
+            << '@'
+            << class_tag
+            << " ("
+            << size
+            << ", "
+            << element_count
+            << "x"
+            << static_cast<char>(element_type)
+            << " '";
+        size_t element_size;
+        switch (element_type) {
+          case JVMTI_PRIMITIVE_TYPE_BOOLEAN:
+          case JVMTI_PRIMITIVE_TYPE_BYTE:
+            element_size = 1;
+            break;
+          case JVMTI_PRIMITIVE_TYPE_CHAR:
+          case JVMTI_PRIMITIVE_TYPE_SHORT:
+            element_size = 2;
+            break;
+          case JVMTI_PRIMITIVE_TYPE_INT:
+          case JVMTI_PRIMITIVE_TYPE_FLOAT:
+            element_size = 4;
+            break;
+          case JVMTI_PRIMITIVE_TYPE_LONG:
+          case JVMTI_PRIMITIVE_TYPE_DOUBLE:
+            element_size = 8;
+            break;
+          default:
+            LOG(FATAL) << "Unknown type " << static_cast<size_t>(element_type);
+            UNREACHABLE();
+        }
+        const uint8_t* data = reinterpret_cast<const uint8_t*>(elements);
+        for (size_t i = 0; i != element_size * element_count; ++i) {
+          oss << android::base::StringPrintf("%02x", data[i]);
+        }
+        oss << "')";
+
+        if (!p->data.empty()) {
+          p->data += "\n";
+        }
+        p->data += oss.str();
+        // Update the tag to test whether that works.
+        *tag_ptr = *tag_ptr + 1;
+      }
+      return 0;
+    }
+
+    std::string data;
+    const jlong tag_to_find;
+  };
+
+  jvmtiHeapCallbacks callbacks;
+  memset(&callbacks, 0, sizeof(jvmtiHeapCallbacks));
+  callbacks.heap_iteration_callback = FindArrayCallbacks::HeapIterationCallback;
+  callbacks.array_primitive_value_callback = FindArrayCallbacks::ArrayValueCallback;
+
+  FindArrayCallbacks fac(tag);
+  jvmtiError ret = jvmti_env->IterateThroughHeap(0, nullptr, &callbacks, &fac);
+  if (JvmtiErrorToException(env, ret)) {
+    return nullptr;
+  }
+  return env->NewStringUTF(fac.data.c_str());
+}
+
+static constexpr const char* GetPrimitiveTypeName(jvmtiPrimitiveType type) {
+  switch (type) {
+    case JVMTI_PRIMITIVE_TYPE_BOOLEAN:
+      return "boolean";
+    case JVMTI_PRIMITIVE_TYPE_BYTE:
+      return "byte";
+    case JVMTI_PRIMITIVE_TYPE_CHAR:
+      return "char";
+    case JVMTI_PRIMITIVE_TYPE_SHORT:
+      return "short";
+    case JVMTI_PRIMITIVE_TYPE_INT:
+      return "int";
+    case JVMTI_PRIMITIVE_TYPE_FLOAT:
+      return "float";
+    case JVMTI_PRIMITIVE_TYPE_LONG:
+      return "long";
+    case JVMTI_PRIMITIVE_TYPE_DOUBLE:
+      return "double";
+  }
+  LOG(FATAL) << "Unknown type " << static_cast<size_t>(type);
+  UNREACHABLE();
+}
+
+extern "C" JNIEXPORT jstring JNICALL Java_Main_iterateThroughHeapPrimitiveFields(
+    JNIEnv* env, jclass klass ATTRIBUTE_UNUSED, jlong tag) {
+  struct FindFieldCallbacks {
+    explicit FindFieldCallbacks(jlong t) : tag_to_find(t) {}
+
+    static jint JNICALL HeapIterationCallback(jlong class_tag ATTRIBUTE_UNUSED,
+                                              jlong size ATTRIBUTE_UNUSED,
+                                              jlong* tag_ptr ATTRIBUTE_UNUSED,
+                                              jint length ATTRIBUTE_UNUSED,
+                                              void* user_data ATTRIBUTE_UNUSED) {
+      return 0;
+    }
+
+    static jint JNICALL PrimitiveFieldValueCallback(jvmtiHeapReferenceKind kind,
+                                                    const jvmtiHeapReferenceInfo* info,
+                                                    jlong class_tag,
+                                                    jlong* tag_ptr,
+                                                    jvalue value,
+                                                    jvmtiPrimitiveType value_type,
+                                                    void* user_data) {
+      FindFieldCallbacks* p = reinterpret_cast<FindFieldCallbacks*>(user_data);
+      if (*tag_ptr >= p->tag_to_find) {
+        std::ostringstream oss;
+        oss << *tag_ptr
+            << '@'
+            << class_tag
+            << " ("
+            << (kind == JVMTI_HEAP_REFERENCE_FIELD ? "instance, " : "static, ")
+            << GetPrimitiveTypeName(value_type)
+            << ", index="
+            << info->field.index
+            << ") ";
+        // Be lazy, always print eight bytes.
+        static_assert(sizeof(jvalue) == sizeof(uint64_t), "Unexpected jvalue size");
+        uint64_t val;
+        memcpy(&val, &value, sizeof(uint64_t));  // To avoid undefined behavior.
+        oss << android::base::StringPrintf("%016" PRIx64, val);
+
+        if (!p->data.empty()) {
+          p->data += "\n";
+        }
+        p->data += oss.str();
+        *tag_ptr = *tag_ptr + 1;
+      }
+      return 0;
+    }
+
+    std::string data;
+    const jlong tag_to_find;
+  };
+
+  jvmtiHeapCallbacks callbacks;
+  memset(&callbacks, 0, sizeof(jvmtiHeapCallbacks));
+  callbacks.heap_iteration_callback = FindFieldCallbacks::HeapIterationCallback;
+  callbacks.primitive_field_callback = FindFieldCallbacks::PrimitiveFieldValueCallback;
+
+  FindFieldCallbacks ffc(tag);
+  jvmtiError ret = jvmti_env->IterateThroughHeap(0, nullptr, &callbacks, &ffc);
+  if (JvmtiErrorToException(env, ret)) {
+    return nullptr;
+  }
+  return env->NewStringUTF(ffc.data.c_str());
+}
+
 }  // namespace Test906IterateHeap
 }  // namespace art
diff --git a/test/906-iterate-heap/src/Main.java b/test/906-iterate-heap/src/Main.java
index cab27be..365ce0f 100644
--- a/test/906-iterate-heap/src/Main.java
+++ b/test/906-iterate-heap/src/Main.java
@@ -28,11 +28,13 @@
     B b2 = new B();
     C c = new C();
     A[] aArray = new A[5];
+    String s = "Hello World";
 
     setTag(a, 1);
     setTag(b, 2);
     setTag(b2, 3);
     setTag(aArray, 4);
+    setTag(s, 5);
     setTag(B.class, 100);
 
     int all = iterateThroughHeapCount(0, null, Integer.MAX_VALUE);
@@ -50,7 +52,7 @@
       throw new IllegalStateException("By class: " + all + " != " + taggedClass + " + " +
           untaggedClass);
     }
-    if (tagged != 5) {
+    if (tagged != 6) {
       throw new IllegalStateException(tagged + " tagged objects");
     }
     if (taggedClass != 2) {
@@ -74,6 +76,103 @@
     iterateThroughHeapAdd(HEAP_FILTER_OUT_UNTAGGED, null);
     n = iterateThroughHeapData(HEAP_FILTER_OUT_UNTAGGED, null, classTags, sizes, tags, lengths);
     System.out.println(sort(n, classTags, sizes, tags, lengths));
+
+    System.out.println(iterateThroughHeapString(getTag(s)));
+    System.out.println(getTag(s));
+
+    boolean[] zArray = new boolean[] { false, true };
+    setTag(zArray, 1);
+    System.out.println(iterateThroughHeapPrimitiveArray(getTag(zArray)));
+    System.out.println(getTag(zArray));
+
+    byte[] bArray = new byte[] { 1, 2, 3 };
+    setTag(bArray, 1);
+    System.out.println(iterateThroughHeapPrimitiveArray(getTag(bArray)));
+    System.out.println(getTag(bArray));
+
+    char[] cArray = new char[] { 'A', 'Z' };
+    setTag(cArray, 1);
+    System.out.println(iterateThroughHeapPrimitiveArray(getTag(cArray)));
+    System.out.println(getTag(cArray));
+
+    short[] sArray = new short[] { 1, 2, 3 };
+    setTag(sArray, 1);
+    System.out.println(iterateThroughHeapPrimitiveArray(getTag(sArray)));
+    System.out.println(getTag(sArray));
+
+    int[] iArray = new int[] { 1, 2, 3 };
+    setTag(iArray, 1);
+    System.out.println(iterateThroughHeapPrimitiveArray(getTag(iArray)));
+    System.out.println(getTag(iArray));
+
+    float[] fArray = new float[] { 0.0f, 1.0f };
+    setTag(fArray, 1);
+    System.out.println(iterateThroughHeapPrimitiveArray(getTag(fArray)));
+    System.out.println(getTag(fArray));
+
+    long[] lArray = new long[] { 1, 2, 3 };
+    setTag(lArray, 1);
+    System.out.println(iterateThroughHeapPrimitiveArray(getTag(lArray)));
+    System.out.println(getTag(lArray));
+
+    double[] dArray = new double[] { 0.0, 1.0 };
+    setTag(dArray, 1);
+    System.out.println(iterateThroughHeapPrimitiveArray(getTag(dArray)));
+    System.out.println(getTag(dArray));
+
+    // Force GCs to clean up dirt.
+    Runtime.getRuntime().gc();
+    Runtime.getRuntime().gc();
+
+    doTestPrimitiveFieldsClasses();
+
+    doTestPrimitiveFieldsIntegral();
+
+    // Force GCs to clean up dirt.
+    Runtime.getRuntime().gc();
+    Runtime.getRuntime().gc();
+
+    doTestPrimitiveFieldsFloat();
+
+    // Force GCs to clean up dirt.
+    Runtime.getRuntime().gc();
+    Runtime.getRuntime().gc();
+  }
+
+  private static void doTestPrimitiveFieldsClasses() {
+    setTag(IntObject.class, 10000);
+    System.out.println(iterateThroughHeapPrimitiveFields(10000));
+    System.out.println(getTag(IntObject.class));
+    setTag(IntObject.class, 0);
+
+    setTag(FloatObject.class, 10000);
+    System.out.println(iterateThroughHeapPrimitiveFields(10000));
+    System.out.println(getTag(FloatObject.class));
+    setTag(FloatObject.class, 0);
+
+    setTag(Inf1.class, 10000);
+    System.out.println(iterateThroughHeapPrimitiveFields(10000));
+    System.out.println(getTag(Inf1.class));
+    setTag(Inf1.class, 0);
+
+    setTag(Inf2.class, 10000);
+    System.out.println(iterateThroughHeapPrimitiveFields(10000));
+    System.out.println(getTag(Inf2.class));
+    setTag(Inf2.class, 0);
+  }
+
+  private static void doTestPrimitiveFieldsIntegral() {
+    IntObject intObject = new IntObject();
+    setTag(intObject, 10000);
+    System.out.println(iterateThroughHeapPrimitiveFields(10000));
+    System.out.println(getTag(intObject));
+  }
+
+  private static void doTestPrimitiveFieldsFloat() {
+    FloatObject floatObject = new FloatObject();
+    setTag(floatObject, 10000);
+    System.out.println(iterateThroughHeapPrimitiveFields(10000));
+    System.out.println(getTag(floatObject));
   }
 
   static class A {
@@ -127,6 +226,31 @@
     return ret;
   }
 
+  private static interface Inf1 {
+    public final static int A = 1;
+  }
+
+  private static interface Inf2 extends Inf1 {
+    public final static int B = 1;
+  }
+
+  private static class IntObject implements Inf1 {
+    byte b = (byte)1;
+    char c = 'a';
+    short s = (short)2;
+    int i = 3;
+    long l = 4;
+    Object o = new Object();
+    static int sI = 5;
+  }
+
+  private static class FloatObject extends IntObject implements Inf2 {
+    float f = 1.23f;
+    double d = 1.23;
+    Object p = new Object();
+    static int sI = 6;
+  }
+
   private static native void setTag(Object o, long tag);
   private static native long getTag(Object o);
 
@@ -141,4 +265,7 @@
       Class<?> klassFilter, long classTags[], long sizes[], long tags[], int lengths[]);
   private static native int iterateThroughHeapAdd(int heapFilter,
       Class<?> klassFilter);
+  private static native String iterateThroughHeapString(long tag);
+  private static native String iterateThroughHeapPrimitiveArray(long tag);
+  private static native String iterateThroughHeapPrimitiveFields(long tag);
 }
diff --git a/test/907-get-loaded-classes/get_loaded_classes.cc b/test/907-get-loaded-classes/get_loaded_classes.cc
index 5bda7eb..48ce2e2 100644
--- a/test/907-get-loaded-classes/get_loaded_classes.cc
+++ b/test/907-get-loaded-classes/get_loaded_classes.cc
@@ -21,7 +21,7 @@
 
 #include "base/macros.h"
 #include "jni.h"
-#include "openjdkjvmti/jvmti.h"
+#include "jvmti.h"
 #include "ScopedLocalRef.h"
 #include "ScopedUtfChars.h"
 
diff --git a/test/908-gc-start-finish/gc_callbacks.cc b/test/908-gc-start-finish/gc_callbacks.cc
index 8f96ee6..45148f8 100644
--- a/test/908-gc-start-finish/gc_callbacks.cc
+++ b/test/908-gc-start-finish/gc_callbacks.cc
@@ -19,7 +19,7 @@
 
 #include "base/macros.h"
 #include "jni.h"
-#include "openjdkjvmti/jvmti.h"
+#include "jvmti.h"
 #include "ti-agent/common_helper.h"
 #include "ti-agent/common_load.h"
 
diff --git a/test/909-attach-agent/attach.cc b/test/909-attach-agent/attach.cc
index adae844..67c7567 100644
--- a/test/909-attach-agent/attach.cc
+++ b/test/909-attach-agent/attach.cc
@@ -20,7 +20,7 @@
 #include <stdio.h>
 #include <string.h>
 #include "base/macros.h"
-#include "openjdkjvmti/jvmti.h"
+#include "jvmti.h"
 
 namespace art {
 namespace Test909AttachAgent {
diff --git a/test/910-methods/methods.cc b/test/910-methods/methods.cc
index f60fabb..fdc4cdb 100644
--- a/test/910-methods/methods.cc
+++ b/test/910-methods/methods.cc
@@ -18,7 +18,7 @@
 
 #include "base/macros.h"
 #include "jni.h"
-#include "openjdkjvmti/jvmti.h"
+#include "jvmti.h"
 #include "ScopedLocalRef.h"
 
 #include "ti-agent/common_helper.h"
diff --git a/test/911-get-stack-trace/stack_trace.cc b/test/911-get-stack-trace/stack_trace.cc
index 68f6d8d..5a3a311 100644
--- a/test/911-get-stack-trace/stack_trace.cc
+++ b/test/911-get-stack-trace/stack_trace.cc
@@ -24,7 +24,7 @@
 #include "base/logging.h"
 #include "base/macros.h"
 #include "jni.h"
-#include "openjdkjvmti/jvmti.h"
+#include "jvmti.h"
 #include "ScopedLocalRef.h"
 #include "ti-agent/common_helper.h"
 #include "ti-agent/common_load.h"
diff --git a/test/912-classes/classes.cc b/test/912-classes/classes.cc
index 3ccfe86..5bd34f6 100644
--- a/test/912-classes/classes.cc
+++ b/test/912-classes/classes.cc
@@ -20,7 +20,7 @@
 #include "class_linker.h"
 #include "jni.h"
 #include "mirror/class_loader.h"
-#include "openjdkjvmti/jvmti.h"
+#include "jvmti.h"
 #include "runtime.h"
 #include "ScopedLocalRef.h"
 #include "ScopedUtfChars.h"
diff --git a/test/912-classes/expected.txt b/test/912-classes/expected.txt
index e932b20..6b86ac9 100644
--- a/test/912-classes/expected.txt
+++ b/test/912-classes/expected.txt
@@ -15,7 +15,8 @@
 int interface=false array=false modifiable=false
 $Proxy0 interface=false array=false modifiable=false
 java.lang.Runnable interface=true array=false modifiable=false
-java.lang.String interface=false array=false modifiable=true
+java.lang.String interface=false array=false modifiable=false
+java.util.ArrayList interface=false array=false modifiable=true
 [I interface=false array=true modifiable=false
 [Ljava.lang.Runnable; interface=false array=true modifiable=false
 [Ljava.lang.String; interface=false array=true modifiable=false
diff --git a/test/912-classes/src/Main.java b/test/912-classes/src/Main.java
index 005074f..5d25d76 100644
--- a/test/912-classes/src/Main.java
+++ b/test/912-classes/src/Main.java
@@ -17,6 +17,7 @@
 import java.lang.ref.Reference;
 import java.lang.reflect.Constructor;
 import java.lang.reflect.Proxy;
+import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Comparator;
 
@@ -40,6 +41,7 @@
     testClassType(getProxyClass());
     testClassType(Runnable.class);
     testClassType(String.class);
+    testClassType(ArrayList.class);
 
     testClassType(int[].class);
     testClassType(Runnable[].class);
diff --git a/test/913-heaps/expected.txt b/test/913-heaps/expected.txt
index 7522a65..fc2761e 100644
--- a/test/913-heaps/expected.txt
+++ b/test/913-heaps/expected.txt
@@ -1,6 +1,6 @@
 ---
 true true
-root@root --(stack-local[id=1,tag=3000,depth=2,method=doFollowReferencesTestNonRoot,vreg=11,location= 31])--> 1@1000 [size=16, length=-1]
+root@root --(stack-local[id=1,tag=3000,depth=2,method=doFollowReferencesTestNonRoot,vreg=13,location= 32])--> 1@1000 [size=16, length=-1]
 root@root --(stack-local[id=1,tag=3000,depth=3,method=doFollowReferencesTest,vreg=1,location= 28])--> 3000@0 [size=132, length=-1]
 root@root --(thread)--> 3000@0 [size=132, length=-1]
 0@0 --(array-element@0)--> 1@1000 [size=16, length=-1]
@@ -8,74 +8,310 @@
 1002@0 --(interface)--> 2001@0 [size=124, length=-1]
 1002@0 --(superclass)--> 1001@0 [size=123, length=-1]
 1@1000 --(class)--> 1000@0 [size=123, length=-1]
-1@1000 --(field@12)--> 3@1001 [size=24, length=-1]
-1@1000 --(field@8)--> 2@1000 [size=16, length=-1]
+1@1000 --(field@2)--> 2@1000 [size=16, length=-1]
+1@1000 --(field@3)--> 3@1001 [size=24, length=-1]
 2001@0 --(interface)--> 2000@0 [size=124, length=-1]
 2@1000 --(class)--> 1000@0 [size=123, length=-1]
 3@1001 --(class)--> 1001@0 [size=123, length=-1]
-3@1001 --(field@16)--> 4@1000 [size=16, length=-1]
-3@1001 --(field@20)--> 5@1002 [size=32, length=-1]
+3@1001 --(field@4)--> 4@1000 [size=16, length=-1]
+3@1001 --(field@5)--> 5@1002 [size=32, length=-1]
 4@1000 --(class)--> 1000@0 [size=123, length=-1]
 5@1002 --(class)--> 1002@0 [size=123, length=-1]
-5@1002 --(field@24)--> 6@1000 [size=16, length=-1]
-5@1002 --(field@28)--> 1@1000 [size=16, length=-1]
+5@1002 --(field@8)--> 6@1000 [size=16, length=-1]
+5@1002 --(field@9)--> 1@1000 [size=16, length=-1]
 6@1000 --(class)--> 1000@0 [size=123, length=-1]
 ---
 1001@0 --(superclass)--> 1000@0 [size=123, length=-1]
 1002@0 --(interface)--> 2001@0 [size=124, length=-1]
 1002@0 --(superclass)--> 1001@0 [size=123, length=-1]
 1@1000 --(class)--> 1000@0 [size=123, length=-1]
-1@1000 --(field@12)--> 3@1001 [size=24, length=-1]
-1@1000 --(field@8)--> 2@1000 [size=16, length=-1]
+1@1000 --(field@2)--> 2@1000 [size=16, length=-1]
+1@1000 --(field@3)--> 3@1001 [size=24, length=-1]
 2001@0 --(interface)--> 2000@0 [size=124, length=-1]
 2@1000 --(class)--> 1000@0 [size=123, length=-1]
 3@1001 --(class)--> 1001@0 [size=123, length=-1]
-3@1001 --(field@16)--> 4@1000 [size=16, length=-1]
-3@1001 --(field@20)--> 5@1002 [size=32, length=-1]
+3@1001 --(field@4)--> 4@1000 [size=16, length=-1]
+3@1001 --(field@5)--> 5@1002 [size=32, length=-1]
 4@1000 --(class)--> 1000@0 [size=123, length=-1]
 5@1002 --(class)--> 1002@0 [size=123, length=-1]
-5@1002 --(field@24)--> 6@1000 [size=16, length=-1]
-5@1002 --(field@28)--> 1@1000 [size=16, length=-1]
+5@1002 --(field@8)--> 6@1000 [size=16, length=-1]
+5@1002 --(field@9)--> 1@1000 [size=16, length=-1]
 6@1000 --(class)--> 1000@0 [size=123, length=-1]
 ---
 root@root --(jni-global)--> 1@1000 [size=16, length=-1]
 root@root --(jni-local[id=1,tag=3000,depth=0,method=followReferences])--> 1@1000 [size=16, length=-1]
-root@root --(stack-local[id=1,tag=3000,depth=1,method=doFollowReferencesTestImpl,vreg=10,location= 6])--> 1@1000 [size=16, length=-1]
-root@root --(stack-local[id=1,tag=3000,depth=1,method=doFollowReferencesTestImpl,vreg=5,location= 6])--> 1@1000 [size=16, length=-1]
-root@root --(stack-local[id=1,tag=3000,depth=2,method=doFollowReferencesTestRoot,vreg=3,location= 18])--> 1@1000 [size=16, length=-1]
+root@root --(stack-local[id=1,tag=3000,depth=1,method=doFollowReferencesTestImpl,vreg=13,location= 10])--> 1@1000 [size=16, length=-1]
+root@root --(stack-local[id=1,tag=3000,depth=1,method=doFollowReferencesTestImpl,vreg=5,location= 10])--> 1@1000 [size=16, length=-1]
+root@root --(stack-local[id=1,tag=3000,depth=2,method=doFollowReferencesTestRoot,vreg=4,location= 19])--> 1@1000 [size=16, length=-1]
 root@root --(thread)--> 1@1000 [size=16, length=-1]
 root@root --(thread)--> 3000@0 [size=132, length=-1]
 1001@0 --(superclass)--> 1000@0 [size=123, length=-1]
 1002@0 --(interface)--> 2001@0 [size=124, length=-1]
 1002@0 --(superclass)--> 1001@0 [size=123, length=-1]
 1@1000 --(class)--> 1000@0 [size=123, length=-1]
-1@1000 --(field@12)--> 3@1001 [size=24, length=-1]
-1@1000 --(field@8)--> 2@1000 [size=16, length=-1]
+1@1000 --(field@2)--> 2@1000 [size=16, length=-1]
+1@1000 --(field@3)--> 3@1001 [size=24, length=-1]
 2001@0 --(interface)--> 2000@0 [size=124, length=-1]
 2@1000 --(class)--> 1000@0 [size=123, length=-1]
 3@1001 --(class)--> 1001@0 [size=123, length=-1]
-3@1001 --(field@16)--> 4@1000 [size=16, length=-1]
-3@1001 --(field@20)--> 5@1002 [size=32, length=-1]
+3@1001 --(field@4)--> 4@1000 [size=16, length=-1]
+3@1001 --(field@5)--> 5@1002 [size=32, length=-1]
 4@1000 --(class)--> 1000@0 [size=123, length=-1]
 5@1002 --(class)--> 1002@0 [size=123, length=-1]
-5@1002 --(field@24)--> 6@1000 [size=16, length=-1]
-5@1002 --(field@28)--> 1@1000 [size=16, length=-1]
+5@1002 --(field@8)--> 6@1000 [size=16, length=-1]
+5@1002 --(field@9)--> 1@1000 [size=16, length=-1]
 6@1000 --(class)--> 1000@0 [size=123, length=-1]
 ---
 1001@0 --(superclass)--> 1000@0 [size=123, length=-1]
 1002@0 --(interface)--> 2001@0 [size=124, length=-1]
 1002@0 --(superclass)--> 1001@0 [size=123, length=-1]
 1@1000 --(class)--> 1000@0 [size=123, length=-1]
-1@1000 --(field@12)--> 3@1001 [size=24, length=-1]
-1@1000 --(field@8)--> 2@1000 [size=16, length=-1]
+1@1000 --(field@2)--> 2@1000 [size=16, length=-1]
+1@1000 --(field@3)--> 3@1001 [size=24, length=-1]
 2001@0 --(interface)--> 2000@0 [size=124, length=-1]
 2@1000 --(class)--> 1000@0 [size=123, length=-1]
 3@1001 --(class)--> 1001@0 [size=123, length=-1]
-3@1001 --(field@16)--> 4@1000 [size=16, length=-1]
-3@1001 --(field@20)--> 5@1002 [size=32, length=-1]
+3@1001 --(field@4)--> 4@1000 [size=16, length=-1]
+3@1001 --(field@5)--> 5@1002 [size=32, length=-1]
 4@1000 --(class)--> 1000@0 [size=123, length=-1]
 5@1002 --(class)--> 1002@0 [size=123, length=-1]
-5@1002 --(field@24)--> 6@1000 [size=16, length=-1]
-5@1002 --(field@28)--> 1@1000 [size=16, length=-1]
+5@1002 --(field@8)--> 6@1000 [size=16, length=-1]
+5@1002 --(field@9)--> 1@1000 [size=16, length=-1]
 6@1000 --(class)--> 1000@0 [size=123, length=-1]
 ---
+[1@0 (32, 'HelloWorld'), 2@0 (16, '')]
+2
+3
+2@0 (15, 3xB '010203')
+3@0 (16, 2xC '41005a00')
+8@0 (32, 2xD '0000000000000000000000000000f03f')
+6@0 (20, 2xF '000000000000803f')
+5@0 (24, 3xI '010000000200000003000000')
+7@0 (40, 3xJ '010000000000000002000000000000000300000000000000')
+4@0 (18, 3xS '010002000300')
+1@0 (14, 2xZ '0001')
+23456789
+10000@0 (static, int, index=3) 0000000000000000
+10001
+10000@0 (static, int, index=11) 0000000000000000
+10001
+10000@0 (static, int, index=0) 0000000000000000
+10001
+10000@0 (static, int, index=1) 0000000000000000
+10001
+10000@0 (instance, int, index=2) 0000000000000000
+10001@0 (instance, byte, index=4) 0000000000000001
+10002@0 (instance, char, index=5) 0000000000000061
+10003@0 (instance, int, index=6) 0000000000000003
+10004@0 (instance, long, index=7) 0000000000000004
+10005@0 (instance, short, index=9) 0000000000000002
+10006
+10000@0 (instance, int, index=3) 0000000000000000
+10001@0 (instance, byte, index=5) 0000000000000001
+10002@0 (instance, char, index=6) 0000000000000061
+10003@0 (instance, int, index=7) 0000000000000003
+10004@0 (instance, long, index=8) 0000000000000004
+10005@0 (instance, short, index=10) 0000000000000002
+10006@0 (instance, double, index=12) 3ff3ae147ae147ae
+10007@0 (instance, float, index=13) 000000003f9d70a4
+10008
+--- klass ---
+root@root --(stack-local[id=1,tag=3000,depth=2,method=doFollowReferencesTestNonRoot,vreg=13,location= 32])--> 1@1000 [size=16, length=-1]
+0@0 --(array-element@0)--> 1@1000 [size=16, length=-1]
+1@1000 --(field@2)--> 2@1000 [size=16, length=-1]
+3@1001 --(field@4)--> 4@1000 [size=16, length=-1]
+5@1002 --(field@8)--> 6@1000 [size=16, length=-1]
+5@1002 --(field@9)--> 1@1000 [size=16, length=-1]
+---
+1@1000 --(field@2)--> 2@1000 [size=16, length=-1]
+3@1001 --(field@4)--> 4@1000 [size=16, length=-1]
+5@1002 --(field@8)--> 6@1000 [size=16, length=-1]
+5@1002 --(field@9)--> 1@1000 [size=16, length=-1]
+---
+root@root --(jni-global)--> 1@1000 [size=16, length=-1]
+root@root --(jni-local[id=1,tag=3000,depth=0,method=followReferences])--> 1@1000 [size=16, length=-1]
+root@root --(stack-local[id=1,tag=3000,depth=1,method=doFollowReferencesTestImpl,vreg=13,location= 10])--> 1@1000 [size=16, length=-1]
+root@root --(stack-local[id=1,tag=3000,depth=1,method=doFollowReferencesTestImpl,vreg=5,location= 10])--> 1@1000 [size=16, length=-1]
+root@root --(stack-local[id=1,tag=3000,depth=2,method=doFollowReferencesTestRoot,vreg=4,location= 19])--> 1@1000 [size=16, length=-1]
+root@root --(thread)--> 1@1000 [size=16, length=-1]
+1@1000 --(field@2)--> 2@1000 [size=16, length=-1]
+3@1001 --(field@4)--> 4@1000 [size=16, length=-1]
+5@1002 --(field@8)--> 6@1000 [size=16, length=-1]
+5@1002 --(field@9)--> 1@1000 [size=16, length=-1]
+---
+1@1000 --(field@2)--> 2@1000 [size=16, length=-1]
+3@1001 --(field@4)--> 4@1000 [size=16, length=-1]
+5@1002 --(field@8)--> 6@1000 [size=16, length=-1]
+5@1002 --(field@9)--> 1@1000 [size=16, length=-1]
+---
+--- heap_filter ---
+---- tagged objects
+---
+---
+---
+---
+---- untagged objects
+root@root --(stack-local[id=1,tag=3000,depth=2,method=doFollowReferencesTestNonRoot,vreg=13,location= 32])--> 1@1000 [size=16, length=-1]
+root@root --(stack-local[id=1,tag=3000,depth=3,method=doFollowReferencesTest,vreg=1,location= 28])--> 3000@0 [size=132, length=-1]
+root@root --(thread)--> 3000@0 [size=132, length=-1]
+0@0 --(array-element@0)--> 1@1000 [size=16, length=-1]
+1001@0 --(superclass)--> 1000@0 [size=123, length=-1]
+1002@0 --(interface)--> 2001@0 [size=124, length=-1]
+1002@0 --(superclass)--> 1001@0 [size=123, length=-1]
+1@1000 --(class)--> 1000@0 [size=123, length=-1]
+1@1000 --(field@2)--> 2@1000 [size=16, length=-1]
+1@1000 --(field@3)--> 3@1001 [size=24, length=-1]
+2001@0 --(interface)--> 2000@0 [size=124, length=-1]
+2@1000 --(class)--> 1000@0 [size=123, length=-1]
+3@1001 --(class)--> 1001@0 [size=123, length=-1]
+3@1001 --(field@4)--> 4@1000 [size=16, length=-1]
+3@1001 --(field@5)--> 5@1002 [size=32, length=-1]
+4@1000 --(class)--> 1000@0 [size=123, length=-1]
+5@1002 --(class)--> 1002@0 [size=123, length=-1]
+5@1002 --(field@8)--> 6@1000 [size=16, length=-1]
+5@1002 --(field@9)--> 1@1000 [size=16, length=-1]
+6@1000 --(class)--> 1000@0 [size=123, length=-1]
+---
+1001@0 --(superclass)--> 1000@0 [size=123, length=-1]
+1002@0 --(interface)--> 2001@0 [size=124, length=-1]
+1002@0 --(superclass)--> 1001@0 [size=123, length=-1]
+1@1000 --(class)--> 1000@0 [size=123, length=-1]
+1@1000 --(field@2)--> 2@1000 [size=16, length=-1]
+1@1000 --(field@3)--> 3@1001 [size=24, length=-1]
+2001@0 --(interface)--> 2000@0 [size=124, length=-1]
+2@1000 --(class)--> 1000@0 [size=123, length=-1]
+3@1001 --(class)--> 1001@0 [size=123, length=-1]
+3@1001 --(field@4)--> 4@1000 [size=16, length=-1]
+3@1001 --(field@5)--> 5@1002 [size=32, length=-1]
+4@1000 --(class)--> 1000@0 [size=123, length=-1]
+5@1002 --(class)--> 1002@0 [size=123, length=-1]
+5@1002 --(field@8)--> 6@1000 [size=16, length=-1]
+5@1002 --(field@9)--> 1@1000 [size=16, length=-1]
+6@1000 --(class)--> 1000@0 [size=123, length=-1]
+---
+root@root --(jni-global)--> 1@1000 [size=16, length=-1]
+root@root --(jni-local[id=1,tag=3000,depth=0,method=followReferences])--> 1@1000 [size=16, length=-1]
+root@root --(stack-local[id=1,tag=3000,depth=1,method=doFollowReferencesTestImpl,vreg=13,location= 10])--> 1@1000 [size=16, length=-1]
+root@root --(stack-local[id=1,tag=3000,depth=1,method=doFollowReferencesTestImpl,vreg=5,location= 10])--> 1@1000 [size=16, length=-1]
+root@root --(stack-local[id=1,tag=3000,depth=2,method=doFollowReferencesTestRoot,vreg=4,location= 19])--> 1@1000 [size=16, length=-1]
+root@root --(thread)--> 1@1000 [size=16, length=-1]
+root@root --(thread)--> 3000@0 [size=132, length=-1]
+1001@0 --(superclass)--> 1000@0 [size=123, length=-1]
+1002@0 --(interface)--> 2001@0 [size=124, length=-1]
+1002@0 --(superclass)--> 1001@0 [size=123, length=-1]
+1@1000 --(class)--> 1000@0 [size=123, length=-1]
+1@1000 --(field@2)--> 2@1000 [size=16, length=-1]
+1@1000 --(field@3)--> 3@1001 [size=24, length=-1]
+2001@0 --(interface)--> 2000@0 [size=124, length=-1]
+2@1000 --(class)--> 1000@0 [size=123, length=-1]
+3@1001 --(class)--> 1001@0 [size=123, length=-1]
+3@1001 --(field@4)--> 4@1000 [size=16, length=-1]
+3@1001 --(field@5)--> 5@1002 [size=32, length=-1]
+4@1000 --(class)--> 1000@0 [size=123, length=-1]
+5@1002 --(class)--> 1002@0 [size=123, length=-1]
+5@1002 --(field@8)--> 6@1000 [size=16, length=-1]
+5@1002 --(field@9)--> 1@1000 [size=16, length=-1]
+6@1000 --(class)--> 1000@0 [size=123, length=-1]
+---
+1001@0 --(superclass)--> 1000@0 [size=123, length=-1]
+1002@0 --(interface)--> 2001@0 [size=124, length=-1]
+1002@0 --(superclass)--> 1001@0 [size=123, length=-1]
+1@1000 --(class)--> 1000@0 [size=123, length=-1]
+1@1000 --(field@2)--> 2@1000 [size=16, length=-1]
+1@1000 --(field@3)--> 3@1001 [size=24, length=-1]
+2001@0 --(interface)--> 2000@0 [size=124, length=-1]
+2@1000 --(class)--> 1000@0 [size=123, length=-1]
+3@1001 --(class)--> 1001@0 [size=123, length=-1]
+3@1001 --(field@4)--> 4@1000 [size=16, length=-1]
+3@1001 --(field@5)--> 5@1002 [size=32, length=-1]
+4@1000 --(class)--> 1000@0 [size=123, length=-1]
+5@1002 --(class)--> 1002@0 [size=123, length=-1]
+5@1002 --(field@8)--> 6@1000 [size=16, length=-1]
+5@1002 --(field@9)--> 1@1000 [size=16, length=-1]
+6@1000 --(class)--> 1000@0 [size=123, length=-1]
+---
+---- tagged classes
+root@root --(stack-local[id=1,tag=3000,depth=3,method=doFollowReferencesTest,vreg=1,location= 28])--> 3000@0 [size=132, length=-1]
+root@root --(thread)--> 3000@0 [size=132, length=-1]
+1001@0 --(superclass)--> 1000@0 [size=123, length=-1]
+1002@0 --(interface)--> 2001@0 [size=124, length=-1]
+1002@0 --(superclass)--> 1001@0 [size=123, length=-1]
+1@1000 --(class)--> 1000@0 [size=123, length=-1]
+2001@0 --(interface)--> 2000@0 [size=124, length=-1]
+2@1000 --(class)--> 1000@0 [size=123, length=-1]
+3@1001 --(class)--> 1001@0 [size=123, length=-1]
+4@1000 --(class)--> 1000@0 [size=123, length=-1]
+5@1002 --(class)--> 1002@0 [size=123, length=-1]
+6@1000 --(class)--> 1000@0 [size=123, length=-1]
+---
+1001@0 --(superclass)--> 1000@0 [size=123, length=-1]
+1002@0 --(interface)--> 2001@0 [size=124, length=-1]
+1002@0 --(superclass)--> 1001@0 [size=123, length=-1]
+1@1000 --(class)--> 1000@0 [size=123, length=-1]
+2001@0 --(interface)--> 2000@0 [size=124, length=-1]
+2@1000 --(class)--> 1000@0 [size=123, length=-1]
+3@1001 --(class)--> 1001@0 [size=123, length=-1]
+4@1000 --(class)--> 1000@0 [size=123, length=-1]
+5@1002 --(class)--> 1002@0 [size=123, length=-1]
+6@1000 --(class)--> 1000@0 [size=123, length=-1]
+---
+root@root --(thread)--> 3000@0 [size=132, length=-1]
+1001@0 --(superclass)--> 1000@0 [size=123, length=-1]
+1002@0 --(interface)--> 2001@0 [size=124, length=-1]
+1002@0 --(superclass)--> 1001@0 [size=123, length=-1]
+1@1000 --(class)--> 1000@0 [size=123, length=-1]
+2001@0 --(interface)--> 2000@0 [size=124, length=-1]
+2@1000 --(class)--> 1000@0 [size=123, length=-1]
+3@1001 --(class)--> 1001@0 [size=123, length=-1]
+4@1000 --(class)--> 1000@0 [size=123, length=-1]
+5@1002 --(class)--> 1002@0 [size=123, length=-1]
+6@1000 --(class)--> 1000@0 [size=123, length=-1]
+---
+1001@0 --(superclass)--> 1000@0 [size=123, length=-1]
+1002@0 --(interface)--> 2001@0 [size=124, length=-1]
+1002@0 --(superclass)--> 1001@0 [size=123, length=-1]
+1@1000 --(class)--> 1000@0 [size=123, length=-1]
+2001@0 --(interface)--> 2000@0 [size=124, length=-1]
+2@1000 --(class)--> 1000@0 [size=123, length=-1]
+3@1001 --(class)--> 1001@0 [size=123, length=-1]
+4@1000 --(class)--> 1000@0 [size=123, length=-1]
+5@1002 --(class)--> 1002@0 [size=123, length=-1]
+6@1000 --(class)--> 1000@0 [size=123, length=-1]
+---
+---- untagged classes
+root@root --(stack-local[id=1,tag=3000,depth=2,method=doFollowReferencesTestNonRoot,vreg=13,location= 32])--> 1@1000 [size=16, length=-1]
+0@0 --(array-element@0)--> 1@1000 [size=16, length=-1]
+1@1000 --(field@2)--> 2@1000 [size=16, length=-1]
+1@1000 --(field@3)--> 3@1001 [size=24, length=-1]
+3@1001 --(field@4)--> 4@1000 [size=16, length=-1]
+3@1001 --(field@5)--> 5@1002 [size=32, length=-1]
+5@1002 --(field@8)--> 6@1000 [size=16, length=-1]
+5@1002 --(field@9)--> 1@1000 [size=16, length=-1]
+---
+1@1000 --(field@2)--> 2@1000 [size=16, length=-1]
+1@1000 --(field@3)--> 3@1001 [size=24, length=-1]
+3@1001 --(field@4)--> 4@1000 [size=16, length=-1]
+3@1001 --(field@5)--> 5@1002 [size=32, length=-1]
+5@1002 --(field@8)--> 6@1000 [size=16, length=-1]
+5@1002 --(field@9)--> 1@1000 [size=16, length=-1]
+---
+root@root --(jni-global)--> 1@1000 [size=16, length=-1]
+root@root --(jni-local[id=1,tag=3000,depth=0,method=followReferences])--> 1@1000 [size=16, length=-1]
+root@root --(stack-local[id=1,tag=3000,depth=1,method=doFollowReferencesTestImpl,vreg=13,location= 10])--> 1@1000 [size=16, length=-1]
+root@root --(stack-local[id=1,tag=3000,depth=1,method=doFollowReferencesTestImpl,vreg=5,location= 10])--> 1@1000 [size=16, length=-1]
+root@root --(stack-local[id=1,tag=3000,depth=2,method=doFollowReferencesTestRoot,vreg=4,location= 19])--> 1@1000 [size=16, length=-1]
+root@root --(thread)--> 1@1000 [size=16, length=-1]
+1@1000 --(field@2)--> 2@1000 [size=16, length=-1]
+1@1000 --(field@3)--> 3@1001 [size=24, length=-1]
+3@1001 --(field@4)--> 4@1000 [size=16, length=-1]
+3@1001 --(field@5)--> 5@1002 [size=32, length=-1]
+5@1002 --(field@8)--> 6@1000 [size=16, length=-1]
+5@1002 --(field@9)--> 1@1000 [size=16, length=-1]
+---
+1@1000 --(field@2)--> 2@1000 [size=16, length=-1]
+1@1000 --(field@3)--> 3@1001 [size=24, length=-1]
+3@1001 --(field@4)--> 4@1000 [size=16, length=-1]
+3@1001 --(field@5)--> 5@1002 [size=32, length=-1]
+5@1002 --(field@8)--> 6@1000 [size=16, length=-1]
+5@1002 --(field@9)--> 1@1000 [size=16, length=-1]
+---
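
Each line of expected output above encodes one heap edge as "referrer --(kind)--> referree [size=..., length=...]", where an object is written as tag@class-tag and length=-1 marks a non-array referree. A sketch of how such a line can be assembled with android::base::StringPrintf, as the test code itself does (the FormatEdge helper is illustrative, not part of the test):

#include <cinttypes>
#include <cstdint>

#include <string>

#include "android-base/stringprintf.h"

// Illustrative formatter for one reference edge, e.g.
// "1@1000 --(field@2)--> 2@1000 [size=16, length=-1]".
static std::string FormatEdge(int64_t referrer_tag, int64_t referrer_class_tag,
                              const std::string& kind,
                              int64_t referree_tag, int64_t referree_class_tag,
                              int64_t size, int32_t length) {
  return android::base::StringPrintf(
      "%" PRId64 "@%" PRId64 " --(%s)--> %" PRId64 "@%" PRId64 " [size=%" PRId64 ", length=%d]",
      referrer_tag, referrer_class_tag, kind.c_str(),
      referree_tag, referree_class_tag, size, length);
}
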
diff --git a/test/913-heaps/heaps.cc b/test/913-heaps/heaps.cc
index 6759919..66fc7be 100644
--- a/test/913-heaps/heaps.cc
+++ b/test/913-heaps/heaps.cc
@@ -18,6 +18,7 @@
 #include <stdio.h>
 #include <string.h>
 
+#include <iostream>
 #include <vector>
 
 #include "android-base/stringprintf.h"
@@ -27,8 +28,9 @@
 #include "jit/jit.h"
 #include "jni.h"
 #include "native_stack_dump.h"
-#include "openjdkjvmti/jvmti.h"
+#include "jvmti.h"
 #include "runtime.h"
+#include "scoped_thread_state_change-inl.h"
 #include "thread-inl.h"
 #include "thread_list.h"
 
@@ -279,8 +281,14 @@
                         jlong size,
                         jint length,
                         const jvmtiHeapReferenceInfo* reference_info)
+          REQUIRES_SHARED(Locks::mutator_lock_)
           : Elem(referrer, referree, size, length) {
         memcpy(&info_, reference_info, sizeof(jvmtiHeapReferenceInfo));
+        // Debug stack trace for the failure condition. Remove when debugging is done.
+        if (info_.stack_local.depth == 3 && info_.stack_local.slot == 13) {
+          DumpNativeStack(std::cerr, GetTid());
+          Thread::Current()->DumpJavaStack(std::cerr, false, false);
+        }
       }
 
      protected:
@@ -493,5 +501,248 @@
   return ret;
 }
 
+extern "C" JNIEXPORT jobjectArray JNICALL Java_Main_followReferencesString(
+    JNIEnv* env, jclass klass ATTRIBUTE_UNUSED, jobject initial_object) {
+  struct FindStringCallbacks {
+    static jint JNICALL FollowReferencesCallback(
+        jvmtiHeapReferenceKind reference_kind ATTRIBUTE_UNUSED,
+        const jvmtiHeapReferenceInfo* reference_info ATTRIBUTE_UNUSED,
+        jlong class_tag ATTRIBUTE_UNUSED,
+        jlong referrer_class_tag ATTRIBUTE_UNUSED,
+        jlong size ATTRIBUTE_UNUSED,
+        jlong* tag_ptr ATTRIBUTE_UNUSED,
+        jlong* referrer_tag_ptr ATTRIBUTE_UNUSED,
+        jint length ATTRIBUTE_UNUSED,
+        void* user_data ATTRIBUTE_UNUSED) {
+      return JVMTI_VISIT_OBJECTS;  // Continue visiting.
+    }
+
+    static jint JNICALL StringValueCallback(jlong class_tag,
+                                            jlong size,
+                                            jlong* tag_ptr,
+                                            const jchar* value,
+                                            jint value_length,
+                                            void* user_data) {
+      FindStringCallbacks* p = reinterpret_cast<FindStringCallbacks*>(user_data);
+      if (*tag_ptr != 0) {
+        size_t utf_byte_count = CountUtf8Bytes(value, value_length);
+        std::unique_ptr<char[]> mod_utf(new char[utf_byte_count + 1]);
+        memset(mod_utf.get(), 0, utf_byte_count + 1);
+        ConvertUtf16ToModifiedUtf8(mod_utf.get(), utf_byte_count, value, value_length);
+        p->data.push_back(android::base::StringPrintf("%" PRId64 "@%" PRId64 " (%" PRId64 ", '%s')",
+                                                      *tag_ptr,
+                                                      class_tag,
+                                                      size,
+                                                      mod_utf.get()));
+        // Update the tag to test whether that works.
+        *tag_ptr = *tag_ptr + 1;
+      }
+      return 0;
+    }
+
+    std::vector<std::string> data;
+  };
+
+  jvmtiHeapCallbacks callbacks;
+  memset(&callbacks, 0, sizeof(jvmtiHeapCallbacks));
+  callbacks.heap_reference_callback = FindStringCallbacks::FollowReferencesCallback;
+  callbacks.string_primitive_value_callback = FindStringCallbacks::StringValueCallback;
+
+  FindStringCallbacks fsc;
+  jvmtiError ret = jvmti_env->FollowReferences(0, nullptr, initial_object, &callbacks, &fsc);
+  if (JvmtiErrorToException(env, ret)) {
+    return nullptr;
+  }
+
+  jobjectArray retArray = CreateObjectArray(env,
+                                            static_cast<jint>(fsc.data.size()),
+                                            "java/lang/String",
+                                            [&](jint i) {
+                                              return env->NewStringUTF(fsc.data[i].c_str());
+                                            });
+  return retArray;
+}
+
+extern "C" JNIEXPORT jstring JNICALL Java_Main_followReferencesPrimitiveArray(
+    JNIEnv* env, jclass klass ATTRIBUTE_UNUSED, jobject initial_object) {
+  struct FindArrayCallbacks {
+    static jint JNICALL FollowReferencesCallback(
+        jvmtiHeapReferenceKind reference_kind ATTRIBUTE_UNUSED,
+        const jvmtiHeapReferenceInfo* reference_info ATTRIBUTE_UNUSED,
+        jlong class_tag ATTRIBUTE_UNUSED,
+        jlong referrer_class_tag ATTRIBUTE_UNUSED,
+        jlong size ATTRIBUTE_UNUSED,
+        jlong* tag_ptr ATTRIBUTE_UNUSED,
+        jlong* referrer_tag_ptr ATTRIBUTE_UNUSED,
+        jint length ATTRIBUTE_UNUSED,
+        void* user_data ATTRIBUTE_UNUSED) {
+      return JVMTI_VISIT_OBJECTS;  // Continue visiting.
+    }
+
+    static jint JNICALL ArrayValueCallback(jlong class_tag,
+                                           jlong size,
+                                           jlong* tag_ptr,
+                                           jint element_count,
+                                           jvmtiPrimitiveType element_type,
+                                           const void* elements,
+                                           void* user_data) {
+      FindArrayCallbacks* p = reinterpret_cast<FindArrayCallbacks*>(user_data);
+      if (*tag_ptr != 0) {
+        std::ostringstream oss;
+        oss << *tag_ptr
+            << '@'
+            << class_tag
+            << " ("
+            << size
+            << ", "
+            << element_count
+            << "x"
+            << static_cast<char>(element_type)
+            << " '";
+        size_t element_size;
+        switch (element_type) {
+          case JVMTI_PRIMITIVE_TYPE_BOOLEAN:
+          case JVMTI_PRIMITIVE_TYPE_BYTE:
+            element_size = 1;
+            break;
+          case JVMTI_PRIMITIVE_TYPE_CHAR:
+          case JVMTI_PRIMITIVE_TYPE_SHORT:
+            element_size = 2;
+            break;
+          case JVMTI_PRIMITIVE_TYPE_INT:
+          case JVMTI_PRIMITIVE_TYPE_FLOAT:
+            element_size = 4;
+            break;
+          case JVMTI_PRIMITIVE_TYPE_LONG:
+          case JVMTI_PRIMITIVE_TYPE_DOUBLE:
+            element_size = 8;
+            break;
+          default:
+            LOG(FATAL) << "Unknown type " << static_cast<size_t>(element_type);
+            UNREACHABLE();
+        }
+        const uint8_t* data = reinterpret_cast<const uint8_t*>(elements);
+        for (size_t i = 0; i != element_size * element_count; ++i) {
+          oss << android::base::StringPrintf("%02x", data[i]);
+        }
+        oss << "')";
+
+        if (!p->data.empty()) {
+          p->data += "\n";
+        }
+        p->data += oss.str();
+        // Update the tag to test whether that works.
+        *tag_ptr = *tag_ptr + 1;
+      }
+      return 0;
+    }
+
+    std::string data;
+  };
+
+  jvmtiHeapCallbacks callbacks;
+  memset(&callbacks, 0, sizeof(jvmtiHeapCallbacks));
+  callbacks.heap_reference_callback = FindArrayCallbacks::FollowReferencesCallback;
+  callbacks.array_primitive_value_callback = FindArrayCallbacks::ArrayValueCallback;
+
+  FindArrayCallbacks fac;
+  jvmtiError ret = jvmti_env->FollowReferences(0, nullptr, initial_object, &callbacks, &fac);
+  if (JvmtiErrorToException(env, ret)) {
+    return nullptr;
+  }
+  return env->NewStringUTF(fac.data.c_str());
+}
+
+static constexpr const char* GetPrimitiveTypeName(jvmtiPrimitiveType type) {
+  switch (type) {
+    case JVMTI_PRIMITIVE_TYPE_BOOLEAN:
+      return "boolean";
+    case JVMTI_PRIMITIVE_TYPE_BYTE:
+      return "byte";
+    case JVMTI_PRIMITIVE_TYPE_CHAR:
+      return "char";
+    case JVMTI_PRIMITIVE_TYPE_SHORT:
+      return "short";
+    case JVMTI_PRIMITIVE_TYPE_INT:
+      return "int";
+    case JVMTI_PRIMITIVE_TYPE_FLOAT:
+      return "float";
+    case JVMTI_PRIMITIVE_TYPE_LONG:
+      return "long";
+    case JVMTI_PRIMITIVE_TYPE_DOUBLE:
+      return "double";
+  }
+  LOG(FATAL) << "Unknown type " << static_cast<size_t>(type);
+  UNREACHABLE();
+}
+
+extern "C" JNIEXPORT jstring JNICALL Java_Main_followReferencesPrimitiveFields(
+    JNIEnv* env, jclass klass ATTRIBUTE_UNUSED, jobject initial_object) {
+  struct FindFieldCallbacks {
+    static jint JNICALL FollowReferencesCallback(
+        jvmtiHeapReferenceKind reference_kind ATTRIBUTE_UNUSED,
+        const jvmtiHeapReferenceInfo* reference_info ATTRIBUTE_UNUSED,
+        jlong class_tag ATTRIBUTE_UNUSED,
+        jlong referrer_class_tag ATTRIBUTE_UNUSED,
+        jlong size ATTRIBUTE_UNUSED,
+        jlong* tag_ptr ATTRIBUTE_UNUSED,
+        jlong* referrer_tag_ptr ATTRIBUTE_UNUSED,
+        jint length ATTRIBUTE_UNUSED,
+        void* user_data ATTRIBUTE_UNUSED) {
+      return JVMTI_VISIT_OBJECTS;  // Continue visiting.
+    }
+
+    static jint JNICALL PrimitiveFieldValueCallback(jvmtiHeapReferenceKind kind,
+                                                    const jvmtiHeapReferenceInfo* info,
+                                                    jlong class_tag,
+                                                    jlong* tag_ptr,
+                                                    jvalue value,
+                                                    jvmtiPrimitiveType value_type,
+                                                    void* user_data) {
+      FindFieldCallbacks* p = reinterpret_cast<FindFieldCallbacks*>(user_data);
+      if (*tag_ptr != 0) {
+        std::ostringstream oss;
+        oss << *tag_ptr
+            << '@'
+            << class_tag
+            << " ("
+            << (kind == JVMTI_HEAP_REFERENCE_FIELD ? "instance, " : "static, ")
+            << GetPrimitiveTypeName(value_type)
+            << ", index="
+            << info->field.index
+            << ") ";
+        // Be lazy: always print eight bytes.
+        static_assert(sizeof(jvalue) == sizeof(uint64_t), "Unexpected jvalue size");
+        uint64_t val;
+        memcpy(&val, &value, sizeof(uint64_t));  // To avoid undefined behavior.
+        oss << android::base::StringPrintf("%016" PRIx64, val);
+
+        if (!p->data.empty()) {
+          p->data += "\n";
+        }
+        p->data += oss.str();
+        // Update the tag to test whether that works.
+        *tag_ptr = *tag_ptr + 1;
+      }
+      return 0;
+    }
+
+    std::string data;
+  };
+
+  jvmtiHeapCallbacks callbacks;
+  memset(&callbacks, 0, sizeof(jvmtiHeapCallbacks));
+  callbacks.heap_reference_callback = FindFieldCallbacks::FollowReferencesCallback;
+  callbacks.primitive_field_callback = FindFieldCallbacks::PrimitiveFieldValueCallback;
+
+  FindFieldCallbacks ffc;
+  jvmtiError ret = jvmti_env->FollowReferences(0, nullptr, initial_object, &callbacks, &ffc);
+  if (JvmtiErrorToException(env, ret)) {
+    return nullptr;
+  }
+  return env->NewStringUTF(ffc.data.c_str());
+}
+
 }  // namespace Test913Heaps
 }  // namespace art
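
All three native methods added above follow the same JVMTI pattern: zero a jvmtiHeapCallbacks struct, install a heap_reference_callback that returns JVMTI_VISIT_OBJECTS so the traversal continues past every edge, install exactly one primitive-value callback (string, array, or field), and thread the collected output through the user_data pointer. A condensed sketch of that skeleton, assuming a valid jvmtiEnv* as the tests have in jvmti_env:

#include <string.h>

#include <string>
#include <vector>

#include "jvmti.h"

// Sketch of the shared FollowReferences skeleton; Collector stands in for the
// per-test callback structs (FindStringCallbacks and friends above).
struct Collector {
  static jint JNICALL VisitReference(jvmtiHeapReferenceKind /* kind */,
                                     const jvmtiHeapReferenceInfo* /* info */,
                                     jlong /* class_tag */,
                                     jlong /* referrer_class_tag */,
                                     jlong /* size */,
                                     jlong* /* tag_ptr */,
                                     jlong* /* referrer_tag_ptr */,
                                     jint /* length */,
                                     void* /* user_data */) {
    return JVMTI_VISIT_OBJECTS;  // Keep following references.
  }

  std::vector<std::string> data;
};

static jvmtiError CollectFrom(jvmtiEnv* env, jobject initial_object, Collector* out) {
  jvmtiHeapCallbacks callbacks;
  memset(&callbacks, 0, sizeof(jvmtiHeapCallbacks));
  callbacks.heap_reference_callback = Collector::VisitReference;
  // One of string_primitive_value_callback, array_primitive_value_callback or
  // primitive_field_callback would be installed here, depending on the test.
  return env->FollowReferences(0, nullptr, initial_object, &callbacks, out);
}
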
diff --git a/test/913-heaps/run b/test/913-heaps/run
index c6e62ae..dd35526 100755
--- a/test/913-heaps/run
+++ b/test/913-heaps/run
@@ -14,4 +14,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-./default-run "$@" --jvmti
+./default-run "$@" --jvmti -Xcompiler-option -g
diff --git a/test/913-heaps/src/Main.java b/test/913-heaps/src/Main.java
index 5a11a5b..66f6883 100644
--- a/test/913-heaps/src/Main.java
+++ b/test/913-heaps/src/Main.java
@@ -15,6 +15,7 @@
  */
 
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.HashSet;
@@ -22,7 +23,36 @@
 public class Main {
   public static void main(String[] args) throws Exception {
     doTest();
-    doFollowReferencesTest();
+    new TestConfig().doFollowReferencesTest();
+
+    Runtime.getRuntime().gc();
+    Runtime.getRuntime().gc();
+
+    doStringTest();
+
+    Runtime.getRuntime().gc();
+    Runtime.getRuntime().gc();
+
+    doPrimitiveArrayTest();
+    doPrimitiveFieldTest();
+
+    Runtime.getRuntime().gc();
+    Runtime.getRuntime().gc();
+
+    // Test klass filter.
+    System.out.println("--- klass ---");
+    new TestConfig(A.class, 0).doFollowReferencesTest();
+
+    // Test heap filter.
+    System.out.println("--- heap_filter ---");
+    System.out.println("---- tagged objects");
+    new TestConfig(null, 0x4).doFollowReferencesTest();
+    System.out.println("---- untagged objects");
+    new TestConfig(null, 0x8).doFollowReferencesTest();
+    System.out.println("---- tagged classes");
+    new TestConfig(null, 0x10).doFollowReferencesTest();
+    System.out.println("---- untagged classes");
+    new TestConfig(null, 0x20).doFollowReferencesTest();
   }
 
   public static void doTest() throws Exception {
@@ -33,6 +63,124 @@
     enableGcTracking(false);
   }
 
+  public static void doStringTest() throws Exception {
+    final String str = new String("HelloWorld");
+    final String str2 = new String("");
+    Object o = new Object() {
+      String s = str;
+      String s2 = str2;
+    };
+
+    setTag(str, 1);
+    setTag(str2, 2);
+    System.out.println(Arrays.toString(followReferencesString(o)));
+    System.out.println(getTag(str));
+    System.out.println(getTag(str2));
+  }
+
+  public static void doPrimitiveArrayTest() throws Exception {
+    final boolean[] zArray = new boolean[] { false, true };
+    setTag(zArray, 1);
+
+    final byte[] bArray = new byte[] { 1, 2, 3 };
+    setTag(bArray, 2);
+
+    final char[] cArray = new char[] { 'A', 'Z' };
+    setTag(cArray, 3);
+
+    final short[] sArray = new short[] { 1, 2, 3 };
+    setTag(sArray, 4);
+
+    final int[] iArray = new int[] { 1, 2, 3 };
+    setTag(iArray, 5);
+
+    final float[] fArray = new float[] { 0.0f, 1.0f };
+    setTag(fArray, 6);
+
+    final long[] lArray = new long[] { 1, 2, 3 };
+    setTag(lArray, 7);
+
+    final double[] dArray = new double[] { 0.0, 1.0 };
+    setTag(dArray, 8);
+
+    Object o = new Object() {
+      Object z = zArray;
+      Object b = bArray;
+      Object c = cArray;
+      Object s = sArray;
+      Object i = iArray;
+      Object f = fArray;
+      Object l = lArray;
+      Object d = dArray;
+    };
+
+    System.out.println(followReferencesPrimitiveArray(o));
+    System.out.print(getTag(zArray));
+    System.out.print(getTag(bArray));
+    System.out.print(getTag(cArray));
+    System.out.print(getTag(sArray));
+    System.out.print(getTag(iArray));
+    System.out.print(getTag(fArray));
+    System.out.print(getTag(lArray));
+    System.out.println(getTag(dArray));
+  }
+
+  public static void doPrimitiveFieldTest() throws Exception {
+    // Force GCs to clean up dirt.
+    Runtime.getRuntime().gc();
+    Runtime.getRuntime().gc();
+
+    doTestPrimitiveFieldsClasses();
+
+    doTestPrimitiveFieldsIntegral();
+
+    // Force GCs to clean up dirt.
+    Runtime.getRuntime().gc();
+    Runtime.getRuntime().gc();
+
+    doTestPrimitiveFieldsFloat();
+
+    // Force GCs to clean up dirt.
+    Runtime.getRuntime().gc();
+    Runtime.getRuntime().gc();
+  }
+
+  private static void doTestPrimitiveFieldsClasses() {
+    setTag(IntObject.class, 10000);
+    System.out.println(followReferencesPrimitiveFields(IntObject.class));
+    System.out.println(getTag(IntObject.class));
+    setTag(IntObject.class, 0);
+
+    setTag(FloatObject.class, 10000);
+    System.out.println(followReferencesPrimitiveFields(FloatObject.class));
+    System.out.println(getTag(FloatObject.class));
+    setTag(FloatObject.class, 0);
+
+    setTag(Inf1.class, 10000);
+    System.out.println(followReferencesPrimitiveFields(Inf1.class));
+    System.out.println(getTag(Inf1.class));
+    setTag(Inf1.class, 0);
+
+    setTag(Inf2.class, 10000);
+    System.out.println(followReferencesPrimitiveFields(Inf2.class));
+    System.out.println(getTag(Inf2.class));
+    setTag(Inf2.class, 0);
+  }
+
+  private static void doTestPrimitiveFieldsIntegral() {
+    IntObject intObject = new IntObject();
+    setTag(intObject, 10000);
+    System.out.println(followReferencesPrimitiveFields(intObject));
+    System.out.println(getTag(intObject));
+  }
+
+  private static void doTestPrimitiveFieldsFloat() {
+    FloatObject floatObject = new FloatObject();
+    setTag(floatObject, 10000);
+    System.out.println(followReferencesPrimitiveFields(floatObject));
+    System.out.println(getTag(floatObject));
+  }
+
   private static void run() {
     clearStats();
     forceGarbageCollection();
@@ -51,126 +199,136 @@
     System.out.println((s > 0) + " " + (f > 0));
   }
 
-  public static void doFollowReferencesTest() throws Exception {
-    // Force GCs to clean up dirt.
-    Runtime.getRuntime().gc();
-    Runtime.getRuntime().gc();
+  private static class TestConfig {
+    private Class<?> klass = null;
+    private int heapFilter = 0;
 
-    setTag(Thread.currentThread(), 3000);
-
-    {
-      ArrayList<Object> tmpStorage = new ArrayList<>();
-      doFollowReferencesTestNonRoot(tmpStorage);
-      tmpStorage = null;
+    public TestConfig() {
+    }
+    public TestConfig(Class<?> klass, int heapFilter) {
+      this.klass = klass;
+      this.heapFilter = heapFilter;
     }
 
-    // Force GCs to clean up dirt.
-    Runtime.getRuntime().gc();
-    Runtime.getRuntime().gc();
+    public void doFollowReferencesTest() throws Exception {
+      // Force GCs to clean up dirt.
+      Runtime.getRuntime().gc();
+      Runtime.getRuntime().gc();
 
-    doFollowReferencesTestRoot();
+      setTag(Thread.currentThread(), 3000);
 
-    // Force GCs to clean up dirt.
-    Runtime.getRuntime().gc();
-    Runtime.getRuntime().gc();
-  }
+      {
+        ArrayList<Object> tmpStorage = new ArrayList<>();
+        doFollowReferencesTestNonRoot(tmpStorage);
+        tmpStorage = null;
+      }
 
-  private static void doFollowReferencesTestNonRoot(ArrayList<Object> tmpStorage) {
-    Verifier v = new Verifier();
-    tagClasses(v);
-    A a = createTree(v);
-    tmpStorage.add(a);
-    v.add("0@0", "1@1000");  // tmpStorage[0] --(array-element)--> a.
+      // Force GCs to clean up dirt.
+      Runtime.getRuntime().gc();
+      Runtime.getRuntime().gc();
 
-    doFollowReferencesTestImpl(null, Integer.MAX_VALUE, -1, null, v, null);
-    doFollowReferencesTestImpl(a.foo2, Integer.MAX_VALUE, -1, null, v, "3@1001");
+      doFollowReferencesTestRoot();
 
-    tmpStorage.clear();
-  }
+      // Force GCs to clean up dirt.
+      Runtime.getRuntime().gc();
+      Runtime.getRuntime().gc();
+    }
 
-  private static void doFollowReferencesTestRoot() {
-    Verifier v = new Verifier();
-    tagClasses(v);
-    A a = createTree(v);
+    private void doFollowReferencesTestNonRoot(ArrayList<Object> tmpStorage) {
+      Verifier v = new Verifier();
+      tagClasses(v);
+      A a = createTree(v);
+      tmpStorage.add(a);
+      v.add("0@0", "1@1000");  // tmpStorage[0] --(array-element)--> a.
 
-    doFollowReferencesTestImpl(null, Integer.MAX_VALUE, -1, a, v, null);
-    doFollowReferencesTestImpl(a.foo2, Integer.MAX_VALUE, -1, a, v, "3@1001");
-  }
+      doFollowReferencesTestImpl(null, Integer.MAX_VALUE, -1, null, v, null);
+      doFollowReferencesTestImpl(a.foo2, Integer.MAX_VALUE, -1, null, v, "3@1001");
 
-  private static void doFollowReferencesTestImpl(A root, int stopAfter, int followSet,
-      Object asRoot, Verifier v, String additionalEnabled) {
-    String[] lines =
-        followReferences(0, null, root, stopAfter, followSet, asRoot);
+      tmpStorage.clear();
+    }
 
-    v.process(lines, additionalEnabled);
+    private void doFollowReferencesTestRoot() {
+      Verifier v = new Verifier();
+      tagClasses(v);
+      A a = createTree(v);
 
-    // TODO: Test filters.
-  }
+      doFollowReferencesTestImpl(null, Integer.MAX_VALUE, -1, a, v, null);
+      doFollowReferencesTestImpl(a.foo2, Integer.MAX_VALUE, -1, a, v, "3@1001");
+    }
 
-  private static void tagClasses(Verifier v) {
-    setTag(A.class, 1000);
+    private void doFollowReferencesTestImpl(A root, int stopAfter, int followSet,
+        Object asRoot, Verifier v, String additionalEnabled) {
+      String[] lines =
+          followReferences(heapFilter, klass, root, stopAfter, followSet, asRoot);
 
-    setTag(B.class, 1001);
-    v.add("1001@0", "1000@0");  // B.class --(superclass)--> A.class.
+      v.process(lines, additionalEnabled, heapFilter != 0 || klass != null);
+    }
 
-    setTag(C.class, 1002);
-    v.add("1002@0", "1001@0");  // C.class --(superclass)--> B.class.
-    v.add("1002@0", "2001@0");  // C.class --(interface)--> I2.class.
+    private static void tagClasses(Verifier v) {
+      setTag(A.class, 1000);
 
-    setTag(I1.class, 2000);
+      setTag(B.class, 1001);
+      v.add("1001@0", "1000@0");  // B.class --(superclass)--> A.class.
 
-    setTag(I2.class, 2001);
-    v.add("2001@0", "2000@0");  // I2.class --(interface)--> I1.class.
-  }
+      setTag(C.class, 1002);
+      v.add("1002@0", "1001@0");  // C.class --(superclass)--> B.class.
+      v.add("1002@0", "2001@0");  // C.class --(interface)--> I2.class.
 
-  private static A createTree(Verifier v) {
-    A aInst = new A();
-    setTag(aInst, 1);
-    String aInstStr = "1@1000";
-    String aClassStr = "1000@0";
-    v.add(aInstStr, aClassStr);  // A -->(class) --> A.class.
+      setTag(I1.class, 2000);
 
-    A a2Inst = new A();
-    setTag(a2Inst, 2);
-    aInst.foo = a2Inst;
-    String a2InstStr = "2@1000";
-    v.add(a2InstStr, aClassStr);  // A2 -->(class) --> A.class.
-    v.add(aInstStr, a2InstStr);   // A -->(field) --> A2.
+      setTag(I2.class, 2001);
+      v.add("2001@0", "2000@0");  // I2.class --(interface)--> I1.class.
+    }
 
-    B bInst = new B();
-    setTag(bInst, 3);
-    aInst.foo2 = bInst;
-    String bInstStr = "3@1001";
-    String bClassStr = "1001@0";
-    v.add(bInstStr, bClassStr);  // B -->(class) --> B.class.
-    v.add(aInstStr, bInstStr);   // A -->(field) --> B.
+    private static A createTree(Verifier v) {
+      A aInst = new A();
+      setTag(aInst, 1);
+      String aInstStr = "1@1000";
+      String aClassStr = "1000@0";
+      v.add(aInstStr, aClassStr);  // A -->(class) --> A.class.
 
-    A a3Inst = new A();
-    setTag(a3Inst, 4);
-    bInst.bar = a3Inst;
-    String a3InstStr = "4@1000";
-    v.add(a3InstStr, aClassStr);  // A3 -->(class) --> A.class.
-    v.add(bInstStr, a3InstStr);   // B -->(field) --> A3.
+      A a2Inst = new A();
+      setTag(a2Inst, 2);
+      aInst.foo = a2Inst;
+      String a2InstStr = "2@1000";
+      v.add(a2InstStr, aClassStr);  // A2 -->(class) --> A.class.
+      v.add(aInstStr, a2InstStr);   // A -->(field) --> A2.
 
-    C cInst = new C();
-    setTag(cInst, 5);
-    bInst.bar2 = cInst;
-    String cInstStr = "5@1000";
-    String cClassStr = "1002@0";
-    v.add(cInstStr, cClassStr);  // C -->(class) --> C.class.
-    v.add(bInstStr, cInstStr);   // B -->(field) --> C.
+      B bInst = new B();
+      setTag(bInst, 3);
+      aInst.foo2 = bInst;
+      String bInstStr = "3@1001";
+      String bClassStr = "1001@0";
+      v.add(bInstStr, bClassStr);  // B -->(class) --> B.class.
+      v.add(aInstStr, bInstStr);   // A -->(field) --> B.
 
-    A a4Inst = new A();
-    setTag(a4Inst, 6);
-    cInst.baz = a4Inst;
-    String a4InstStr = "6@1000";
-    v.add(a4InstStr, aClassStr);  // A4 -->(class) --> A.class.
-    v.add(cInstStr, a4InstStr);   // C -->(field) --> A4.
+      A a3Inst = new A();
+      setTag(a3Inst, 4);
+      bInst.bar = a3Inst;
+      String a3InstStr = "4@1000";
+      v.add(a3InstStr, aClassStr);  // A3 -->(class) --> A.class.
+      v.add(bInstStr, a3InstStr);   // B -->(field) --> A3.
 
-    cInst.baz2 = aInst;
-    v.add(cInstStr, aInstStr);  // C -->(field) --> A.
+      C cInst = new C();
+      setTag(cInst, 5);
+      bInst.bar2 = cInst;
+      String cInstStr = "5@1000";
+      String cClassStr = "1002@0";
+      v.add(cInstStr, cClassStr);  // C -->(class) --> C.class.
+      v.add(bInstStr, cInstStr);   // B -->(field) --> C.
 
-    return aInst;
+      A a4Inst = new A();
+      setTag(a4Inst, 6);
+      cInst.baz = a4Inst;
+      String a4InstStr = "6@1000";
+      v.add(a4InstStr, aClassStr);  // A4 -->(class) --> A.class.
+      v.add(cInstStr, a4InstStr);   // C -->(field) --> A4.
+
+      cInst.baz2 = aInst;
+      v.add(cInstStr, aInstStr);  // C -->(field) --> A.
+
+      return aInst;
+    }
   }
 
   public static class A {
@@ -214,7 +372,35 @@
     }
   }
 
+  private static interface Inf1 {
+    public final static int A = 1;
+  }
+
+  private static interface Inf2 extends Inf1 {
+    public final static int B = 1;
+  }
+
+  private static class IntObject implements Inf1 {
+    byte b = (byte)1;
+    char c = 'a';
+    short s = (short)2;
+    int i = 3;
+    long l = 4;
+    Object o = new Object();
+    static int sI = 5;
+  }
+
+  private static class FloatObject extends IntObject implements Inf2 {
+    float f = 1.23f;
+    double d = 1.23;
+    Object p = new Object();
+    static int sI = 6;
+  }
+
   public static class Verifier {
+    // Should roots with vreg=-1 be printed?
+    public final static boolean PRINT_ROOTS_WITH_UNKNOWN_VREG = false;
+
     public static class Node {
       public String referrer;
 
@@ -243,7 +429,7 @@
       }
     }
 
-    public void process(String[] lines, String additionalEnabledReferrer) {
+    public void process(String[] lines, String additionalEnabledReferrer, boolean filtered) {
       // This method isn't optimal. The loops could be merged. However, it's more readable if
       // the different parts are separated.
 
@@ -299,10 +485,28 @@
             continue;
           }
           lastRoot = l;
+          if (!PRINT_ROOTS_WITH_UNKNOWN_VREG && l.indexOf("vreg=-1") > 0) {
+            continue;
+          }
           System.out.println(l);
         }
       }
 
+      if (filtered) {
+        // If we aren't tracking dependencies, just sort the lines and print.
+        // TODO: As the verifier is currently using the output lines to track dependencies, we
+        //       cannot verify that output is correct when parts of it are suppressed by filters.
+        //       To correctly track this we need to take node information into account, and
+        //       actually analyze the graph.
+        Collections.sort(nonRootLines);
+        for (String l : nonRootLines) {
+          System.out.println(l);
+        }
+
+        System.out.println("---");
+        return;
+      }
+
       // Iterate through the lines, keeping track of which referrers are visited, to ensure the
       // order is acceptable.
       HashSet<String> enabled = new HashSet<>();
@@ -379,9 +583,12 @@
   private static native int getGcFinishes();
   private static native void forceGarbageCollection();
 
-  private static native void setTag(Object o, long tag);
-  private static native long getTag(Object o);
+  public static native void setTag(Object o, long tag);
+  public static native long getTag(Object o);
 
-  private static native String[] followReferences(int heapFilter, Class<?> klassFilter,
+  public static native String[] followReferences(int heapFilter, Class<?> klassFilter,
       Object initialObject, int stopAfter, int followSet, Object jniRef);
+  public static native String[] followReferencesString(Object initialObject);
+  public static native String followReferencesPrimitiveArray(Object initialObject);
+  public static native String followReferencesPrimitiveFields(Object initialObject);
 }
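
The heapFilter values 0x4, 0x8, 0x10 and 0x20 passed to TestConfig above are the standard JVMTI heap-filter bits. Per the JVMTI specification a set bit excludes the matching objects from the callbacks rather than selecting them, which is why some filtered sections of the expected output contain nothing but "---" separators; and because filtered output no longer forms a complete dependency graph, the Verifier falls back to plain sorted printing. A sketch using the named constants from jvmti.h:

#include "jvmti.h"

// The standard heap-filter bits (a set bit excludes matching objects):
//   JVMTI_HEAP_FILTER_TAGGED         == 0x4
//   JVMTI_HEAP_FILTER_UNTAGGED       == 0x8
//   JVMTI_HEAP_FILTER_CLASS_TAGGED   == 0x10
//   JVMTI_HEAP_FILTER_CLASS_UNTAGGED == 0x20
static jvmtiError FollowFiltered(jvmtiEnv* env,
                                 jint heap_filter,
                                 jobject initial_object,
                                 const jvmtiHeapCallbacks* callbacks,
                                 void* user_data) {
  // heap_filter == JVMTI_HEAP_FILTER_TAGGED reproduces the 0x4 case above:
  // tagged objects are suppressed, so only untagged ones reach the callbacks.
  return env->FollowReferences(heap_filter, /* klass= */ nullptr, initial_object,
                               callbacks, user_data);
}
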
diff --git a/test/918-fields/fields.cc b/test/918-fields/fields.cc
index 7d29912..c659126 100644
--- a/test/918-fields/fields.cc
+++ b/test/918-fields/fields.cc
@@ -18,7 +18,7 @@
 
 #include "base/macros.h"
 #include "jni.h"
-#include "openjdkjvmti/jvmti.h"
+#include "jvmti.h"
 #include "ScopedLocalRef.h"
 
 #include "ti-agent/common_helper.h"
diff --git a/test/920-objects/objects.cc b/test/920-objects/objects.cc
index 0553a9d..ad1431e 100644
--- a/test/920-objects/objects.cc
+++ b/test/920-objects/objects.cc
@@ -18,7 +18,7 @@
 
 #include "base/macros.h"
 #include "jni.h"
-#include "openjdkjvmti/jvmti.h"
+#include "jvmti.h"
 #include "ScopedLocalRef.h"
 
 #include "ti-agent/common_helper.h"
diff --git a/test/921-hello-failure/expected.txt b/test/921-hello-failure/expected.txt
index a5dc10d..fdbfbe2 100644
--- a/test/921-hello-failure/expected.txt
+++ b/test/921-hello-failure/expected.txt
@@ -50,3 +50,6 @@
 hello there again - FieldChange
 Transformation error : java.lang.Exception(Failed to redefine class <LTransform4;> due to JVMTI_ERROR_UNSUPPORTED_REDEFINITION_SCHEMA_CHANGED)
 hello there again - FieldChange
+hello - Unmodifiable
+Transformation error : java.lang.Exception(Failed to redefine class <[LTransform;> due to JVMTI_ERROR_UNMODIFIABLE_CLASS)
+hello - Unmodifiable
diff --git a/test/921-hello-failure/src/Main.java b/test/921-hello-failure/src/Main.java
index 5bbe2b5..6779ed8 100644
--- a/test/921-hello-failure/src/Main.java
+++ b/test/921-hello-failure/src/Main.java
@@ -32,6 +32,7 @@
     NewField.doTest(new Transform());
     MissingField.doTest(new Transform4("there"));
     FieldChange.doTest(new Transform4("there again"));
+    Unmodifiable.doTest(new Transform[] { new Transform(), });
   }
 
   // Transforms the class. This throws an exception if something goes wrong.
diff --git a/test/921-hello-failure/src/Unmodifiable.java b/test/921-hello-failure/src/Unmodifiable.java
new file mode 100644
index 0000000..ad05f51
--- /dev/null
+++ b/test/921-hello-failure/src/Unmodifiable.java
@@ -0,0 +1,52 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.Base64;
+
+class Unmodifiable {
+  // The following are base64 encodings of a valid class file and of the
+  // corresponding dex file.
+  private static final byte[] CLASS_BYTES = Base64.getDecoder().decode(
+    "yv66vgAAADQAFQoABgAPBwAQCAARCgACABIHABMHABQBAAY8aW5pdD4BAAMoKVYBAARDb2RlAQAP" +
+    "TGluZU51bWJlclRhYmxlAQAFc2F5SGkBABUoTGphdmEvbGFuZy9TdHJpbmc7KVYBAApTb3VyY2VG" +
+    "aWxlAQAOVHJhbnNmb3JtLmphdmEMAAcACAEAD2phdmEvbGFuZy9FcnJvcgEAFVNob3VsZCBub3Qg" +
+    "YmUgY2FsbGVkIQwABwAMAQAJVHJhbnNmb3JtAQAQamF2YS9sYW5nL09iamVjdAAgAAUABgAAAAAA" +
+    "AgAAAAcACAABAAkAAAAdAAEAAQAAAAUqtwABsQAAAAEACgAAAAYAAQAAAAIAAAALAAwAAQAJAAAA" +
+    "IgADAAIAAAAKuwACWRIDtwAEvwAAAAEACgAAAAYAAQAAAAQAAQANAAAAAgAO");
+  private static final byte[] DEX_BYTES = Base64.getDecoder().decode(
+    "ZGV4CjAzNQCrV81cy4Q+YKMMMqc0bZEO5Y1X5u7irPeQAgAAcAAAAHhWNBIAAAAAAAAAAPwBAAAL" +
+    "AAAAcAAAAAUAAACcAAAAAgAAALAAAAAAAAAAAAAAAAQAAADIAAAAAQAAAOgAAACIAQAACAEAAEoB" +
+    "AABSAQAAXwEAAHIBAACGAQAAmgEAALEBAADBAQAAxAEAAMgBAADcAQAAAQAAAAIAAAADAAAABAAA" +
+    "AAcAAAAHAAAABAAAAAAAAAAIAAAABAAAAEQBAAAAAAAAAAAAAAAAAQAKAAAAAQABAAAAAAACAAAA" +
+    "AAAAAAAAAAAAAAAAAgAAAAAAAAAGAAAAAAAAAO4BAAAAAAAAAQABAAEAAADjAQAABAAAAHAQAwAA" +
+    "AA4ABAACAAIAAADoAQAACQAAACIAAQAbAQUAAABwIAIAEAAnAAAAAQAAAAMABjxpbml0PgALTFRy" +
+    "YW5zZm9ybTsAEUxqYXZhL2xhbmcvRXJyb3I7ABJMamF2YS9sYW5nL09iamVjdDsAEkxqYXZhL2xh" +
+    "bmcvU3RyaW5nOwAVU2hvdWxkIG5vdCBiZSBjYWxsZWQhAA5UcmFuc2Zvcm0uamF2YQABVgACVkwA" +
+    "EmVtaXR0ZXI6IGphY2stNC4yNAAFc2F5SGkAAgAHDgAEAQAHDgAAAAEBAICABIgCAQCgAgwAAAAA" +
+    "AAAAAQAAAAAAAAABAAAACwAAAHAAAAACAAAABQAAAJwAAAADAAAAAgAAALAAAAAFAAAABAAAAMgA" +
+    "AAAGAAAAAQAAAOgAAAABIAAAAgAAAAgBAAABEAAAAQAAAEQBAAACIAAACwAAAEoBAAADIAAAAgAA" +
+    "AOMBAAAAIAAAAQAAAO4BAAAAEAAAAQAAAPwBAAA=");
+
+  public static void doTest(Transform[] ts) {
+    ts[0].sayHi("Unmodifiable");
+    try {
+      Main.doCommonClassRedefinition(Transform[].class, CLASS_BYTES, DEX_BYTES);
+    } catch (Exception e) {
+      System.out.println(
+          "Transformation error : " + e.getClass().getName() + "(" + e.getMessage() + ")");
+    }
+    ts[0].sayHi("Unmodifiable");
+  }
+}
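
Array classes such as Transform[] are never modifiable, so the redefinition attempted above must fail with JVMTI_ERROR_UNMODIFIABLE_CLASS. An agent can check the condition up front through the standard IsModifiableClass entry point; a minimal sketch:

#include "jvmti.h"

// Returns true only if RedefineClasses may be attempted on klass. Array and
// primitive classes report JNI_FALSE, so redefining them would fail with
// JVMTI_ERROR_UNMODIFIABLE_CLASS, as the expected output above shows.
static bool CanRedefine(jvmtiEnv* env, jclass klass) {
  jboolean is_modifiable = JNI_FALSE;
  jvmtiError err = env->IsModifiableClass(klass, &is_modifiable);
  return err == JVMTI_ERROR_NONE && is_modifiable == JNI_TRUE;
}
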
diff --git a/test/922-properties/properties.cc b/test/922-properties/properties.cc
index cb732c7..3fd274e 100644
--- a/test/922-properties/properties.cc
+++ b/test/922-properties/properties.cc
@@ -18,7 +18,7 @@
 
 #include "base/macros.h"
 #include "jni.h"
-#include "openjdkjvmti/jvmti.h"
+#include "jvmti.h"
 #include "ScopedUtfChars.h"
 
 #include "ti-agent/common_helper.h"
diff --git a/test/923-monitors/monitors.cc b/test/923-monitors/monitors.cc
index 4baa530..131fc6a 100644
--- a/test/923-monitors/monitors.cc
+++ b/test/923-monitors/monitors.cc
@@ -18,7 +18,7 @@
 
 #include "base/macros.h"
 #include "jni.h"
-#include "openjdkjvmti/jvmti.h"
+#include "jvmti.h"
 #include "ScopedUtfChars.h"
 
 #include "ti-agent/common_helper.h"
diff --git a/test/924-threads/src/Main.java b/test/924-threads/src/Main.java
index f18d70e..716f59e 100644
--- a/test/924-threads/src/Main.java
+++ b/test/924-threads/src/Main.java
@@ -135,8 +135,12 @@
     synchronized(cdl3_2) {
       cdl3_1.countDown();
       cdl3_2.await();
-      Thread.yield();
-      Thread.sleep(100);
+      // While the latch improves the chances of making good progress, scheduling might still be
+      // messy. Wait until the thread has reached the expected Java-side Thread state.
+      do {
+        Thread.yield();
+      } while (t.getState() != Thread.State.BLOCKED);
+      Thread.sleep(10);
       printThreadState(t);
     }
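
The do/while above replaces a fixed sleep with polling until the target thread has actually reached BLOCKED, since the latch alone cannot force a scheduling order. The same idea expressed against the native JVMTI API, as a sketch (a busy-wait for brevity; agent code would normally yield or sleep between probes):

#include "jvmti.h"

// Spin until `thread` is blocked entering a monitor, mirroring the Java-side
// loop on Thread.State.BLOCKED above.
static jvmtiError WaitUntilBlocked(jvmtiEnv* env, jthread thread) {
  jint state = 0;
  do {
    jvmtiError err = env->GetThreadState(thread, &state);
    if (err != JVMTI_ERROR_NONE) {
      return err;
    }
  } while ((state & JVMTI_THREAD_STATE_BLOCKED_ON_MONITOR_ENTER) == 0);
  return JVMTI_ERROR_NONE;
}
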
 
diff --git a/test/924-threads/threads.cc b/test/924-threads/threads.cc
index 0380433..14ea5af 100644
--- a/test/924-threads/threads.cc
+++ b/test/924-threads/threads.cc
@@ -20,7 +20,7 @@
 #include "base/macros.h"
 #include "base/logging.h"
 #include "jni.h"
-#include "openjdkjvmti/jvmti.h"
+#include "jvmti.h"
 #include "ScopedLocalRef.h"
 
 #include "ti-agent/common_helper.h"
diff --git a/test/925-threadgroups/threadgroups.cc b/test/925-threadgroups/threadgroups.cc
index 6c6e835..2feaab0 100644
--- a/test/925-threadgroups/threadgroups.cc
+++ b/test/925-threadgroups/threadgroups.cc
@@ -20,7 +20,7 @@
 #include "base/macros.h"
 #include "base/logging.h"
 #include "jni.h"
-#include "openjdkjvmti/jvmti.h"
+#include "jvmti.h"
 #include "ScopedLocalRef.h"
 
 #include "ti-agent/common_helper.h"
diff --git a/test/927-timers/timers.cc b/test/927-timers/timers.cc
index 58d5c27..7b1d5c3 100644
--- a/test/927-timers/timers.cc
+++ b/test/927-timers/timers.cc
@@ -20,7 +20,7 @@
 #include "base/logging.h"
 #include "base/macros.h"
 #include "jni.h"
-#include "openjdkjvmti/jvmti.h"
+#include "jvmti.h"
 
 #include "ti-agent/common_helper.h"
 #include "ti-agent/common_load.h"
diff --git a/test/928-jni-table/jni_table.cc b/test/928-jni-table/jni_table.cc
index 5123d3a..b5c0efd 100644
--- a/test/928-jni-table/jni_table.cc
+++ b/test/928-jni-table/jni_table.cc
@@ -17,7 +17,7 @@
 #include <stdio.h>
 
 #include "jni.h"
-#include "openjdkjvmti/jvmti.h"
+#include "jvmti.h"
 
 #include "base/logging.h"
 #include "base/macros.h"
diff --git a/test/929-search/search.cc b/test/929-search/search.cc
index d1c6984..ad7a053 100644
--- a/test/929-search/search.cc
+++ b/test/929-search/search.cc
@@ -20,7 +20,7 @@
 #include "base/logging.h"
 #include "base/macros.h"
 #include "jni.h"
-#include "openjdkjvmti/jvmti.h"
+#include "jvmti.h"
 #include "ScopedUtfChars.h"
 
 #include "ti-agent/common_helper.h"
diff --git a/test/931-agent-thread/agent_thread.cc b/test/931-agent-thread/agent_thread.cc
index a488d9a..f8f9e48 100644
--- a/test/931-agent-thread/agent_thread.cc
+++ b/test/931-agent-thread/agent_thread.cc
@@ -21,7 +21,7 @@
 #include "base/logging.h"
 #include "base/macros.h"
 #include "jni.h"
-#include "openjdkjvmti/jvmti.h"
+#include "jvmti.h"
 #include "runtime.h"
 #include "ScopedLocalRef.h"
 #include "thread-inl.h"
diff --git a/test/933-misc-events/misc_events.cc b/test/933-misc-events/misc_events.cc
index 860d4b5..7043350 100644
--- a/test/933-misc-events/misc_events.cc
+++ b/test/933-misc-events/misc_events.cc
@@ -21,7 +21,7 @@
 #include "base/logging.h"
 #include "base/macros.h"
 #include "jni.h"
-#include "openjdkjvmti/jvmti.h"
+#include "jvmti.h"
 
 #include "ti-agent/common_helper.h"
 #include "ti-agent/common_load.h"
diff --git a/test/936-search-onload/search_onload.cc b/test/936-search-onload/search_onload.cc
index 2286a46..3b19ca5 100644
--- a/test/936-search-onload/search_onload.cc
+++ b/test/936-search-onload/search_onload.cc
@@ -22,7 +22,7 @@
 #include "base/logging.h"
 #include "base/macros.h"
 #include "jni.h"
-#include "openjdkjvmti/jvmti.h"
+#include "jvmti.h"
 #include "ScopedUtfChars.h"
 
 #include "ti-agent/common_helper.h"
diff --git a/test/944-transform-classloaders/classloader.cc b/test/944-transform-classloaders/classloader.cc
index 5fbd8e1..7cb3c08 100644
--- a/test/944-transform-classloaders/classloader.cc
+++ b/test/944-transform-classloaders/classloader.cc
@@ -16,8 +16,8 @@
 
 #include "base/macros.h"
 #include "jni.h"
+#include "jvmti.h"
 #include "mirror/class-inl.h"
-#include "openjdkjvmti/jvmti.h"
 #include "ScopedLocalRef.h"
 
 #include "ti-agent/common_helper.h"
diff --git a/test/945-obsolete-native/obsolete_native.cc b/test/945-obsolete-native/obsolete_native.cc
index 061e7af..442836b 100644
--- a/test/945-obsolete-native/obsolete_native.cc
+++ b/test/945-obsolete-native/obsolete_native.cc
@@ -24,7 +24,7 @@
 #include "base/logging.h"
 #include "base/macros.h"
 #include "jni.h"
-#include "openjdkjvmti/jvmti.h"
+#include "jvmti.h"
 #include "ScopedLocalRef.h"
 #include "ti-agent/common_helper.h"
 #include "ti-agent/common_load.h"
diff --git a/test/946-obsolete-throw/src/Main.java b/test/946-obsolete-throw/src/Main.java
index 3ff97ae..21fe972 100644
--- a/test/946-obsolete-throw/src/Main.java
+++ b/test/946-obsolete-throw/src/Main.java
@@ -71,7 +71,7 @@
       t.sayHi(new DoRedefinitionClass());
     } catch (Throwable e) {
       System.out.println("Received error : " + e);
-      e.printStackTrace();
+      e.printStackTrace(System.out);
     }
     t.sayHi(() -> { System.out.println("Not doing anything here"); });
   }
diff --git a/test/950-redefine-intrinsic/expected.txt b/test/950-redefine-intrinsic/expected.txt
new file mode 100644
index 0000000..1264c94
--- /dev/null
+++ b/test/950-redefine-intrinsic/expected.txt
@@ -0,0 +1 @@
+Finished!
diff --git a/test/950-redefine-intrinsic/info.txt b/test/950-redefine-intrinsic/info.txt
new file mode 100644
index 0000000..c19d2b4
--- /dev/null
+++ b/test/950-redefine-intrinsic/info.txt
@@ -0,0 +1,3 @@
+Tests basic functions in the jvmti plugin.
+
+Tests that we are able to redefine intrinsic functions.
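
Class redefinition in these tests presumably bottoms out in the standard JVMTI RedefineClasses entry point (reached here via the Main.doCommonClassRedefinition helper). Once java.lang.Long is redefined, the runtime must execute the new bytecode instead of the compiler's intrinsic fast paths. A minimal sketch of the direct call, assuming the agent holds the can_redefine_classes capability:

#include "jvmti.h"

// Replaces the bytecode of klass with class_bytes. After a successful call,
// invocations of the redefined methods must observe the new definitions, even
// where the old ones were implemented as compiler intrinsics.
static jvmtiError Redefine(jvmtiEnv* env,
                           jclass klass,
                           jint byte_count,
                           const unsigned char* class_bytes) {
  jvmtiClassDefinition def;
  def.klass = klass;
  def.class_byte_count = byte_count;
  def.class_bytes = class_bytes;
  return env->RedefineClasses(1, &def);
}
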
diff --git a/test/577-profile-foreign-dex/run b/test/950-redefine-intrinsic/run
old mode 100644
new mode 100755
similarity index 78%
copy from test/577-profile-foreign-dex/run
copy to test/950-redefine-intrinsic/run
index ad57d14..e92b873
--- a/test/577-profile-foreign-dex/run
+++ b/test/950-redefine-intrinsic/run
@@ -1,6 +1,6 @@
 #!/bin/bash
 #
-# Copyright 2016 The Android Open Source Project
+# Copyright 2017 The Android Open Source Project
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -14,7 +14,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-exec ${RUN} \
-  --runtime-option -Xjitsaveprofilinginfo \
-  --runtime-option -Xusejit:true \
-  "${@}"
+./default-run "$@" --jvmti
diff --git a/test/950-redefine-intrinsic/src/Main.java b/test/950-redefine-intrinsic/src/Main.java
new file mode 100644
index 0000000..30cd3ab
--- /dev/null
+++ b/test/950-redefine-intrinsic/src/Main.java
@@ -0,0 +1,471 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.Base64;
+import java.util.Random;
+import java.util.function.*;
+import java.util.stream.*;
+
+public class Main {
+
+  // The bytes below define the following java program.
+  // package java.lang;
+  // import java.math.*;
+  // public final class Long extends Number implements Comparable<Long> {
+  //     public static final long MIN_VALUE = 0;
+  //     public static final long MAX_VALUE = 0;
+  //     public static final Class<Long> TYPE = null;
+  //     static { }
+  //     // Used for Stream.count for some reason.
+  //     public static long sum(long a, long b) {
+  //       return a + b;
+  //     }
+  //     // Used in stream/lambda functions.
+  //     public Long(long value) {
+  //       this.value = value;
+  //     }
+  //     // Used in stream/lambda functions.
+  //     public static Long valueOf(long l) {
+  //       return new Long(l);
+  //     }
+  //     // Intrinsic! Do something cool. Return i + 1
+  //     public static long highestOneBit(long i) {
+  //       return i + 1;
+  //     }
+  //     // Intrinsic! Do something cool. Return i - 1
+  //     public static long lowestOneBit(long i) {
+  //       return i - 1;
+  //     }
+  //     // Intrinsic! Do something cool. Return i + i
+  //     public static int numberOfLeadingZeros(long i) {
+  //       return (int)(i + i);
+  //     }
+  //     // Intrinsic! Do something cool. Return i & (i >>> 1);
+  //     public static int numberOfTrailingZeros(long i) {
+  //       return (int)(i & (i >>> 1));
+  //     }
+  //     // Intrinsic! Do something cool. Return 5
+  //      public static int bitCount(long i) {
+  //        return 5;
+  //      }
+  //     // Intrinsic! Do something cool. Return i
+  //     public static long rotateLeft(long i, int distance) {
+  //       return i;
+  //     }
+  //     // Intrinsic! Do something cool. Return 10 * i
+  //     public static long rotateRight(long i, int distance) {
+  //       return 10 * i;
+  //     }
+  //     // Intrinsic! Do something cool. Return -i
+  //     public static long reverse(long i) {
+  //       return -i;
+  //     }
+  //     // Intrinsic! Do something cool. Return 0
+  //     public static int signum(long i) {
+  //       return 0;
+  //     }
+  //     // Intrinsic! Do something cool. Return 0
+  //     public static long reverseBytes(long i) {
+  //       return 0;
+  //     }
+  //     public String toString() {
+  //       return "Redefined Long! value (as double)=" + ((double)value);
+  //     }
+  //     public static String toString(long i, int radix) {
+  //       throw new Error("Method redefined away!");
+  //     }
+  //     public static String toUnsignedString(long i, int radix) {
+  //       throw new Error("Method redefined away!");
+  //     }
+  //     private static BigInteger toUnsignedBigInteger(long i) {
+  //       throw new Error("Method redefined away!");
+  //     }
+  //     public static String toHexString(long i) {
+  //       throw new Error("Method redefined away!");
+  //     }
+  //     public static String toOctalString(long i) {
+  //       throw new Error("Method redefined away!");
+  //     }
+  //     public static String toBinaryString(long i) {
+  //       throw new Error("Method redefined away!");
+  //     }
+  //     static String toUnsignedString0(long val, int shift) {
+  //       throw new Error("Method redefined away!");
+  //     }
+  //     static int formatUnsignedLong(long val, int shift, char[] buf, int offset, int len) {
+  //       throw new Error("Method redefined away!");
+  //     }
+  //     public static String toString(long i) {
+  //       throw new Error("Method redefined away!");
+  //     }
+  //     public static String toUnsignedString(long i) {
+  //       throw new Error("Method redefined away!");
+  //     }
+  //     static void getChars(long i, int index, char[] buf) {
+  //       throw new Error("Method redefined away!");
+  //     }
+  //     static int stringSize(long x) {
+  //       throw new Error("Method redefined away!");
+  //     }
+  //     public static long parseLong(String s, int radix) throws NumberFormatException {
+  //       throw new Error("Method redefined away!");
+  //     }
+  //     public static long parseLong(String s) throws NumberFormatException {
+  //       throw new Error("Method redefined away!");
+  //     }
+  //     public static long parseUnsignedLong(String s, int radix) throws NumberFormatException {
+  //       throw new Error("Method redefined away!");
+  //     }
+  //     public static long parseUnsignedLong(String s) throws NumberFormatException {
+  //       throw new Error("Method redefined away!");
+  //     }
+  //     public static Long valueOf(String s, int radix) throws NumberFormatException {
+  //       throw new Error("Method redefined away!");
+  //     }
+  //     public static Long valueOf(String s) throws NumberFormatException {
+  //       throw new Error("Method redefined away!");
+  //     }
+  //     public static Long decode(String nm) throws NumberFormatException {
+  //       throw new Error("Method redefined away!");
+  //     }
+  //     private final long value;
+  //     public Long(String s) throws NumberFormatException {
+  //       this(0);
+  //       throw new Error("Method redefined away!");
+  //     }
+  //     public byte byteValue() {
+  //       throw new Error("Method redefined away!");
+  //     }
+  //     public short shortValue() {
+  //       throw new Error("Method redefined away!");
+  //     }
+  //     public int intValue() {
+  //       throw new Error("Method redefined away!");
+  //     }
+  //     public long longValue() {
+  //       return value;
+  //     }
+  //     public float floatValue() {
+  //       throw new Error("Method redefined away!");
+  //     }
+  //     public double doubleValue() {
+  //       throw new Error("Method redefined away!");
+  //     }
+  //     public int hashCode() {
+  //       throw new Error("Method redefined away!");
+  //     }
+  //     public static int hashCode(long value) {
+  //       throw new Error("Method redefined away!");
+  //     }
+  //     public boolean equals(Object obj) {
+  //       throw new Error("Method redefined away!");
+  //     }
+  //     public static Long getLong(String nm) {
+  //       throw new Error("Method redefined away!");
+  //     }
+  //     public static Long getLong(String nm, long val) {
+  //       throw new Error("Method redefined away!");
+  //     }
+  //     public static Long getLong(String nm, Long val) {
+  //       throw new Error("Method redefined away!");
+  //     }
+  //     public int compareTo(Long anotherLong) {
+  //       throw new Error("Method redefined away!");
+  //     }
+  //     public static int compare(long x, long y) {
+  //       throw new Error("Method redefined away!");
+  //     }
+  //     public static int compareUnsigned(long x, long y) {
+  //       throw new Error("Method redefined away!");
+  //     }
+  //     public static long divideUnsigned(long dividend, long divisor) {
+  //       throw new Error("Method redefined away!");
+  //     }
+  //     public static long remainderUnsigned(long dividend, long divisor) {
+  //       throw new Error("Method redefined away!");
+  //     }
+  //     public static final int SIZE = 64;
+  //     public static final int BYTES = SIZE / Byte.SIZE;
+  //     public static long max(long a, long b) {
+  //       throw new Error("Method redefined away!");
+  //     }
+  //     public static long min(long a, long b) {
+  //       throw new Error("Method redefined away!");
+  //     }
+  //     private static final long serialVersionUID = 0;
+  // }
+  private static final byte[] CLASS_BYTES = Base64.getDecoder().decode(
+    "yv66vgAAADQAiQUAAAAAAAAACgcAdQoAAwB2CAB3CgADAHgJAA0AeQoAAwB6CgADAHsHAHwIAH0K" +
+    "AAoAfgcAfwoADQCACgASAHYKAA0AgQkADQCCBwCDBwCEAQAJTUlOX1ZBTFVFAQABSgEADUNvbnN0" +
+    "YW50VmFsdWUFAAAAAAAAAAABAAlNQVhfVkFMVUUBAARUWVBFAQARTGphdmEvbGFuZy9DbGFzczsB" +
+    "AAlTaWduYXR1cmUBACNMamF2YS9sYW5nL0NsYXNzPExqYXZhL2xhbmcvTG9uZzs+OwEABXZhbHVl" +
+    "AQAEU0laRQEAAUkDAAAAQAEABUJZVEVTAwAAAAgBABBzZXJpYWxWZXJzaW9uVUlEAQANaGlnaGVz" +
+    "dE9uZUJpdAEABChKKUoBAARDb2RlAQAPTGluZU51bWJlclRhYmxlAQAMbG93ZXN0T25lQml0AQAU" +
+    "bnVtYmVyT2ZMZWFkaW5nWmVyb3MBAAQoSilJAQAVbnVtYmVyT2ZUcmFpbGluZ1plcm9zAQAIYml0" +
+    "Q291bnQBAApyb3RhdGVMZWZ0AQAFKEpJKUoBAAtyb3RhdGVSaWdodAEAB3JldmVyc2UBAAZzaWdu" +
+    "dW0BAAxyZXZlcnNlQnl0ZXMBAAh0b1N0cmluZwEAFCgpTGphdmEvbGFuZy9TdHJpbmc7AQAWKEpJ" +
+    "KUxqYXZhL2xhbmcvU3RyaW5nOwEAEHRvVW5zaWduZWRTdHJpbmcBABR0b1Vuc2lnbmVkQmlnSW50" +
+    "ZWdlcgEAGShKKUxqYXZhL21hdGgvQmlnSW50ZWdlcjsBAAt0b0hleFN0cmluZwEAFShKKUxqYXZh" +
+    "L2xhbmcvU3RyaW5nOwEADXRvT2N0YWxTdHJpbmcBAA50b0JpbmFyeVN0cmluZwEAEXRvVW5zaWdu" +
+    "ZWRTdHJpbmcwAQASZm9ybWF0VW5zaWduZWRMb25nAQAJKEpJW0NJSSlJAQAIZ2V0Q2hhcnMBAAco" +
+    "SklbQylWAQAKc3RyaW5nU2l6ZQEACXBhcnNlTG9uZwEAFihMamF2YS9sYW5nL1N0cmluZztJKUoB" +
+    "AApFeGNlcHRpb25zBwCFAQAVKExqYXZhL2xhbmcvU3RyaW5nOylKAQARcGFyc2VVbnNpZ25lZExv" +
+    "bmcBAAd2YWx1ZU9mAQAlKExqYXZhL2xhbmcvU3RyaW5nO0kpTGphdmEvbGFuZy9Mb25nOwEAJChM" +
+    "amF2YS9sYW5nL1N0cmluZzspTGphdmEvbGFuZy9Mb25nOwEAEyhKKUxqYXZhL2xhbmcvTG9uZzsB" +
+    "AAZkZWNvZGUBAAY8aW5pdD4BAAQoSilWAQAVKExqYXZhL2xhbmcvU3RyaW5nOylWAQAJYnl0ZVZh" +
+    "bHVlAQADKClCAQAKc2hvcnRWYWx1ZQEAAygpUwEACGludFZhbHVlAQADKClJAQAJbG9uZ1ZhbHVl" +
+    "AQADKClKAQAKZmxvYXRWYWx1ZQEAAygpRgEAC2RvdWJsZVZhbHVlAQADKClEAQAIaGFzaENvZGUB" +
+    "AAZlcXVhbHMBABUoTGphdmEvbGFuZy9PYmplY3Q7KVoBAAdnZXRMb25nAQAlKExqYXZhL2xhbmcv" +
+    "U3RyaW5nO0opTGphdmEvbGFuZy9Mb25nOwEANChMamF2YS9sYW5nL1N0cmluZztMamF2YS9sYW5n" +
+    "L0xvbmc7KUxqYXZhL2xhbmcvTG9uZzsBAAljb21wYXJlVG8BABMoTGphdmEvbGFuZy9Mb25nOylJ" +
+    "AQAHY29tcGFyZQEABShKSilJAQAPY29tcGFyZVVuc2lnbmVkAQAOZGl2aWRlVW5zaWduZWQBAAUo" +
+    "SkopSgEAEXJlbWFpbmRlclVuc2lnbmVkAQADc3VtAQADbWF4AQADbWluAQAVKExqYXZhL2xhbmcv" +
+    "T2JqZWN0OylJAQAIPGNsaW5pdD4BAAMoKVYBADpMamF2YS9sYW5nL051bWJlcjtMamF2YS9sYW5n" +
+    "L0NvbXBhcmFibGU8TGphdmEvbGFuZy9Mb25nOz47AQAKU291cmNlRmlsZQEACUxvbmcuamF2YQEA" +
+    "F2phdmEvbGFuZy9TdHJpbmdCdWlsZGVyDABPAHEBACJSZWRlZmluZWQgTG9uZyEgdmFsdWUgKGFz" +
+    "IGRvdWJsZSk9DACGAIcMAB4AFQwAhgCIDAA0ADUBAA9qYXZhL2xhbmcvRXJyb3IBABZNZXRob2Qg" +
+    "cmVkZWZpbmVkIGF3YXkhDABPAFEBAA5qYXZhL2xhbmcvTG9uZwwATwBQDABkAGUMABoAGwEAEGph" +
+    "dmEvbGFuZy9OdW1iZXIBABRqYXZhL2xhbmcvQ29tcGFyYWJsZQEAH2phdmEvbGFuZy9OdW1iZXJG" +
+    "b3JtYXRFeGNlcHRpb24BAAZhcHBlbmQBAC0oTGphdmEvbGFuZy9TdHJpbmc7KUxqYXZhL2xhbmcv" +
+    "U3RyaW5nQnVpbGRlcjsBABwoRClMamF2YS9sYW5nL1N0cmluZ0J1aWxkZXI7ADEADQASAAEAEwAH" +
+    "ABkAFAAVAAEAFgAAAAIAFwAZABkAFQABABYAAAACABcAGQAaABsAAQAcAAAAAgAdABIAHgAVAAAA" +
+    "GQAfACAAAQAWAAAAAgAhABkAIgAgAAEAFgAAAAIAIwAaACQAFQABABYAAAACABcANwAJACUAJgAB" +
+    "ACcAAAAcAAQAAgAAAAQeCmGtAAAAAQAoAAAABgABAAAADgAJACkAJgABACcAAAAcAAQAAgAAAAQe" +
+    "CmWtAAAAAQAoAAAABgABAAAAEwAJACoAKwABACcAAAAdAAQAAgAAAAUeHmGIrAAAAAEAKAAAAAYA" +
+    "AQAAABgACQAsACsAAQAnAAAAHwAFAAIAAAAHHh4EfX+IrAAAAAEAKAAAAAYAAQAAAB0ACQAtACsA" +
+    "AQAnAAAAGgABAAIAAAACCKwAAAABACgAAAAGAAEAAAAiAAkALgAvAAEAJwAAABoAAgADAAAAAh6t" +
+    "AAAAAQAoAAAABgABAAAAJwAJADAALwABACcAAAAeAAQAAwAAAAYUAAEeaa0AAAABACgAAAAGAAEA" +
+    "AAAsAAkAMQAmAAEAJwAAABsAAgACAAAAAx51rQAAAAEAKAAAAAYAAQAAADEACQAyACsAAQAnAAAA" +
+    "GgABAAIAAAACA6wAAAABACgAAAAGAAEAAAA2AAkAMwAmAAEAJwAAABoAAgACAAAAAgmtAAAAAQAo" +
+    "AAAABgABAAAAOwABADQANQABACcAAAAwAAMAAQAAABi7AANZtwAEEgW2AAYqtAAHirYACLYACbAA" +
+    "AAABACgAAAAGAAEAAAA/AAkANAA2AAEAJwAAACIAAwADAAAACrsAClkSC7cADL8AAAABACgAAAAG" +
+    "AAEAAABDAAkANwA2AAEAJwAAACIAAwADAAAACrsAClkSC7cADL8AAAABACgAAAAGAAEAAABGAAoA" +
+    "OAA5AAEAJwAAACIAAwACAAAACrsAClkSC7cADL8AAAABACgAAAAGAAEAAABKAAkAOgA7AAEAJwAA" +
+    "ACIAAwACAAAACrsAClkSC7cADL8AAAABACgAAAAGAAEAAABNAAkAPAA7AAEAJwAAACIAAwACAAAA" +
+    "CrsAClkSC7cADL8AAAABACgAAAAGAAEAAABRAAkAPQA7AAEAJwAAACIAAwACAAAACrsAClkSC7cA" +
+    "DL8AAAABACgAAAAGAAEAAABVAAgAPgA2AAEAJwAAACIAAwADAAAACrsAClkSC7cADL8AAAABACgA" +
+    "AAAGAAEAAABZAAgAPwBAAAEAJwAAACIAAwAGAAAACrsAClkSC7cADL8AAAABACgAAAAGAAEAAABd" +
+    "AAkANAA7AAEAJwAAACIAAwACAAAACrsAClkSC7cADL8AAAABACgAAAAGAAEAAABhAAkANwA7AAEA" +
+    "JwAAACIAAwACAAAACrsAClkSC7cADL8AAAABACgAAAAGAAEAAABlAAgAQQBCAAEAJwAAACIAAwAE" +
+    "AAAACrsAClkSC7cADL8AAAABACgAAAAGAAEAAABpAAgAQwArAAEAJwAAACIAAwACAAAACrsAClkS" +
+    "C7cADL8AAAABACgAAAAGAAEAAABtAAkARABFAAIAJwAAACIAAwACAAAACrsAClkSC7cADL8AAAAB" +
+    "ACgAAAAGAAEAAABxAEYAAAAEAAEARwAJAEQASAACACcAAAAiAAMAAQAAAAq7AApZEgu3AAy/AAAA" +
+    "AQAoAAAABgABAAAAdQBGAAAABAABAEcACQBJAEUAAgAnAAAAIgADAAIAAAAKuwAKWRILtwAMvwAA" +
+    "AAEAKAAAAAYAAQAAAHkARgAAAAQAAQBHAAkASQBIAAIAJwAAACIAAwABAAAACrsAClkSC7cADL8A" +
+    "AAABACgAAAAGAAEAAAB9AEYAAAAEAAEARwAJAEoASwACACcAAAAiAAMAAgAAAAq7AApZEgu3AAy/" +
+    "AAAAAQAoAAAABgABAAAAgQBGAAAABAABAEcACQBKAEwAAgAnAAAAIgADAAEAAAAKuwAKWRILtwAM" +
+    "vwAAAAEAKAAAAAYAAQAAAIQARgAAAAQAAQBHAAkASgBNAAEAJwAAACEABAACAAAACbsADVketwAO" +
+    "sAAAAAEAKAAAAAYAAQAAAIcACQBOAEwAAgAnAAAAIgADAAEAAAAKuwAKWRILtwAMvwAAAAEAKAAA" +
+    "AAYAAQAAAIsARgAAAAQAAQBHAAEATwBQAAEAJwAAACoAAwADAAAACiq3AA8qH7UAB7EAAAABACgA" +
+    "AAAOAAMAAACQAAQAkQAJAJIAAQBPAFEAAgAnAAAAKwADAAIAAAAPKgm3AA67AApZEgu3AAy/AAAA" +
+    "AQAoAAAACgACAAAAlQAFAJYARgAAAAQAAQBHAAEAUgBTAAEAJwAAACIAAwABAAAACrsAClkSC7cA" +
+    "DL8AAAABACgAAAAGAAEAAACaAAEAVABVAAEAJwAAACIAAwABAAAACrsAClkSC7cADL8AAAABACgA" +
+    "AAAGAAEAAACeAAEAVgBXAAEAJwAAACIAAwABAAAACrsAClkSC7cADL8AAAABACgAAAAGAAEAAACi" +
+    "AAEAWABZAAEAJwAAAB0AAgABAAAABSq0AAetAAAAAQAoAAAABgABAAAApgABAFoAWwABACcAAAAi" +
+    "AAMAAQAAAAq7AApZEgu3AAy/AAAAAQAoAAAABgABAAAAqgABAFwAXQABACcAAAAiAAMAAQAAAAq7" +
+    "AApZEgu3AAy/AAAAAQAoAAAABgABAAAArgABAF4AVwABACcAAAAiAAMAAQAAAAq7AApZEgu3AAy/" +
+    "AAAAAQAoAAAABgABAAAAsgAJAF4AKwABACcAAAAiAAMAAgAAAAq7AApZEgu3AAy/AAAAAQAoAAAA" +
+    "BgABAAAAtgABAF8AYAABACcAAAAiAAMAAgAAAAq7AApZEgu3AAy/AAAAAQAoAAAABgABAAAAugAJ" +
+    "AGEATAABACcAAAAiAAMAAQAAAAq7AApZEgu3AAy/AAAAAQAoAAAABgABAAAAvgAJAGEAYgABACcA" +
+    "AAAiAAMAAwAAAAq7AApZEgu3AAy/AAAAAQAoAAAABgABAAAAwgAJAGEAYwABACcAAAAiAAMAAgAA" +
+    "AAq7AApZEgu3AAy/AAAAAQAoAAAABgABAAAAxgABAGQAZQABACcAAAAiAAMAAgAAAAq7AApZEgu3" +
+    "AAy/AAAAAQAoAAAABgABAAAAyQAJAGYAZwABACcAAAAiAAMABAAAAAq7AApZEgu3AAy/AAAAAQAo" +
+    "AAAABgABAAAAzQAJAGgAZwABACcAAAAiAAMABAAAAAq7AApZEgu3AAy/AAAAAQAoAAAABgABAAAA" +
+    "0QAJAGkAagABACcAAAAiAAMABAAAAAq7AApZEgu3AAy/AAAAAQAoAAAABgABAAAA1QAJAGsAagAB" +
+    "ACcAAAAiAAMABAAAAAq7AApZEgu3AAy/AAAAAQAoAAAABgABAAAA2QAJAGwAagABACcAAAAcAAQA" +
+    "BAAAAAQeIGGtAAAAAQAoAAAABgABAAAA4AAJAG0AagABACcAAAAiAAMABAAAAAq7AApZEgu3AAy/" +
+    "AAAAAQAoAAAABgABAAAA5AAJAG4AagABACcAAAAiAAMABAAAAAq7AApZEgu3AAy/AAAAAQAoAAAA" +
+    "BgABAAAA5xBBAGQAbwABACcAAAAhAAIAAgAAAAkqK8AADbYAEKwAAAABACgAAAAGAAEAAAAFAAgA" +
+    "cABxAAEAJwAAACEAAQAAAAAABQGzABGxAAAAAQAoAAAACgACAAAACAAEAAoAAgAcAAAAAgByAHMA" +
+    "AAACAHQ=");
+  private static final byte[] DEX_BYTES = Base64.getDecoder().decode(
+    "ZGV4CjAzNQAFtMupmeDN6Ck5nxdemGsp43KmLNpYLrMYFgAAcAAAAHhWNBIAAAAAAAAAAEgVAABl" +
+    "AAAAcAAAABUAAAAEAgAAIAAAAFgCAAAHAAAA2AMAAD0AAAAQBAAAAQAAAPgFAAAAEAAAGAYAAB4O" +
+    "AAAhDgAAKw4AADMOAAA3DgAAOg4AAEEOAABEDgAARw4AAEoOAABODgAAVg4AAFsOAABfDgAAYg4A" +
+    "AGYOAABrDgAAcA4AAHQOAAB5DgAAfA4AAIAOAACEDgAAiQ4AAI0OAACSDgAAlw4AAJwOAAC7DgAA" +
+    "1w4AAOkOAAD8DgAAEw8AACsPAAA+DwAAUA8AAGQPAACHDwAAmw8AAK8PAADKDwAA4g8AAO0PAAD4" +
+    "DwAAAxAAABsQAAA/EAAAQhAAAEgQAABOEAAAURAAAFUQAABbEAAAXxAAAGIQAABmEAAAahAAAHIQ" +
+    "AAB8EAAAhxAAAJAQAACbEAAArBAAALQQAADEEAAA0RAAAOUQAADtEAAA+RAAAA0RAAAXEQAAIBEA" +
+    "ACoRAAA5EQAAQxEAAE4RAABcEQAAYREAAGYRAAB8EQAAkxEAAJ4RAACxEQAAxBEAAM0RAADbEQAA" +
+    "5xEAAPQRAAAGEgAAEhIAABoSAAAmEgAAKxIAADsSAABIEgAAVxIAAGESAAB3EgAAiRIAAJwSAACj" +
+    "EgAABAAAAAYAAAAHAAAACAAAAA0AAAAbAAAAHAAAAB4AAAAgAAAAIQAAACIAAAAjAAAAJAAAACUA" +
+    "AAAmAAAAJwAAACgAAAAuAAAAMQAAADUAAAA3AAAABAAAAAAAAAAAAAAABgAAAAEAAAAAAAAABwAA" +
+    "AAIAAAAAAAAACAAAAAMAAAAAAAAACQAAAAMAAAC0DQAACgAAAAMAAAC8DQAACwAAAAMAAADMDQAA" +
+    "DAAAAAMAAADUDQAADAAAAAMAAADcDQAADQAAAAQAAAAAAAAADgAAAAQAAAC0DQAADwAAAAQAAADk" +
+    "DQAAEAAAAAQAAADMDQAAEQAAAAQAAADsDQAAEgAAAAQAAAD0DQAAFQAAAAoAAAC0DQAAFwAAAAoA" +
+    "AADsDQAAGAAAAAoAAAD0DQAAGQAAAAoAAAD8DQAAGgAAAAoAAAAEDgAAEwAAAA4AAAAAAAAAFQAA" +
+    "AA4AAAC0DQAAFgAAAA4AAADkDQAAFAAAAA8AAAAMDgAAFwAAAA8AAADsDQAAFQAAABAAAAC0DQAA" +
+    "LgAAABEAAAAAAAAAMQAAABIAAAAAAAAAMgAAABIAAAC0DQAAMwAAABIAAAAUDgAANAAAABIAAADs" +
+    "DQAANgAAABMAAADcDQAACgADAAUAAAAKAAQAKgAAAAoABAArAAAACgADAC8AAAAKAAcAMAAAAAoA" +
+    "BABXAAAACgAEAGMAAAAJAB4AAgAAAAoAGwABAAAACgAcAAIAAAAKAB4AAgAAAAoABAA5AAAACgAA" +
+    "ADoAAAAKAAYAOwAAAAoABwA8AAAACgAIADwAAAAKAAYAPQAAAAoAEAA+AAAACgAMAD8AAAAKAAEA" +
+    "QAAAAAoAHwBCAAAACgACAEMAAAAKAAUARAAAAAoAHQBFAAAACgAQAEYAAAAKABIARgAAAAoAEwBG" +
+    "AAAACgADAEcAAAAKAAQARwAAAAoACgBIAAAACgADAEkAAAAKAAkASgAAAAoACgBLAAAACgAMAEwA" +
+    "AAAKAAwATQAAAAoABABOAAAACgAEAE8AAAAKAA0AUAAAAAoADgBQAAAACgANAFEAAAAKAA4AUQAA" +
+    "AAoADABSAAAACgAKAFMAAAAKAAoAVAAAAAoACwBVAAAACgALAFYAAAAKABoAWAAAAAoABABZAAAA" +
+    "CgAEAFoAAAAKAAwAWwAAAAoAFQBcAAAACgAVAF0AAAAKABUAXgAAAAoAFABfAAAACgAVAF8AAAAK" +
+    "ABYAXwAAAAoAGQBgAAAACgAVAGEAAAAKABYAYQAAAAoAFgBiAAAACgAPAGQAAAAKABAAZAAAAAoA" +
+    "EQBkAAAACwAbAAIAAAAPABsAAgAAAA8AFwA4AAAADwAYADgAAAAPABQAXwAAAAoAAAARAAAACwAA" +
+    "AKwNAAApAAAAVA0AAFEUAABIFAAAAQAAACIUAAABAAAAMhQAAAEAAABAFAAAAAAAAAAAAACsEgAA" +
+    "AQAAAA4AAAAEAAMAAQAAALESAAAGAAAAcBA4AAEAWhIGAA4ABAACAAMAAAC6EgAADgAAABYAAABw" +
+    "MAIAAgEiAAkAGwEsAAAAcCAAABAAJwADAAIAAAAAAMISAAACAAAAElAPAAYABAACAAAAyBIAAAkA" +
+    "AAAiAAkAGwEsAAAAcCAAABAAJwAAAAYABAACAAAA0BIAAAkAAAAiAAkAGwEsAAAAcCAAABAAJwAA" +
+    "AAMAAQACAAAA2BIAAAkAAAAiAAkAGwEsAAAAcCAAABAAJwAAAAYABAACAAAA3xIAAAkAAAAiAAkA" +
+    "GwEsAAAAcCAAABAAJwAAAAgABgACAAAA5xIAAAkAAAAiAAkAGwEsAAAAcCAAABAAJwAAAAYABAAC" +
+    "AAAA8RIAAAkAAAAiAAkAGwEsAAAAcCAAABAAJwAAAAMAAQACAAAA+RIAAAkAAAAiAAkAGwEsAAAA" +
+    "cCAAABAAJwAAAAUAAwACAAAAABMAAAkAAAAiAAkAGwEsAAAAcCAAABAAJwAAAAQAAgACAAAACBMA" +
+    "AAkAAAAiAAkAGwEsAAAAcCAAABAAJwAAAAQAAgACAAAAEBMAAAkAAAAiAAkAGwEsAAAAcCAAABAA" +
+    "JwAAAAQAAgAAAAAAFxMAAAQAAAAWAAEAuyAQAAQAAgAAAAAAHRMAAAUAAAAWAAEAnAACABAAAAAG" +
+    "AAQAAgAAACMTAAAJAAAAIgAJABsBLAAAAHAgAAAQACcAAAAGAAQAAgAAACsTAAAJAAAAIgAJABsB" +
+    "LAAAAHAgAAAQACcAAAAEAAIAAAAAADMTAAAEAAAAmwACAoQADwAEAAIAAAAAADkTAAAGAAAAEhCl" +
+    "AAIAwCCEAA8AAwABAAIAAAA/EwAACQAAACIACQAbASwAAABwIAAAEAAnAAAABAACAAIAAABFEwAA" +
+    "CQAAACIACQAbASwAAABwIAAAEAAnAAAAAwABAAIAAABMEwAACQAAACIACQAbASwAAABwIAAAEAAn" +
+    "AAAABAACAAIAAABSEwAACQAAACIACQAbASwAAABwIAAAEAAnAAAABgAEAAIAAABZEwAACQAAACIA" +
+    "CQAbASwAAABwIAAAEAAnAAAABAACAAAAAABhEwAAAgAAAH0gEAAEAAIAAAAAAGcTAAADAAAAFgAA" +
+    "ABAAAAADAAMAAAAAAG0TAAABAAAAEAAAAAUAAwAAAAAAdBMAAAQAAAAWAAoAvSAQAAMAAgAAAAAA" +
+    "exMAAAIAAAASAA8ABAACAAIAAACBEwAACQAAACIACQAbASwAAABwIAAAEAAnAAAABgAEAAAAAACH" +
+    "EwAAAwAAAJsAAgQQAAAABAACAAIAAACPEwAACQAAACIACQAbASwAAABwIAAAEAAnAAAABAACAAIA" +
+    "AACVEwAACQAAACIACQAbASwAAABwIAAAEAAnAAAABAACAAIAAACbEwAACQAAACIACQAbASwAAABw" +
+    "IAAAEAAnAAAABAACAAIAAAChEwAACQAAACIACQAbASwAAABwIAAAEAAnAAAABQADAAIAAACnEwAA" +
+    "CQAAACIACQAbASwAAABwIAAAEAAnAAAABAACAAIAAACuEwAACQAAACIACQAbASwAAABwIAAAEAAn" +
+    "AAAABAACAAIAAAC0EwAACQAAACIACQAbASwAAABwIAAAEAAnAAAABQADAAIAAAC6EwAACQAAACIA" +
+    "CQAbASwAAABwIAAAEAAnAAAABQADAAIAAADBEwAACQAAACIACQAbASwAAABwIAAAEAAnAAAABAAC" +
+    "AAMAAADIEwAABgAAACIACgBwMAIAIAMRAAMAAQACAAAAzxMAAAkAAAAiAAkAGwEsAAAAcCAAABAA" +
+    "JwAAAAQAAgACAAAA1hMAAAkAAAAiAAkAGwEsAAAAcCAAABAAJwAAAAMAAQACAAAA3hMAAAkAAAAi" +
+    "AAkAGwEsAAAAcCAAABAAJwAAAAQAAgACAAAA5BMAAAkAAAAiAAkAGwEsAAAAcCAAABAAJwAAAAMA" +
+    "AgACAAAA6xMAAAcAAAAfAgoAbiAHACEACgAPAAAAAwABAAIAAADyEwAACQAAACIACQAbASwAAABw" +
+    "IAAAEAAnAAAABAACAAIAAAD4EwAACQAAACIACQAbASwAAABwIAAAEAAnAAAAAwABAAIAAAD/EwAA" +
+    "CQAAACIACQAbASwAAABwIAAAEAAnAAAAAwABAAIAAAAFFAAACQAAACIACQAbASwAAABwIAAAEAAn" +
+    "AAAAAwABAAIAAAALFAAACQAAACIACQAbASwAAABwIAAAEAAnAAAAAwABAAAAAAARFAAAAwAAAFMg" +
+    "BgAQAAAAAwABAAIAAAAXFAAACQAAACIACQAbASwAAABwIAAAEAAnAAAABQABAAMAAAAdFAAAGAAA" +
+    "ACIADwBwEDkAAAAbAS0AAABuIDsAEAAMAFNCBgCGIm4wOgAgAwwAbhA8AAAADAARABgGAAABAAAA" +
+    "CAAAAAAAAAAEAAAAIAYAAAMAAAAoBgAACgAAACgGAAAeAAAAKAYAAB8AAAAoBgAAIAAAACgGAAAh" +
+    "AAAAKAYAADYAAAAoBgAANwAAACgGAAABAAAACAAAAAEAAAAEAAAABQAAAAQAAwAUAAMAAwAAAAIA" +
+    "AAAEAAQAAQAAAAoAAAABAAAADQAAAAIAAAAEAAMAAQAAAA4AAAACAAAADgADAAIAAAAOAAQAAgAA" +
+    "AA4ACgABAAAAAQAAAAMAAAAEAAMAFAABPAAIPGNsaW5pdD4ABjxpbml0PgACPjsAAUIABUJZVEVT" +
+    "AAFEAAFGAAFJAAJJSgAGSUpJTElJAANJSkoAAklMAAFKAAJKSgADSkpJAANKSkoAAkpMAANKTEkA" +
+    "AUwAAkxEAAJMSgADTEpJAAJMTAADTExJAANMTEoAA0xMTAAdTGRhbHZpay9hbm5vdGF0aW9uL1Np" +
+    "Z25hdHVyZTsAGkxkYWx2aWsvYW5ub3RhdGlvbi9UaHJvd3M7ABBMamF2YS9sYW5nL0NsYXNzABFM" +
+    "amF2YS9sYW5nL0NsYXNzOwAVTGphdmEvbGFuZy9Db21wYXJhYmxlABZMamF2YS9sYW5nL0NvbXBh" +
+    "cmFibGU7ABFMamF2YS9sYW5nL0Vycm9yOwAQTGphdmEvbGFuZy9Mb25nOwASTGphdmEvbGFuZy9O" +
+    "dW1iZXI7ACFMamF2YS9sYW5nL051bWJlckZvcm1hdEV4Y2VwdGlvbjsAEkxqYXZhL2xhbmcvT2Jq" +
+    "ZWN0OwASTGphdmEvbGFuZy9TdHJpbmc7ABlMamF2YS9sYW5nL1N0cmluZ0J1aWxkZXI7ABZMamF2" +
+    "YS9tYXRoL0JpZ0ludGVnZXI7AAlMb25nLmphdmEACU1BWF9WQUxVRQAJTUlOX1ZBTFVFABZNZXRo" +
+    "b2QgcmVkZWZpbmVkIGF3YXkhACJSZWRlZmluZWQgTG9uZyEgdmFsdWUgKGFzIGRvdWJsZSk9AAFT" +
+    "AARTSVpFAARUWVBFAAFWAAJWSgAEVkpJTAACVkwAAVoAAlpMAAJbQwAGYXBwZW5kAAhiaXRDb3Vu" +
+    "dAAJYnl0ZVZhbHVlAAdjb21wYXJlAAljb21wYXJlVG8AD2NvbXBhcmVVbnNpZ25lZAAGZGVjb2Rl" +
+    "AA5kaXZpZGVVbnNpZ25lZAALZG91YmxlVmFsdWUAEmVtaXR0ZXI6IGphY2stNC4yNQAGZXF1YWxz" +
+    "AApmbG9hdFZhbHVlABJmb3JtYXRVbnNpZ25lZExvbmcACGdldENoYXJzAAdnZXRMb25nAAhoYXNo" +
+    "Q29kZQANaGlnaGVzdE9uZUJpdAAIaW50VmFsdWUACWxvbmdWYWx1ZQAMbG93ZXN0T25lQml0AANt" +
+    "YXgAA21pbgAUbnVtYmVyT2ZMZWFkaW5nWmVyb3MAFW51bWJlck9mVHJhaWxpbmdaZXJvcwAJcGFy" +
+    "c2VMb25nABFwYXJzZVVuc2lnbmVkTG9uZwARcmVtYWluZGVyVW5zaWduZWQAB3JldmVyc2UADHJl" +
+    "dmVyc2VCeXRlcwAKcm90YXRlTGVmdAALcm90YXRlUmlnaHQAEHNlcmlhbFZlcnNpb25VSUQACnNo" +
+    "b3J0VmFsdWUABnNpZ251bQAKc3RyaW5nU2l6ZQADc3VtAA50b0JpbmFyeVN0cmluZwALdG9IZXhT" +
+    "dHJpbmcADXRvT2N0YWxTdHJpbmcACHRvU3RyaW5nABR0b1Vuc2lnbmVkQmlnSW50ZWdlcgAQdG9V" +
+    "bnNpZ25lZFN0cmluZwARdG9VbnNpZ25lZFN0cmluZzAABXZhbHVlAAd2YWx1ZU9mAAUABw4AkAEB" +
+    "AAcOPC0AlQEBAAcOWgAiAQAHDgDNAQIAAAcOANEBAgAABw4AiwEBAAcOANUBAgAABw4AXQUAAAAA" +
+    "AAcOAGkDAAAABw4AvgEBAAcOAMIBAgAABw4AxgECAAAHDgC2AQEABw4ADgEABw4AEwEABw4A5AEC" +
+    "AAAHDgDnAQIAAAcOABgBAAcOAB0BAAcOAHUBAAcOAHECAAAHDgB9AQAHDgB5AgAABw4A2QECAAAH" +
+    "DgAxAQAHDgA7AQAHDgAnAgAABw4ALAIAAAcOADYBAAcOAG0BAAcOAOABAgAABw4AVQEABw4ATQEA" +
+    "Bw4AUQEABw4AYQEABw4AQwIAAAcOAEoBAAcOAGUBAAcOAEYCAAAHDgBZAgAABw4AhwEBAAcOAIQB" +
+    "AQAHDgCBAQIAAAcOAJoBAAcOAMkBAQAHDgDIAQEABw4ArgEABw4AugEBAAcOAKoBAAcOALIBAAcO" +
+    "AKIBAAcOAKYBAAcOAJ4BAAcOAD8ABw4AAgUBYxwFFyMXHxcAFyIXAwIFAWMcBBcdFwAXIhcDAgYB" +
+    "YxwBGAwEBAgGAAYABEAGASwLABkBGQEZARkBGQEaBhIBiIAEsAwBgYAExAwBgYAE4AwBCYwNAgmg" +
+    "DQMJxA0BCegNAQmMDgQIsA4BCNQOAQn4DgEJnA8BCcAPAgnkDwEJiBADCaAQAQm8EAEJ4BABCYQR" +
+    "AQmcEQEJuBEBCdwRAQmAEgEJpBIBCcgSAQnsEgEJgBMBCZgTAQmsEwIJxBMBCNgTAQn8EwEJlBQB" +
+    "CbgUAQncFAIJgBUBCaQVAQrIFQEJ7BUBCZAWAQi0FgEJ2BYBCfQWAQmYFwUBvBcCAeAXAcEghBgE" +
+    "AaQYAQHIGAEB7BgGAZAZAwG0GQEB2BkPAfAZBwGUGgAAEQAAAAAAAAABAAAAAAAAAAEAAABlAAAA" +
+    "cAAAAAIAAAAVAAAABAIAAAMAAAAgAAAAWAIAAAQAAAAHAAAA2AMAAAUAAAA9AAAAEAQAAAYAAAAB" +
+    "AAAA+AUAAAMQAAADAAAAGAYAAAEgAAA3AAAAMAYAAAYgAAABAAAAVA0AAAEQAAANAAAArA0AAAIg" +
+    "AABlAAAAHg4AAAMgAAA3AAAArBIAAAQgAAADAAAAIhQAAAUgAAABAAAASBQAAAAgAAABAAAAURQA" +
+    "AAAQAAABAAAASBUAAA==");
+
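+  // Wraps a LongPredicate together with the name of the intrinsic being compared,
+  // so failures can be reported per-intrinsic.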
+  static class FuncCmp implements LongPredicate {
+    final String name;
+    final LongPredicate p;
+    public FuncCmp(String name, LongPredicate p) {
+      this.name = name;
+      this.p = p;
+    }
+
+    public boolean test(long l) {
+      return p.test(l);
+    }
+  }
+  static FuncCmp l2l(String name, final LongUnaryOperator a, final LongUnaryOperator b) {
+    return new FuncCmp(name, (v) -> a.applyAsLong(v) == b.applyAsLong(v));
+  }
+  static FuncCmp l2i(String name, final LongToIntFunction a, final LongToIntFunction b) {
+    return new FuncCmp(name, (v) -> a.applyAsInt(v) == b.applyAsInt(v));
+  }
+
+  /** Interface for a (long, int) -> long function. */
+  static interface LI2IFunction { public long applyToLongInt(long a, int b); }
+
+  static FuncCmp li2l(String name, final Random r, final LI2IFunction a, final LI2IFunction b) {
+    return new FuncCmp(name, new LongPredicate() {
+      public boolean test(long v) {
+        int i = r.nextInt();
+        return a.applyToLongInt(v, i) == b.applyToLongInt(v, i);
+      }
+    });
+  }
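+
+  // For example (illustration only), the factories above produce named comparisons:
+  //   FuncCmp cmp = l2i("bitCount", Long::bitCount, RedefinedLongIntrinsics::bitCount);
+  //   cmp.test(42L);  // true iff both implementations agree on the value 42L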
+
+  public static void main(String[] args) {
+    doTest(10000);
+  }
+
+  public static void doTest(int iters) {
+    // Just transform immediately.
+    doCommonClassRedefinition(Long.class, CLASS_BYTES, DEX_BYTES);
+    final Random r = new Random();
+    FuncCmp[] comps = new FuncCmp[] {
+      l2l("highestOneBit", Long::highestOneBit, RedefinedLongIntrinsics::highestOneBit),
+      l2l("lowestOneBit", Long::lowestOneBit, RedefinedLongIntrinsics::lowestOneBit),
+      l2i("numberOfLeadingZeros",
+          Long::numberOfLeadingZeros,
+          RedefinedLongIntrinsics::numberOfLeadingZeros),
+      l2i("numberOfTrailingZeros",
+          Long::numberOfTrailingZeros,
+          RedefinedLongIntrinsics::numberOfTrailingZeros),
+      l2i("bitCount", Long::bitCount, RedefinedLongIntrinsics::bitCount),
+      li2l("rotateLeft", r, Long::rotateLeft, RedefinedLongIntrinsics::rotateLeft),
+      li2l("rotateRight", r, Long::rotateRight, RedefinedLongIntrinsics::rotateRight),
+      l2l("reverse", Long::reverse, RedefinedLongIntrinsics::reverse),
+      l2i("signum", Long::signum, RedefinedLongIntrinsics::signum),
+      l2l("reverseBytes", Long::reverseBytes, RedefinedLongIntrinsics::reverseBytes),
+    };
+    for (FuncCmp f : comps) {
+      // Actually use ints so we can cast them back in the tests to print them (since we
+      // deleted a bunch of the Long methods needed for printing longs)!
+      int failures = (int)r.ints(iters)
+                           .mapToLong((v) -> (long)v)
+                           .filter(f.negate()) // Get all the test cases that failed.
+                           .count();
+      if (failures != 0) {
+        double percent = 100.0d*((double)failures/(double)iters);
+        System.out.println("for intrinsic " + f.name + " " + failures + "/" + iters
+            + " (" + Double.toString(percent) + "%) tests failed!");
+      }
+    }
+    System.out.println("Finished!");
+  }
+
+  // Transforms the class
+  private static native void doCommonClassRedefinition(Class<?> target,
+                                                       byte[] class_file,
+                                                       byte[] dex_file);
+}
diff --git a/test/950-redefine-intrinsic/src/RedefinedLongIntrinsics.java b/test/950-redefine-intrinsic/src/RedefinedLongIntrinsics.java
new file mode 100644
index 0000000..0ada4a6
--- /dev/null
+++ b/test/950-redefine-intrinsic/src/RedefinedLongIntrinsics.java
@@ -0,0 +1,70 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * The methods that are intrinsified in Long and their expected redefined implementations.
+ */
+class RedefinedLongIntrinsics {
+  // Intrinsic! Do something cool. Return i + 1
+  public static long highestOneBit(long i) {
+    return i + 1;
+  }
+
+  // Intrinsic! Do something cool. Return i - 1
+  public static long lowestOneBit(long i) {
+    return i - 1;
+  }
+
+  // Intrinsic! Do something cool. Return i + i
+  public static int numberOfLeadingZeros(long i) {
+    return (int)(i + i);
+  }
+
+  // Intrinsic! Do something cool. Return i & (i >>> 1);
+  public static int numberOfTrailingZeros(long i) {
+    return (int)(i & (i >>> 1));
+  }
+
+  // Intrinsic! Do something cool. Return 5
+  public static int bitCount(long i) {
+    return 5;
+  }
+
+  // Intrinsic! Do something cool. Return i
+  public static long rotateLeft(long i, int distance) {
+    return i;
+  }
+
+  // Intrinsic! Do something cool. Return 10 * i
+  public static long rotateRight(long i, int distance) {
+    return 10 * i;
+  }
+
+  // Intrinsic! Do something cool. Return -i
+  public static long reverse(long i) {
+    return -i;
+  }
+
+  // Intrinsic! Do something cool. Return 0
+  public static int signum(long i) {
+    return 0;
+  }
+
+  // Intrinsic! Do something cool. Return 0
+  public static long reverseBytes(long i) {
+    return 0;
+  }
+}
diff --git a/test/951-threaded-obsolete/expected.txt b/test/951-threaded-obsolete/expected.txt
new file mode 100644
index 0000000..83efda1
--- /dev/null
+++ b/test/951-threaded-obsolete/expected.txt
@@ -0,0 +1,9 @@
+hello
+Not doing anything here
+goodbye
+hello
+transforming calling function
+goodbye
+Hello - Transformed
+Not doing anything here
+Goodbye - Transformed
diff --git a/test/951-threaded-obsolete/info.txt b/test/951-threaded-obsolete/info.txt
new file mode 100644
index 0000000..e7ef4a2
--- /dev/null
+++ b/test/951-threaded-obsolete/info.txt
@@ -0,0 +1,4 @@
+Tests basic obsolete method support
+
+This test ensures that obsolete methods will work even if the obsolete method is
+running on a different thread than the one where the redefinition was triggered.
diff --git a/test/577-profile-foreign-dex/run b/test/951-threaded-obsolete/run
old mode 100644
new mode 100755
similarity index 85%
rename from test/577-profile-foreign-dex/run
rename to test/951-threaded-obsolete/run
index ad57d14..c6e62ae
--- a/test/577-profile-foreign-dex/run
+++ b/test/951-threaded-obsolete/run
@@ -14,7 +14,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-exec ${RUN} \
-  --runtime-option -Xjitsaveprofilinginfo \
-  --runtime-option -Xusejit:true \
-  "${@}"
+./default-run "$@" --jvmti
diff --git a/test/951-threaded-obsolete/src/Main.java b/test/951-threaded-obsolete/src/Main.java
new file mode 100644
index 0000000..98e7236
--- /dev/null
+++ b/test/951-threaded-obsolete/src/Main.java
@@ -0,0 +1,96 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.Base64;
+import java.util.concurrent.Semaphore;
+
+public class Main {
+  // class Transform {
+  //   public void sayHi(Runnable r) {
+  //     System.out.println("Hello - Transformed");
+  //     r.run();
+  //     System.out.println("Goodbye - Transformed");
+  //   }
+  // }
+  private static final byte[] CLASS_BYTES = Base64.getDecoder().decode(
+    "yv66vgAAADQAJAoACAARCQASABMIABQKABUAFgsAFwAYCAAZBwAaBwAbAQAGPGluaXQ+AQADKClW" +
+    "AQAEQ29kZQEAD0xpbmVOdW1iZXJUYWJsZQEABXNheUhpAQAXKExqYXZhL2xhbmcvUnVubmFibGU7" +
+    "KVYBAApTb3VyY2VGaWxlAQAOVHJhbnNmb3JtLmphdmEMAAkACgcAHAwAHQAeAQATSGVsbG8gLSBU" +
+    "cmFuc2Zvcm1lZAcAHwwAIAAhBwAiDAAjAAoBABVHb29kYnllIC0gVHJhbnNmb3JtZWQBAAlUcmFu" +
+    "c2Zvcm0BABBqYXZhL2xhbmcvT2JqZWN0AQAQamF2YS9sYW5nL1N5c3RlbQEAA291dAEAFUxqYXZh" +
+    "L2lvL1ByaW50U3RyZWFtOwEAE2phdmEvaW8vUHJpbnRTdHJlYW0BAAdwcmludGxuAQAVKExqYXZh" +
+    "L2xhbmcvU3RyaW5nOylWAQASamF2YS9sYW5nL1J1bm5hYmxlAQADcnVuACAABwAIAAAAAAACAAAA" +
+    "CQAKAAEACwAAAB0AAQABAAAABSq3AAGxAAAAAQAMAAAABgABAAAAAQABAA0ADgABAAsAAAA7AAIA" +
+    "AgAAABeyAAISA7YABCu5AAUBALIAAhIGtgAEsQAAAAEADAAAABIABAAAAAMACAAEAA4ABQAWAAYA" +
+    "AQAPAAAAAgAQ");
+  private static final byte[] DEX_BYTES = Base64.getDecoder().decode(
+    "ZGV4CjAzNQAYeAMMXgYWxoeSHAS9EWKCCtVRSAGpqZVQAwAAcAAAAHhWNBIAAAAAAAAAALACAAAR" +
+    "AAAAcAAAAAcAAAC0AAAAAwAAANAAAAABAAAA9AAAAAUAAAD8AAAAAQAAACQBAAAMAgAARAEAAKIB" +
+    "AACqAQAAwQEAANYBAADjAQAA+gEAAA4CAAAkAgAAOAIAAEwCAABcAgAAXwIAAGMCAAB3AgAAfAIA" +
+    "AIUCAACKAgAAAwAAAAQAAAAFAAAABgAAAAcAAAAIAAAACgAAAAoAAAAGAAAAAAAAAAsAAAAGAAAA" +
+    "lAEAAAsAAAAGAAAAnAEAAAUAAQANAAAAAAAAAAAAAAAAAAEAEAAAAAEAAgAOAAAAAgAAAAAAAAAD" +
+    "AAAADwAAAAAAAAAAAAAAAgAAAAAAAAAJAAAAAAAAAJ8CAAAAAAAAAQABAAEAAACRAgAABAAAAHAQ" +
+    "AwAAAA4ABAACAAIAAACWAgAAFAAAAGIAAAAbAQIAAABuIAIAEAByEAQAAwBiAAAAGwEBAAAAbiAC" +
+    "ABAADgABAAAAAwAAAAEAAAAEAAY8aW5pdD4AFUdvb2RieWUgLSBUcmFuc2Zvcm1lZAATSGVsbG8g" +
+    "LSBUcmFuc2Zvcm1lZAALTFRyYW5zZm9ybTsAFUxqYXZhL2lvL1ByaW50U3RyZWFtOwASTGphdmEv" +
+    "bGFuZy9PYmplY3Q7ABRMamF2YS9sYW5nL1J1bm5hYmxlOwASTGphdmEvbGFuZy9TdHJpbmc7ABJM" +
+    "amF2YS9sYW5nL1N5c3RlbTsADlRyYW5zZm9ybS5qYXZhAAFWAAJWTAASZW1pdHRlcjogamFjay00" +
+    "LjEzAANvdXQAB3ByaW50bG4AA3J1bgAFc2F5SGkAAQAHDgADAQAHDoc8hwAAAAEBAICABMQCAQHc" +
+    "AgAAAA0AAAAAAAAAAQAAAAAAAAABAAAAEQAAAHAAAAACAAAABwAAALQAAAADAAAAAwAAANAAAAAE" +
+    "AAAAAQAAAPQAAAAFAAAABQAAAPwAAAAGAAAAAQAAACQBAAABIAAAAgAAAEQBAAABEAAAAgAAAJQB" +
+    "AAACIAAAEQAAAKIBAAADIAAAAgAAAJECAAAAIAAAAQAAAJ8CAAAAEAAAAQAAALACAAA=");
+
+  public static void main(String[] args) {
+    // Semaphores to let each thread know where the other is. We could use barriers, but semaphores
+    // mean we don't need to have the worker thread waiting around.
+    final Semaphore sem_redefine_start = new Semaphore(0);
+    final Semaphore sem_redefine_end = new Semaphore(0);
+    // Create a thread to do the actual redefinition. We communicate with it through
+    // the semaphores above.
+    new Thread(() -> {
+      try {
+        // Wait for the other thread to ask for redefinition.
+        sem_redefine_start.acquire();
+        // Do the redefinition.
+        doCommonClassRedefinition(Transform.class, CLASS_BYTES, DEX_BYTES);
+        // Allow the other thread to wake up if it is waiting.
+        sem_redefine_end.release();
+      } catch (InterruptedException e) {
+        throw new Error("unable to do redefinition", e);
+      }
+    }).start();
+
+    Transform t = new Transform();
+    t.sayHi(() -> { System.out.println("Not doing anything here"); });
+    t.sayHi(() -> {
+      try {
+        System.out.println("transforming calling function");
+        // Wake up the waiting thread.
+        sem_redefine_start.release();
+        // Wait for the other thread to finish with redefinition.
+        sem_redefine_end.acquire();
+      } catch (InterruptedException e) {
+        throw new Error("unable to do redefinition", e);
+      }
+    });
+    t.sayHi(() -> { System.out.println("Not doing anything here"); });
+  }
+
+  // Transforms the class
+  private static native void doCommonClassRedefinition(Class<?> target,
+                                                       byte[] classfile,
+                                                       byte[] dexfile);
+}
diff --git a/test/951-threaded-obsolete/src/Transform.java b/test/951-threaded-obsolete/src/Transform.java
new file mode 100644
index 0000000..8cda6cd
--- /dev/null
+++ b/test/951-threaded-obsolete/src/Transform.java
@@ -0,0 +1,30 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+class Transform {
+  public void sayHi(Runnable r) {
+    // Use lower 'h' to make sure the string will have a different string id
+    // than the transformation (the transformation code is the same except
+    // the actual printed String, which was making the test inaccurately pass
+    // in JIT mode when loading the string from the dex cache, as the string ids
+    // of the two different strings were the same).
+    // We know the string ids will be different because lexicographically:
+    // "Hello" < "LTransform;" < "hello".
+    System.out.println("hello");
+    r.run();
+    System.out.println("goodbye");
+  }
+}
diff --git a/test/961-default-iface-resolution-gen/build b/test/961-default-iface-resolution-gen/build
index ccebbe4..2f7e3ba 100755
--- a/test/961-default-iface-resolution-gen/build
+++ b/test/961-default-iface-resolution-gen/build
@@ -17,15 +17,6 @@
 # make us exit on a failure
 set -e
 
-# We will be making more files than the ulimit is set to allow. Remove it temporarily.
-OLD_ULIMIT=`ulimit -S`
-ulimit -S unlimited
-
-restore_ulimit() {
-  ulimit -S "$OLD_ULIMIT"
-}
-trap 'restore_ulimit' ERR
-
 if [[ $@ != *"--jvm"* ]]; then
   # Don't do anything with jvm
   # Hard-wired use of experimental jack.
@@ -39,6 +30,3 @@
 ./util-src/generate_java.py ./src ./expected.txt
 
 ./default-build "$@" --experimental default-methods
-
-# Reset the ulimit back to its initial value
-restore_ulimit
diff --git a/test/964-default-iface-init-gen/build b/test/964-default-iface-init-gen/build
index ccebbe4..2f7e3ba 100755
--- a/test/964-default-iface-init-gen/build
+++ b/test/964-default-iface-init-gen/build
@@ -17,15 +17,6 @@
 # make us exit on a failure
 set -e
 
-# We will be making more files than the ulimit is set to allow. Remove it temporarily.
-OLD_ULIMIT=`ulimit -S`
-ulimit -S unlimited
-
-restore_ulimit() {
-  ulimit -S "$OLD_ULIMIT"
-}
-trap 'restore_ulimit' ERR
-
 if [[ $@ != *"--jvm"* ]]; then
   # Don't do anything with jvm
   # Hard-wired use of experimental jack.
@@ -39,6 +30,3 @@
 ./util-src/generate_java.py ./src ./expected.txt
 
 ./default-build "$@" --experimental default-methods
-
-# Reset the ulimit back to its initial value
-restore_ulimit
diff --git a/test/968-default-partial-compile-gen/build b/test/968-default-partial-compile-gen/build
index 1e9f8aa..00ccb89 100755
--- a/test/968-default-partial-compile-gen/build
+++ b/test/968-default-partial-compile-gen/build
@@ -17,15 +17,6 @@
 # make us exit on a failure
 set -e
 
-# We will be making more files than the ulimit is set to allow. Remove it temporarily.
-OLD_ULIMIT=`ulimit -S`
-ulimit -S unlimited
-
-restore_ulimit() {
-  ulimit -S "$OLD_ULIMIT"
-}
-trap 'restore_ulimit' ERR
-
 # TODO: Support running with jack.
 
 if [[ $@ == *"--jvm"* ]]; then
@@ -45,6 +36,3 @@
   # Use the default build script
   ./default-build "$@" "$EXTRA_ARGS" --experimental default-methods
 fi
-
-# Reset the ulimit back to its initial value
-restore_ulimit
diff --git a/test/970-iface-super-resolution-gen/build b/test/970-iface-super-resolution-gen/build
index fd1b271..7217fac 100755
--- a/test/970-iface-super-resolution-gen/build
+++ b/test/970-iface-super-resolution-gen/build
@@ -17,15 +17,6 @@
 # make us exit on a failure
 set -e
 
-# We will be making more files than the ulimit is set to allow. Remove it temporarily.
-OLD_ULIMIT=`ulimit -S`
-ulimit -S unlimited
-
-restore_ulimit() {
-  ulimit -S "$OLD_ULIMIT"
-}
-trap 'restore_ulimit' ERR
-
 # Should we compile with Java source code. By default we will use Smali.
 USES_JAVA_SOURCE="false"
 if [[ $@ == *"--jvm"* ]]; then
@@ -50,6 +41,3 @@
 fi
 
 ./default-build "$@" --experimental default-methods
-
-# Reset the ulimit back to its initial value
-restore_ulimit
diff --git a/test/971-iface-super/build b/test/971-iface-super/build
index 1e9f8aa..00ccb89 100755
--- a/test/971-iface-super/build
+++ b/test/971-iface-super/build
@@ -17,15 +17,6 @@
 # make us exit on a failure
 set -e
 
-# We will be making more files than the ulimit is set to allow. Remove it temporarily.
-OLD_ULIMIT=`ulimit -S`
-ulimit -S unlimited
-
-restore_ulimit() {
-  ulimit -S "$OLD_ULIMIT"
-}
-trap 'restore_ulimit' ERR
-
 # TODO: Support running with jack.
 
 if [[ $@ == *"--jvm"* ]]; then
@@ -45,6 +36,3 @@
   # Use the default build script
   ./default-build "$@" "$EXTRA_ARGS" --experimental default-methods
 fi
-
-# Reset the ulimit back to its initial value
-restore_ulimit
diff --git a/test/980-redefine-object/check b/test/980-redefine-object/check
new file mode 100755
index 0000000..987066f
--- /dev/null
+++ b/test/980-redefine-object/check
@@ -0,0 +1,20 @@
+#!/bin/bash
+#
+# Copyright (C) 2014 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# The number of paused background threads (and therefore InterruptedExceptions)
+# can change so we will just delete their lines from the log.
+
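+# Usage (as assumed by the test harness): check <expected-file> <actual-output-file>;
+# we filter the actual output ($2) before diffing it against the expected output ($1).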
+sed "/Object allocated of type 'Ljava\/lang\/InterruptedException;'/d" "$2" | diff --strip-trailing-cr -q "$1" - >/dev/null
diff --git a/test/980-redefine-object/expected.txt b/test/980-redefine-object/expected.txt
new file mode 100644
index 0000000..6e9bce0
--- /dev/null
+++ b/test/980-redefine-object/expected.txt
@@ -0,0 +1,52 @@
+	Initializing and loading the TestWatcher class that will (eventually) be notified of object allocations
+	Allocating a j.l.Object before redefining Object class
+	Allocating a Transform before redefining Object class
+	Redefining the Object class to add a hook into the <init> method
+Object allocated of type 'Ljava/lang/StringBuilder;'
+Object allocated of type 'Ljava/nio/HeapCharBuffer;'
+	Allocating a j.l.Object after redefining Object class
+Object allocated of type 'Ljava/lang/Object;'
+Object allocated of type 'Ljava/lang/StringBuilder;'
+Object allocated of type 'Ljava/nio/HeapCharBuffer;'
+	Allocating a Transform after redefining Object class
+Object allocated of type 'LTransform;'
+Object allocated of type 'Ljava/lang/StringBuilder;'
+Object allocated of type 'Ljava/nio/HeapCharBuffer;'
+	Allocating an int[] after redefining Object class
+Object allocated of type 'Ljava/lang/StringBuilder;'
+Object allocated of type 'Ljava/nio/HeapCharBuffer;'
+	Allocating an array list
+Object allocated of type 'Ljava/util/ArrayList;'
+Object allocated of type 'Ljava/lang/StringBuilder;'
+Object allocated of type 'Ljava/nio/HeapCharBuffer;'
+	Adding a bunch of stuff to the array list
+Object allocated of type 'Ljava/lang/Object;'
+Object allocated of type 'Ljava/lang/Object;'
+Object allocated of type 'LTransform;'
+Object allocated of type 'Ljava/lang/StringBuilder;'
+Object allocated of type 'Ljava/nio/HeapCharBuffer;'
+	Allocating a linked list
+Object allocated of type 'Ljava/util/LinkedList;'
+Object allocated of type 'Ljava/lang/StringBuilder;'
+Object allocated of type 'Ljava/nio/HeapCharBuffer;'
+	Adding a bunch of stuff to the linked list
+Object allocated of type 'Ljava/lang/Object;'
+Object allocated of type 'Ljava/util/LinkedList$Node;'
+Object allocated of type 'Ljava/lang/Object;'
+Object allocated of type 'Ljava/util/LinkedList$Node;'
+Object allocated of type 'Ljava/util/LinkedList$Node;'
+Object allocated of type 'Ljava/util/LinkedList$Node;'
+Object allocated of type 'Ljava/util/LinkedList$Node;'
+Object allocated of type 'Ljava/util/LinkedList$Node;'
+Object allocated of type 'LTransform;'
+Object allocated of type 'Ljava/util/LinkedList$Node;'
+Object allocated of type 'Ljava/lang/StringBuilder;'
+Object allocated of type 'Ljava/nio/HeapCharBuffer;'
+	Throwing from down 4 stack frames
+Object allocated of type 'Ljava/lang/Exception;'
+Object allocated of type 'Ljava/lang/StringBuilder;'
+Object allocated of type 'Ljava/nio/HeapCharBuffer;'
+	Exception caught.
+Object allocated of type 'Ljava/lang/StringBuilder;'
+Object allocated of type 'Ljava/nio/HeapCharBuffer;'
+	Finishing test!
diff --git a/test/980-redefine-object/info.txt b/test/980-redefine-object/info.txt
new file mode 100644
index 0000000..f3e01b5
--- /dev/null
+++ b/test/980-redefine-object/info.txt
@@ -0,0 +1,23 @@
+Tests basic functions in the jvmti plugin.
+
+This tests that we are able to redefine methods/constructors on the
+java.lang.Object class at runtime.
+
+This also (indirectly) tests that we correctly handle reading annotations on
+obsolete methods. This is something that is not normally done since there is no
+way to get a reference to an obsolete method outside of the runtime, but some
+annotations on the Object class are read by the runtime directly.
+
+NB This test cannot be run on the RI at the moment.
+
+If this test starts failing during the doCommonClassRedefinition call, it is
+possible that the definition of Object contained in the base64 DEX_BYTES array
+has become stale and will need to be recreated. The only difference from the
+normal Object dex bytes is that (a) it contains only the bytes of the Object
+class itself, and (b) it adds an
+'invoke-static {p0}, Ljava/lang/Object;->NotifyConstructed(Ljava/lang/Object;)V'
+to the <init> function.
+
+It is also possible that it could fail because the pattern of allocations caused
+by string concatenation or printing has changed. In that case you should simply
+update the expected.txt file.
diff --git a/test/980-redefine-object/redefine_object.cc b/test/980-redefine-object/redefine_object.cc
new file mode 100644
index 0000000..daae087
--- /dev/null
+++ b/test/980-redefine-object/redefine_object.cc
@@ -0,0 +1,54 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <inttypes.h>
+#include <iostream>
+
+#include "android-base/stringprintf.h"
+#include "base/logging.h"
+#include "base/macros.h"
+#include "jni.h"
+#include "jvmti.h"
+#include "ScopedUtfChars.h"
+
+#include "ti-agent/common_helper.h"
+#include "ti-agent/common_load.h"
+
+namespace art {
+namespace Test980RedefineObjects {
+
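+// Binds the native methods declared on 'target' (here the TestWatcher class) to
+// their implementations in this test library.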
+extern "C" JNIEXPORT void JNICALL Java_Main_bindFunctionsForClass(
+    JNIEnv* env, jclass Main_klass ATTRIBUTE_UNUSED, jclass target) {
+  BindFunctionsOnClass(jvmti_env, env, target);
+}
+
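+// Called from the redefined Object.<init>: prints the type of each constructed
+// object and then frees the jvmti-allocated signature strings.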
+extern "C" JNIEXPORT void JNICALL Java_art_test_TestWatcher_NotifyConstructed(
+    JNIEnv* env, jclass TestWatcherClass ATTRIBUTE_UNUSED, jobject constructed) {
+  char* sig = nullptr;
+  char* generic_sig = nullptr;
+  if (JvmtiErrorToException(env, jvmti_env->GetClassSignature(env->GetObjectClass(constructed),
+                                                              &sig,
+                                                              &generic_sig))) {
+    // Exception.
+    return;
+  }
+  std::cout << "Object allocated of type '" << sig << "'" << std::endl;
+  jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(sig));
+  jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(generic_sig));
+}
+
+}  // namespace Test980RedefineObjects
+}  // namespace art
diff --git a/test/577-profile-foreign-dex/run b/test/980-redefine-object/run
old mode 100644
new mode 100755
similarity index 85%
copy from test/577-profile-foreign-dex/run
copy to test/980-redefine-object/run
index ad57d14..c6e62ae
--- a/test/577-profile-foreign-dex/run
+++ b/test/980-redefine-object/run
@@ -14,7 +14,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-exec ${RUN} \
-  --runtime-option -Xjitsaveprofilinginfo \
-  --runtime-option -Xusejit:true \
-  "${@}"
+./default-run "$@" --jvmti
diff --git a/test/980-redefine-object/src-ex/TestWatcher.java b/test/980-redefine-object/src-ex/TestWatcher.java
new file mode 100644
index 0000000..d15e688
--- /dev/null
+++ b/test/980-redefine-object/src-ex/TestWatcher.java
@@ -0,0 +1,25 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package art.test;
+
+public class TestWatcher {
+  // NB This function is native since it is called in the Object.<init> method and so cannot cause
+  // any Java allocations at all. The normal System.out.print* functions will cause allocations to
+  // occur, so we cannot use them. This means the easiest way to report the object as being created
+  // is to go into native code and do it there.
+  public static native void NotifyConstructed(Object o);
+}
diff --git a/test/980-redefine-object/src/Main.java b/test/980-redefine-object/src/Main.java
new file mode 100644
index 0000000..348951c
--- /dev/null
+++ b/test/980-redefine-object/src/Main.java
@@ -0,0 +1,390 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.ArrayList;
+import java.util.Base64;
+import java.util.LinkedList;
+
+public class Main {
+
+  // TODO We should make this run on the RI.
+  /**
+   * This test cannot be run on the RI, so the class-file bytes are left empty.
+   */
+  private static final byte[] CLASS_BYTES = new byte[0];
+
+  // TODO It might be a good idea to replace this hard-coded Object definition with a
+  // retransformation-based test.
+  /**
+   * Base64 encoding of the following smali file.
+   *
+   *  .class public Ljava/lang/Object;
+   *  .source "Object.java"
+   *  # instance fields
+   *  .field private transient shadow$_klass_:Ljava/lang/Class;
+   *      .annotation system Ldalvik/annotation/Signature;
+   *          value = {
+   *              "Ljava/lang/Class",
+   *              "<*>;"
+   *          }
+   *      .end annotation
+   *  .end field
+   *
+   *  .field private transient shadow$_monitor_:I
+   *  # direct methods
+   *  .method public constructor <init>()V
+   *      .registers 1
+   *      .prologue
+   *      invoke-static {p0}, Lart/test/TestWatcher;->NotifyConstructed(Ljava/lang/Object;)V
+   *      return-void
+   *  .end method
+   *
+   *  .method static identityHashCode(Ljava/lang/Object;)I
+   *      .registers 7
+   *      .prologue
+   *      iget v0, p0, Ljava/lang/Object;->shadow$_monitor_:I
+   *      const/high16 v3, -0x40000000    # -2.0f
+   *      const/high16 v2, -0x80000000
+   *      const v1, 0xfffffff
+   *      const/high16 v4, -0x40000000    # -2.0f
+   *      and-int/2addr v4, v0
+   *      const/high16 v5, -0x80000000
+   *      if-ne v4, v5, :cond_15
+   *      const v4, 0xfffffff
+   *      and-int/2addr v4, v0
+   *      return v4
+   *      :cond_15
+   *      invoke-static {p0}, Ljava/lang/Object;->identityHashCodeNative(Ljava/lang/Object;)I
+   *      move-result v4
+   *      return v4
+   *  .end method
+   *
+   *  .method private static native identityHashCodeNative(Ljava/lang/Object;)I
+   *      .annotation build Ldalvik/annotation/optimization/FastNative;
+   *      .end annotation
+   *  .end method
+   *
+   *  .method private native internalClone()Ljava/lang/Object;
+   *      .annotation build Ldalvik/annotation/optimization/FastNative;
+   *      .end annotation
+   *  .end method
+   *
+   *
+   *  # virtual methods
+   *  .method protected clone()Ljava/lang/Object;
+   *      .registers 4
+   *      .annotation system Ldalvik/annotation/Throws;
+   *          value = {
+   *              Ljava/lang/CloneNotSupportedException;
+   *          }
+   *      .end annotation
+   *
+   *      .prologue
+   *      instance-of v0, p0, Ljava/lang/Cloneable;
+   *      if-nez v0, :cond_2d
+   *      new-instance v0, Ljava/lang/CloneNotSupportedException;
+   *      new-instance v1, Ljava/lang/StringBuilder;
+   *      invoke-direct {v1}, Ljava/lang/StringBuilder;-><init>()V
+   *      const-string/jumbo v2, "Class "
+   *      invoke-virtual {v1, v2}, Ljava/lang/StringBuilder;->append(Ljava/lang/String;)Ljava/lang/StringBuilder;
+   *      move-result-object v1
+   *      invoke-virtual {p0}, Ljava/lang/Object;->getClass()Ljava/lang/Class;
+   *      move-result-object v2
+   *      invoke-virtual {v2}, Ljava/lang/Class;->getName()Ljava/lang/String;
+   *      move-result-object v2
+   *      invoke-virtual {v1, v2}, Ljava/lang/StringBuilder;->append(Ljava/lang/String;)Ljava/lang/StringBuilder;
+   *      move-result-object v1
+   *      const-string/jumbo v2, " doesn\'t implement Cloneable"
+   *      invoke-virtual {v1, v2}, Ljava/lang/StringBuilder;->append(Ljava/lang/String;)Ljava/lang/StringBuilder;
+   *      move-result-object v1
+   *      invoke-virtual {v1}, Ljava/lang/StringBuilder;->toString()Ljava/lang/String;
+   *      move-result-object v1
+   *      invoke-direct {v0, v1}, Ljava/lang/CloneNotSupportedException;-><init>(Ljava/lang/String;)V
+   *      throw v0
+   *      :cond_2d
+   *      invoke-direct {p0}, Ljava/lang/Object;->internalClone()Ljava/lang/Object;
+   *      move-result-object v0
+   *      return-object v0
+   *  .end method
+   *
+   *  .method public equals(Ljava/lang/Object;)Z
+   *      .registers 3
+   *      .prologue
+   *      if-ne p0, p1, :cond_4
+   *      const/4 v0, 0x1
+   *      :goto_3
+   *      return v0
+   *      :cond_4
+   *      const/4 v0, 0x0
+   *      goto :goto_3
+   *  .end method
+   *
+   *  .method protected finalize()V
+   *      .registers 1
+   *      .annotation system Ldalvik/annotation/Throws;
+   *          value = {
+   *              Ljava/lang/Throwable;
+   *          }
+   *      .end annotation
+   *      .prologue
+   *      return-void
+   *  .end method
+   *
+   *  .method public final getClass()Ljava/lang/Class;
+   *      .registers 2
+   *      .annotation system Ldalvik/annotation/Signature;
+   *          value = {
+   *              "()",
+   *              "Ljava/lang/Class",
+   *              "<*>;"
+   *          }
+   *      .end annotation
+   *      .prologue
+   *      iget-object v0, p0, Ljava/lang/Object;->shadow$_klass_:Ljava/lang/Class;
+   *      return-object v0
+   *  .end method
+   *
+   *  .method public hashCode()I
+   *      .registers 2
+   *      .prologue
+   *      invoke-static {p0}, Ljava/lang/Object;->identityHashCode(Ljava/lang/Object;)I
+   *      move-result v0
+   *      return v0
+   *  .end method
+   *
+   *  .method public final native notify()V
+   *      .annotation build Ldalvik/annotation/optimization/FastNative;
+   *      .end annotation
+   *  .end method
+   *
+   *  .method public final native notifyAll()V
+   *      .annotation build Ldalvik/annotation/optimization/FastNative;
+   *      .end annotation
+   *  .end method
+   *
+   *  .method public toString()Ljava/lang/String;
+   *      .registers 3
+   *      .prologue
+   *      new-instance v0, Ljava/lang/StringBuilder;
+   *      invoke-direct {v0}, Ljava/lang/StringBuilder;-><init>()V
+   *      invoke-virtual {p0}, Ljava/lang/Object;->getClass()Ljava/lang/Class;
+   *      move-result-object v1
+   *      invoke-virtual {v1}, Ljava/lang/Class;->getName()Ljava/lang/String;
+   *      move-result-object v1
+   *      invoke-virtual {v0, v1}, Ljava/lang/StringBuilder;->append(Ljava/lang/String;)Ljava/lang/StringBuilder;
+   *      move-result-object v0
+   *      const-string/jumbo v1, "@"
+   *      invoke-virtual {v0, v1}, Ljava/lang/StringBuilder;->append(Ljava/lang/String;)Ljava/lang/StringBuilder;
+   *      move-result-object v0
+   *      invoke-virtual {p0}, Ljava/lang/Object;->hashCode()I
+   *      move-result v1
+   *      invoke-static {v1}, Ljava/lang/Integer;->toHexString(I)Ljava/lang/String;
+   *      move-result-object v1
+   *      invoke-virtual {v0, v1}, Ljava/lang/StringBuilder;->append(Ljava/lang/String;)Ljava/lang/StringBuilder;
+   *      move-result-object v0
+   *      invoke-virtual {v0}, Ljava/lang/StringBuilder;->toString()Ljava/lang/String;
+   *      move-result-object v0
+   *      return-object v0
+   *  .end method
+   *
+   *  .method public final native wait()V
+   *      .annotation system Ldalvik/annotation/Throws;
+   *          value = {
+   *              Ljava/lang/InterruptedException;
+   *          }
+   *      .end annotation
+   *
+   *      .annotation build Ldalvik/annotation/optimization/FastNative;
+   *      .end annotation
+   *  .end method
+   *
+   *  .method public final wait(J)V
+   *      .registers 4
+   *      .annotation system Ldalvik/annotation/Throws;
+   *          value = {
+   *              Ljava/lang/InterruptedException;
+   *          }
+   *      .end annotation
+   *      .prologue
+   *      const/4 v0, 0x0
+   *      invoke-virtual {p0, p1, p2, v0}, Ljava/lang/Object;->wait(JI)V
+   *      return-void
+   *  .end method
+   *
+   *  .method public final native wait(JI)V
+   *      .annotation system Ldalvik/annotation/Throws;
+   *          value = {
+   *              Ljava/lang/InterruptedException;
+   *          }
+   *      .end annotation
+   *
+   *      .annotation build Ldalvik/annotation/optimization/FastNative;
+   *      .end annotation
+   *  .end method
+   */
+  private static final byte[] DEX_BYTES = Base64.getDecoder().decode(
+      "ZGV4CjAzNQDUlMR9j03MYuOKekKs2p7zJzu2IfDb7RlMCgAAcAAAAHhWNBIAAAAAAAAAAIgJAAA6" +
+      "AAAAcAAAABEAAABYAQAADQAAAJwBAAACAAAAOAIAABYAAABIAgAAAQAAAPgCAAA0BwAAGAMAABgD" +
+      "AAA2AwAAOgMAAEADAABIAwAASwMAAFMDAABWAwAAWgMAAF0DAABgAwAAZAMAAGgDAACAAwAAnwMA" +
+      "ALsDAADoAwAA+gMAAA0EAAA1BAAATAQAAGEEAACDBAAAlwQAAKsEAADGBAAA3QQAAPAEAAD9BAAA" +
+      "AAUAAAQFAAAJBQAADQUAABAFAAAUBQAAHAUAACMFAAArBQAANQUAAD8FAABIBQAAUgUAAGQFAAB8" +
+      "BQAAiwUAAJUFAACnBQAAugUAAM0FAADVBQAA3QUAAOgFAADtBQAA/QUAAA8GAAAcBgAAJgYAAC0G" +
+      "AAAGAAAACAAAAAwAAAANAAAADgAAAA8AAAARAAAAEgAAABMAAAAUAAAAFQAAABYAAAAXAAAAGAAA" +
+      "ABkAAAAcAAAAIAAAAAYAAAAAAAAAAAAAAAcAAAAAAAAAPAYAAAkAAAAGAAAAAAAAAAkAAAALAAAA" +
+      "AAAAAAkAAAAMAAAAAAAAAAoAAAAMAAAARAYAAAsAAAANAAAAVAYAABwAAAAPAAAAAAAAAB0AAAAP" +
+      "AAAATAYAAB4AAAAPAAAANAYAAB8AAAAPAAAAPAYAAB8AAAAPAAAAVAYAACEAAAAQAAAAPAYAAAsA" +
+      "BgA0AAAACwAAADUAAAACAAoAGgAAAAYABAAnAAAABwALAAMAAAAJAAUANgAAAAsABwADAAAACwAD" +
+      "ACMAAAALAAwAJAAAAAsABwAlAAAACwACACYAAAALAAAAKAAAAAsAAQApAAAACwABACoAAAALAAMA" +
+      "KwAAAAsABwAxAAAACwAHADIAAAALAAQANwAAAAsABwA5AAAACwAIADkAAAALAAkAOQAAAA0ABwAD" +
+      "AAAADQAGACIAAAANAAQANwAAAAsAAAABAAAA/////wAAAAAbAAAA0AYAAD4JAAAAAAAAHCBkb2Vz" +
+      "bid0IGltcGxlbWVudCBDbG9uZWFibGUAAigpAAQ8Kj47AAY8aW5pdD4AAUAABkNsYXNzIAABSQAC" +
+      "SUwAAUoAAUwAAkxJAAJMTAAWTGFydC90ZXN0L1Rlc3RXYXRjaGVyOwAdTGRhbHZpay9hbm5vdGF0" +
+      "aW9uL1NpZ25hdHVyZTsAGkxkYWx2aWsvYW5ub3RhdGlvbi9UaHJvd3M7ACtMZGFsdmlrL2Fubm90" +
+      "YXRpb24vb3B0aW1pemF0aW9uL0Zhc3ROYXRpdmU7ABBMamF2YS9sYW5nL0NsYXNzABFMamF2YS9s" +
+      "YW5nL0NsYXNzOwAmTGphdmEvbGFuZy9DbG9uZU5vdFN1cHBvcnRlZEV4Y2VwdGlvbjsAFUxqYXZh" +
+      "L2xhbmcvQ2xvbmVhYmxlOwATTGphdmEvbGFuZy9JbnRlZ2VyOwAgTGphdmEvbGFuZy9JbnRlcnJ1" +
+      "cHRlZEV4Y2VwdGlvbjsAEkxqYXZhL2xhbmcvT2JqZWN0OwASTGphdmEvbGFuZy9TdHJpbmc7ABlM" +
+      "amF2YS9sYW5nL1N0cmluZ0J1aWxkZXI7ABVMamF2YS9sYW5nL1Rocm93YWJsZTsAEU5vdGlmeUNv" +
+      "bnN0cnVjdGVkAAtPYmplY3QuamF2YQABVgACVkoAA1ZKSQACVkwAAVoAAlpMAAZhcHBlbmQABWNs" +
+      "b25lAAZlcXVhbHMACGZpbmFsaXplAAhnZXRDbGFzcwAHZ2V0TmFtZQAIaGFzaENvZGUAEGlkZW50" +
+      "aXR5SGFzaENvZGUAFmlkZW50aXR5SGFzaENvZGVOYXRpdmUADWludGVybmFsQ2xvbmUACGxvY2tX" +
+      "b3JkABBsb2NrV29yZEhhc2hNYXNrABFsb2NrV29yZFN0YXRlSGFzaAARbG9ja1dvcmRTdGF0ZU1h" +
+      "c2sABm1pbGxpcwAGbm90aWZ5AAlub3RpZnlBbGwAA29iagAOc2hhZG93JF9rbGFzc18AEHNoYWRv" +
+      "dyRfbW9uaXRvcl8AC3RvSGV4U3RyaW5nAAh0b1N0cmluZwAFdmFsdWUABHdhaXQAAAIAAAABAAAA" +
+      "AQAAAAsAAAABAAAAAAAAAAEAAAABAAAAAQAAAAwAAgQBOBwBGAcCBAE4HAEYCgIDATgcAhcQFwIC" +
+      "BAE4HAEYDgAFAAIDATgcAxcBFxAXAgAAAAAAAAAAAAEAAABaBgAAAgAAAGIGAAB8BgAAAQAAAGIG" +
+      "AAABAAAAagYAAAEAAAB0BgAAAQAAAHwGAAABAAAAfwYAAAAAAAABAAAACgAAAAAAAAAAAAAAsAYA" +
+      "AAUAAACUBgAABwAAALgGAAAIAAAAyAYAAAsAAADABgAADAAAAMAGAAANAAAAwAYAAA4AAADABgAA" +
+      "EAAAAJwGAAARAAAAqAYAABIAAACcBgAAKAAHDgBwATQHDi0DAC0BLQMDMAEtAwIvATwDAS4BeFsA" +
+      "7AEABw5LARoPOsYArAEBNAcOAMUEAAcOAEEABw4AaAAHDgCRAgAHDgCmAwExBw5LAAAAAQABAAEA" +
+      "AAA4BwAABAAAAHEQAAAAAA4ABwABAAEAAAA9BwAAGgAAAFJgAQAVAwDAFQIAgBQB////DxUEAMC1" +
+      "BBUFAIAzVAcAFAT///8PtQQPBHEQCwAGAAoEDwQEAAEAAgAAAFkHAAAyAAAAIDAIADkAKwAiAAcA" +
+      "IgENAHAQEwABABsCBQAAAG4gFAAhAAwBbhAIAAMADAJuEAEAAgAMAm4gFAAhAAwBGwIAAAAAbiAU" +
+      "ACEADAFuEBUAAQAMAXAgAgAQACcAcBAMAAMADAARAAMAAgAAAAAAZQcAAAYAAAAzIQQAEhAPABIA" +
+      "KP4BAAEAAAAAAGwHAAABAAAADgAAAAIAAQAAAAAAcgcAAAMAAABUEAAAEQAAAAIAAQABAAAAdwcA" +
+      "AAUAAABxEAoAAQAKAA8AAAADAAEAAgAAAHwHAAApAAAAIgANAHAQEwAAAG4QCAACAAwBbhABAAEA" +
+      "DAFuIBQAEAAMABsBBAAAAG4gFAAQAAwAbhAJAAIACgFxEAMAAQAMAW4gFAAQAAwAbhAVAAAADAAR" +
+      "AAAABAADAAQAAACCBwAABQAAABIAbkASACEDDgAAAgQLAIIBAYIBBIGABIwPBgikDwGKAgABggIA" +
+      "BQToDwEB3BABBPgQARGMEQEBpBEEkQIAAZECAAEBwBEBkQIAARGkEgGRAgAAABAAAAAAAAAAAQAA" +
+      "AAAAAAABAAAAOgAAAHAAAAACAAAAEQAAAFgBAAADAAAADQAAAJwBAAAEAAAAAgAAADgCAAAFAAAA" +
+      "FgAAAEgCAAAGAAAAAQAAAPgCAAACIAAAOgAAABgDAAABEAAABQAAADQGAAAEIAAABgAAAFoGAAAD" +
+      "EAAACQAAAIwGAAAGIAAAAQAAANAGAAADIAAACQAAADgHAAABIAAACQAAAIwHAAAAIAAAAQAAAD4J" +
+      "AAAAEAAAAQAAAIgJAAA=");
+
+  private static final String LISTENER_LOCATION =
+      System.getenv("DEX_LOCATION") + "/980-redefine-object-ex.jar";
+
+  public static void main(String[] args) {
+    doTest();
+  }
+
+  private static void ensureTestWatcherInitialized() {
+    try {
+      // Make sure the TestWatcher class can be found from the Object <init> function.
+      addToBootClassLoader(LISTENER_LOCATION);
+      // Load TestWatcher from the bootclassloader and make sure it is initialized.
+      Class<?> testwatcher_class = Class.forName("art.test.TestWatcher", true, null);
+      // Bind the native functions of testwatcher_class.
+      bindFunctionsForClass(testwatcher_class);
+    } catch (Exception e) {
+      throw new Error("Exception while making testwatcher", e);
+    }
+  }
+
+  // NB This function will cause 2 objects, of types "Ljava/lang/StringBuilder;" and
+  // "Ljava/nio/HeapCharBuffer;", to be allocated each time it is called.
+  private static void safePrintln(Object o) {
+    System.out.flush();
+    System.out.print("\t" + o + "\n");
+    System.out.flush();
+  }
+
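+  // Recurse 'depth' frames down before throwing so the exception is allocated
+  // several stack frames below doTest.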
+  private static void throwFrom(int depth) throws Exception {
+    if (depth <= 0) {
+      throw new Exception("Throwing the exception");
+    } else {
+      throwFrom(depth - 1);
+    }
+  }
+
+  public static void doTest() {
+    safePrintln("Initializing and loading the TestWatcher class that will (eventually) be " +
+                "notified of object allocations");
+    // Make sure the TestWatcher class is initialized before we do anything else.
+    ensureTestWatcherInitialized();
+    safePrintln("Allocating a j.l.Object before redefining Object class");
+    // Make sure these aren't shown.
+    Object o = new Object();
+    safePrintln("Allocating a Transform before redefining Object class");
+    Transform t = new Transform();
+
+    // Redefine the Object Class.
+    safePrintln("Redefining the Object class to add a hook into the <init> method");
+    doCommonClassRedefinition(Object.class, CLASS_BYTES, DEX_BYTES);
+
+    safePrintln("Allocating a j.l.Object after redefining Object class");
+    Object o2 = new Object();
+    safePrintln("Allocating a Transform after redefining Object class");
+    Transform t2 = new Transform();
+
+    // This shouldn't cause the Object constructor to be run.
+    safePrintln("Allocating an int[] after redefining Object class");
+    int[] abc = new int[12];
+
+    // Try adding stuff to an array list.
+    safePrintln("Allocating an array list");
+    ArrayList<Object> al = new ArrayList<>();
+    safePrintln("Adding a bunch of stuff to the array list");
+    al.add(new Object());
+    al.add(new Object());
+    al.add(o2);
+    al.add(o);
+    al.add(t);
+    al.add(t2);
+    al.add(new Transform());
+
+    // Try adding stuff to a LinkedList
+    safePrintln("Allocating a linked list");
+    LinkedList<Object> ll = new LinkedList<>();
+    safePrintln("Adding a bunch of stuff to the linked list");
+    ll.add(new Object());
+    ll.add(new Object());
+    ll.add(o2);
+    ll.add(o);
+    ll.add(t);
+    ll.add(t2);
+    ll.add(new Transform());
+
+    // Try making an exception.
+    safePrintln("Throwing from down 4 stack frames");
+    try {
+      throwFrom(4);
+    } catch (Exception e) {
+      safePrintln("Exception caught.");
+    }
+
+    safePrintln("Finishing test!");
+  }
+
+  private static native void addToBootClassLoader(String s);
+
+  private static native void bindFunctionsForClass(Class<?> target);
+
+  // Transforms the class
+  private static native void doCommonClassRedefinition(Class<?> target,
+                                                       byte[] class_file,
+                                                       byte[] dex_file);
+}
diff --git a/test/577-profile-foreign-dex/src-ex/OtherDex.java b/test/980-redefine-object/src/Transform.java
similarity index 95%
rename from test/577-profile-foreign-dex/src-ex/OtherDex.java
rename to test/980-redefine-object/src/Transform.java
index cba73b3..23f67d9 100644
--- a/test/577-profile-foreign-dex/src-ex/OtherDex.java
+++ b/test/980-redefine-object/src/Transform.java
@@ -13,5 +13,5 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-public class OtherDex {
-}
+
+class Transform { }
diff --git a/test/Android.bp b/test/Android.bp
index 00c890a..4ebfd74 100644
--- a/test/Android.bp
+++ b/test/Android.bp
@@ -275,10 +275,12 @@
         "936-search-onload/search_onload.cc",
         "944-transform-classloaders/classloader.cc",
         "945-obsolete-native/obsolete_native.cc",
+        "980-redefine-object/redefine_object.cc",
     ],
     shared_libs: [
         "libbase",
     ],
+    header_libs: ["libopenjdkjvmti_headers"],
 }
 
 art_cc_test_library {
@@ -335,6 +337,7 @@
         "596-monitor-inflation/monitor_inflation.cc",
         "597-deopt-new-string/deopt.cc",
         "626-const-class-linking/clear_dex_cache_types.cc",
+        "642-fp-callees/fp_callees.cc",
     ],
     shared_libs: [
         "libbacktrace",
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index 95967b5..01eb14e 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -222,6 +222,7 @@
 $(shell echo $(1) | tr '[:lower:]' '[:upper:]' | tr '-' '_')
 endef  # name-to-var
 
+# Disable 115-native-bridge, as it fails when run through make. b/35984597.
 # Disable 153-reference-stress temporarily until a fix arrives. b/33389022.
 # Disable 080-oom-fragmentation due to flakes. b/33795328
 # Disable 497-inlining-and-class-loader and 542-unresolved-access-check until
@@ -229,6 +230,7 @@
 #     register a dex file that's already registered with a different loader.
 #     b/34193123
 ART_TEST_RUN_TEST_SKIP += \
+  115-native-bridge \
   153-reference-stress \
   080-oom-fragmentation \
   497-inlining-and-class-loader \
@@ -240,10 +242,8 @@
 
 
 # Disable 149-suspend-all-stress, its output is flaky (b/28988206).
-# Disable 577-profile-foreign-dex (b/27454772).
 TEST_ART_BROKEN_ALL_TARGET_TESTS := \
   149-suspend-all-stress \
-  577-profile-foreign-dex \
 
 ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
     $(COMPILER_TYPES), $(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
@@ -368,6 +368,7 @@
 # Tests that are broken with GC stress.
 # * 137-cfi needs to unwind a second forked process. We're using a primitive sleep to wait till we
 #   hope the second process got into the expected state. The slowness of gcstress makes this bad.
+# * 152-dead-large-object requires a heap larger than what gcstress uses.
 # * 908-gc-start-finish expects GCs only to be run at clear points. The reduced heap size makes
 #   this non-deterministic. Same for 913.
 #  * 961-default-iface-resolution-gen and 964-default-iface-init-gen are very long tests that often
@@ -375,6 +376,7 @@
 #   slows down allocations significantly which these tests do a lot.
 TEST_ART_BROKEN_GCSTRESS_RUN_TESTS := \
   137-cfi \
+  152-dead-large-object \
   154-gc-loop \
   908-gc-start-finish \
   913-heaps \
@@ -525,12 +527,14 @@
 # Known broken tests for the JIT.
 # CFI unwinding expects managed frames, and the test does not iterate enough to even compile. JIT
 # also uses Generic JNI instead of the JNI compiler.
+# 154-gc-loop requires more deterministic GC behavior than the JIT configuration provides.
 # Test 906 iterates the heap filtering with different options. No instances should be created
 # between those runs to be able to have precise checks.
 # Test 629 requires compilation.
 # 912: b/34655682
 TEST_ART_BROKEN_JIT_RUN_TESTS := \
   137-cfi \
+  154-gc-loop \
   629-vdex-speed \
   904-object-allocation \
   906-iterate-heap \
@@ -620,16 +624,18 @@
 TEST_ART_BROKEN_JIT_READ_BARRIER_RUN_TESTS :=
 
 # Tests failing in non-Baker read barrier configurations with the Optimizing compiler (AOT).
-# 537: Expects an array copy to be intrinsified, but calling-on-slowpath intrinsics are not yet
+# 537 and 641: Expect an array copy to be intrinsified, but calling-on-slowpath intrinsics are not yet
 #      handled in non-Baker read barrier configurations.
 TEST_ART_BROKEN_OPTIMIZING_NON_BAKER_READ_BARRIER_RUN_TESTS := \
-  537-checker-arraycopy
+  537-checker-arraycopy \
+  641-checker-arraycopy
 
 # Tests failing in non-Baker read barrier configurations with JIT (Optimizing compiler).
-# 537: Expects an array copy to be intrinsified, but calling-on-slowpath intrinsics are not yet
+# 537 and 641: Expect an array copy to be intrinsified, but calling-on-slowpath intrinsics are not yet
 #      handled in non-Baker read barrier configurations.
 TEST_ART_BROKEN_JIT_NON_BAKER_READ_BARRIER_RUN_TESTS := \
-  537-checker-arraycopy
+  537-checker-arraycopy \
+  641-checker-arraycopy
 
 ifeq ($(ART_USE_READ_BARRIER),true)
   ifneq (,$(filter interpreter,$(COMPILER_TYPES)))
@@ -810,6 +816,12 @@
 TEST_ART_TARGET_SYNC_DEPS += libopenjdkjvmti
 TEST_ART_TARGET_SYNC_DEPS += libopenjdkjvmtid
 
+TEST_ART_TARGET_SYNC_DEPS += $(TARGET_OUT_JAVA_LIBRARIES)/core-libart-testdex.jar
+TEST_ART_TARGET_SYNC_DEPS += $(TARGET_OUT_JAVA_LIBRARIES)/core-oj-testdex.jar
+TEST_ART_TARGET_SYNC_DEPS += $(TARGET_OUT_JAVA_LIBRARIES)/okhttp-testdex.jar
+TEST_ART_TARGET_SYNC_DEPS += $(TARGET_OUT_JAVA_LIBRARIES)/bouncycastle-testdex.jar
+TEST_ART_TARGET_SYNC_DEPS += $(TARGET_OUT_JAVA_LIBRARIES)/conscrypt-testdex.jar
+
 # All tests require the host executables. The tests also depend on the core images, but on
 # specific version depending on the compiler.
 ART_TEST_HOST_RUN_TEST_DEPENDENCIES := \
diff --git a/test/ProfileTestMultiDex/Main.java b/test/ProfileTestMultiDex/Main.java
index 41532ea..a8ced54 100644
--- a/test/ProfileTestMultiDex/Main.java
+++ b/test/ProfileTestMultiDex/Main.java
@@ -25,3 +25,45 @@
     return "C";
   }
 }
+
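+// The methods below intentionally share the same body; presumably the tests invoke them
+// with different mixes of receiver types so the recorded profile contains monomorphic,
+// polymorphic, megamorphic, and missing inline-cache entries (inferred from the names).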
+class TestInline {
+  public int inlineMonomorphic(Super s) {
+    return s.getValue();
+  }
+
+  public int inlinePolymorphic(Super s) {
+    return s.getValue();
+  }
+
+  public int inlineMegamorphic(Super s) {
+    return s.getValue();
+  }
+
+  public int inlineMissingTypes(Super s) {
+    return s.getValue();
+  }
+
+  public int noInlineCache(Super s) {
+    return s.getValue();
+  }
+}
+
+abstract class Super {
+  abstract int getValue();
+}
+
+class SubA extends Super {
+  int getValue() { return 42; }
+}
+
+class SubB extends Super {
+  int getValue() { return 38; }
+}
+
+class SubD extends Super {
+  int getValue() { return 20; }
+}
+
+class SubE extends Super {
+  int getValue() { return 16; }
+}
diff --git a/test/ProfileTestMultiDex/Second.java b/test/ProfileTestMultiDex/Second.java
index 4ac5abc..4b3c7a4 100644
--- a/test/ProfileTestMultiDex/Second.java
+++ b/test/ProfileTestMultiDex/Second.java
@@ -25,3 +25,8 @@
     return "Z";
   }
 }
+
+class SubC extends Super {
+  int getValue() { return 24; }
+}
+
diff --git a/test/ProfileTestMultiDex/main.jpp b/test/ProfileTestMultiDex/main.jpp
index f2e3b4e..5e55e96 100644
--- a/test/ProfileTestMultiDex/main.jpp
+++ b/test/ProfileTestMultiDex/main.jpp
@@ -1,3 +1,21 @@
-main:
+Main:
   @@com.android.jack.annotations.ForceInMainDex
-  class Second
+  class Main
+TestInline:
+  @@com.android.jack.annotations.ForceInMainDex
+  class TestInline
+Super:
+  @@com.android.jack.annotations.ForceInMainDex
+  class Super
+SubA:
+  @@com.android.jack.annotations.ForceInMainDex
+  class SubA
+SubB:
+  @@com.android.jack.annotations.ForceInMainDex
+  class SubB
+SubD:
+  @@com.android.jack.annotations.ForceInMainDex
+  class SubD
+SubE:
+  @@com.android.jack.annotations.ForceInMainDex
+  class SubE
diff --git a/test/ProfileTestMultiDex/main.list b/test/ProfileTestMultiDex/main.list
index 44ba78e..ec131f0 100644
--- a/test/ProfileTestMultiDex/main.list
+++ b/test/ProfileTestMultiDex/main.list
@@ -1 +1,7 @@
 Main.class
+TestInline.class
+Super.class
+SubA.class
+SubB.class
+SubD.class
+SubE.class
diff --git a/test/577-profile-foreign-dex/run b/test/VerifierDeps/MySub1SoftVerificationFailure.smali
similarity index 76%
copy from test/577-profile-foreign-dex/run
copy to test/VerifierDeps/MySub1SoftVerificationFailure.smali
index ad57d14..8123394 100644
--- a/test/577-profile-foreign-dex/run
+++ b/test/VerifierDeps/MySub1SoftVerificationFailure.smali
@@ -1,6 +1,4 @@
-#!/bin/bash
-#
-# Copyright 2016 The Android Open Source Project
+# Copyright (C) 2017 The Android Open Source Project
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -14,7 +12,5 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-exec ${RUN} \
-  --runtime-option -Xjitsaveprofilinginfo \
-  --runtime-option -Xusejit:true \
-  "${@}"
+.class public LMySub1SoftVerificationFailure;
+.super LMySoftVerificationFailure;
diff --git a/test/577-profile-foreign-dex/run b/test/VerifierDeps/MySub2SoftVerificationFailure.smali
similarity index 76%
copy from test/577-profile-foreign-dex/run
copy to test/VerifierDeps/MySub2SoftVerificationFailure.smali
index ad57d14..8d00323 100644
--- a/test/577-profile-foreign-dex/run
+++ b/test/VerifierDeps/MySub2SoftVerificationFailure.smali
@@ -1,6 +1,4 @@
-#!/bin/bash
-#
-# Copyright 2016 The Android Open Source Project
+# Copyright (C) 2017 The Android Open Source Project
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -14,7 +12,5 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-exec ${RUN} \
-  --runtime-option -Xjitsaveprofilinginfo \
-  --runtime-option -Xusejit:true \
-  "${@}"
+.class public LMySub2SoftVerificationFailure;
+.super LMySoftVerificationFailure;
diff --git a/test/577-profile-foreign-dex/run b/test/VerifierDepsMulti/MySoftVerificationFailure.smali
similarity index 64%
copy from test/577-profile-foreign-dex/run
copy to test/VerifierDepsMulti/MySoftVerificationFailure.smali
index ad57d14..6b56a3b 100644
--- a/test/577-profile-foreign-dex/run
+++ b/test/VerifierDepsMulti/MySoftVerificationFailure.smali
@@ -1,6 +1,4 @@
-#!/bin/bash
-#
-# Copyright 2016 The Android Open Source Project
+# Copyright (C) 2017 The Android Open Source Project
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -14,7 +12,13 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-exec ${RUN} \
-  --runtime-option -Xjitsaveprofilinginfo \
-  --runtime-option -Xusejit:true \
-  "${@}"
+.class public LMySoftVerificationFailure;
+.super Ljava/lang/Object;
+
+.method public final foo()V
+  .registers 1
+  sget-object v0, LMySoftVerificationFailure;->error:LUnknownType;
+  throw v0
+.end method
+
+.field public static error:LUnknownType;
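+
+# LUnknownType; is deliberately unresolvable, so verifying foo() should record a
+# soft verification failure rather than a hard one (hence the class name).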
diff --git a/test/common/runtime_state.cc b/test/common/runtime_state.cc
index a841f9e..c7a57ce 100644
--- a/test/common/runtime_state.cc
+++ b/test/common/runtime_state.cc
@@ -180,6 +180,9 @@
   }
 
   jit::JitCodeCache* code_cache = jit->GetCodeCache();
+  // Update the code cache to make sure the JIT code does not get deleted.
+  // Note: this will apply to all JIT compilations.
+  code_cache->SetGarbageCollectCode(false);
   while (true) {
     const void* pc = method->GetEntryPointFromQuickCompiledCode();
     if (code_cache->ContainsPc(pc)) {
diff --git a/test/etc/default-build b/test/etc/default-build
index 4318966..d74b24d 100755
--- a/test/etc/default-build
+++ b/test/etc/default-build
@@ -97,7 +97,7 @@
 while true; do
   if [ "x$1" = "x--dx-option" ]; then
     shift
-    on="$1"
+    option="$1"
     DX_FLAGS="${DX_FLAGS} $option"
     shift
   elif [ "x$1" = "x--jvm" ]; then
@@ -209,9 +209,9 @@
     ${JACK} --import classes.jill.jar --output-dex .
   else
     if [ ${NEED_DEX} = "true" ]; then
-      ${DX} -JXmx256m --debug --dex --dump-to=classes-ex.lst --output=classes.dex --dump-width=1000 classes-ex
+      ${DX} -JXmx256m --debug --dex --dump-to=classes-ex.lst --output=classes.dex --dump-width=1000 ${DX_FLAGS} classes-ex
       zip ${TEST_NAME}-ex.jar classes.dex
-      ${DX} -JXmx256m --debug --dex --dump-to=classes.lst --output=classes.dex --dump-width=1000 classes
+      ${DX} -JXmx256m --debug --dex --dump-to=classes.lst --output=classes.dex --dump-width=1000 ${DX_FLAGS} classes
     fi
   fi
 else
diff --git a/test/etc/run-test-jar b/test/etc/run-test-jar
index f3d4332..808e58a 100755
--- a/test/etc/run-test-jar
+++ b/test/etc/run-test-jar
@@ -63,6 +63,7 @@
 TEST_IS_NDEBUG="n"
 APP_IMAGE="y"
 VDEX_FILTER=""
+PROFILE="n"
 
 # if "y", run 'sync' before dalvikvm to make sure all files from
 # build step (e.g. dex2oat) were finished writing.
@@ -269,6 +270,9 @@
     elif [ "x$1" = "x--sync" ]; then
         SYNC_BEFORE_RUN="y"
         shift
+    elif [ "x$1" = "x--profile" ]; then
+        PROFILE="y"
+        shift
     elif expr "x$1" : "x--" >/dev/null 2>&1; then
         echo "unknown $0 option: $1" 1>&2
         exit 1
@@ -371,6 +375,20 @@
 
 
 if [ "$HAVE_IMAGE" = "n" ]; then
+    if [ "${HOST}" = "y" ]; then
+        framework="${ANDROID_HOST_OUT}/framework"
+        bpath_suffix="-hostdex"
+    else
+        framework="${ANDROID_ROOT}/framework"
+        bpath_suffix="-testdex"
+    fi
+    bpath="${framework}/core-libart${bpath_suffix}.jar"
+    bpath="${bpath}:${framework}/core-oj${bpath_suffix}.jar"
+    bpath="${bpath}:${framework}/conscrypt${bpath_suffix}.jar"
+    bpath="${bpath}:${framework}/okhttp${bpath_suffix}.jar"
+    bpath="${bpath}:${framework}/bouncycastle${bpath_suffix}.jar"
+    # Pass down the bootclasspath
+    FLAGS="${FLAGS} -Xbootclasspath:${bpath}"
     # Add 5 minutes to give some time to generate the boot image.
     TIME_OUT_VALUE=$((${TIME_OUT_VALUE} + 300))
     DALVIKVM_BOOT_OPT="-Ximage:/system/non-existant/core.art"
@@ -426,25 +444,11 @@
 
 JNI_OPTS="-Xjnigreflimit:512 -Xcheck:jni"
 
+COMPILE_FLAGS="${COMPILE_FLAGS} --runtime-arg -Xnorelocate"
 if [ "$RELOCATE" = "y" ]; then
-    COMPILE_FLAGS="${COMPILE_FLAGS} --include-patch-information --runtime-arg -Xnorelocate"
-    FLAGS="${FLAGS} -Xrelocate -Xcompiler-option --include-patch-information"
-    if [ "$HOST" = "y" ]; then
-        # Run test sets a fairly draconian ulimit that we will likely blow right over
-        # since we are relocating. Get the total size of the /system/framework directory
-        # in 512 byte blocks and set it as the ulimit. This should be more than enough
-        # room.
-        if [ ! `uname` = "Darwin" ]; then  # TODO: Darwin doesn't support "du -B..."
-          ulimit -S $(du -c -B512 ${ANDROID_HOST_OUT}/framework 2>/dev/null | tail -1 | cut -f1) || exit 1
-        fi
-    fi
+    FLAGS="${FLAGS} -Xrelocate"
 else
     FLAGS="$FLAGS -Xnorelocate"
-    COMPILE_FLAGS="${COMPILE_FLAGS} --runtime-arg -Xnorelocate"
-    if [ "$HOST" = "y" ]; then
-        # Increase ulimit to 64MB in case we are running hprof test.
-        ulimit -S 64000 || exit 1
-    fi
 fi
 
 if [ "$HOST" = "n" ]; then
@@ -490,16 +494,28 @@
 DEX_LOCATION_STRIPPED="${DEX_LOCATION#/}"
 VDEX_NAME="${DEX_LOCATION_STRIPPED//\//@}@$TEST_NAME.jar@classes.vdex"
 if [ ${#VDEX_NAME} -gt $max_filename_size ]; then
-    echo  "Dex location path too long."
+    echo "Dex location path too long:"
+    echo "$VDEX_NAME is ${#VDEX_NAME} character long, and the limit is $max_filename_size."
     exit 1
 fi
 
+profman_cmdline="true"
 dex2oat_cmdline="true"
 vdex_cmdline="true"
 mkdir_locations="${DEX_LOCATION}/dalvik-cache/$ISA"
 strip_cmdline="true"
 sync_cmdline="true"
 
+if [ "$PROFILE" = "y" ]; then
+  profman_cmdline="${ANDROID_ROOT}/bin/profman \
+    --apk=$DEX_LOCATION/$TEST_NAME.jar \
+    --dex-location=$DEX_LOCATION/$TEST_NAME.jar \
+    --create-profile-from=$DEX_LOCATION/profile \
+    --reference-profile-file=$DEX_LOCATION/$TEST_NAME.prof"
+  COMPILE_FLAGS="${COMPILE_FLAGS} --profile-file=$DEX_LOCATION/$TEST_NAME.prof"
+  FLAGS="${FLAGS} -Xcompiler-option --profile-file=$DEX_LOCATION/$TEST_NAME.prof"
+fi
+
 
 if [ "$PREBUILD" = "y" ]; then
   mkdir_locations="${mkdir_locations} ${DEX_LOCATION}/oat/$ISA"
@@ -577,6 +593,7 @@
 dex2oat_cmdline=$(echo $dex2oat_cmdline)
 dalvikvm_cmdline=$(echo $dalvikvm_cmdline)
 vdex_cmdline=$(echo $vdex_cmdline)
+profman_cmdline=$(echo $profman_cmdline)
 
 if [ "$HOST" = "n" ]; then
     adb root > /dev/null
@@ -586,11 +603,18 @@
       adb shell mkdir -p $DEX_LOCATION
       adb push $TEST_NAME.jar $DEX_LOCATION
       adb push $TEST_NAME-ex.jar $DEX_LOCATION
+      if [ "$PROFILE" = "y" ]; then
+        adb push profile $DEX_LOCATION
+      fi
     else
       adb shell rm -r $DEX_LOCATION >/dev/null 2>&1
       adb shell mkdir -p $DEX_LOCATION >/dev/null 2>&1
       adb push $TEST_NAME.jar $DEX_LOCATION >/dev/null 2>&1
       adb push $TEST_NAME-ex.jar $DEX_LOCATION >/dev/null 2>&1
+      if [ "$PROFILE" = "y" ]; then
+        adb push profile $DEX_LOCATION >/dev/null 2>&1
+      fi
+
     fi
 
     LD_LIBRARY_PATH=/data/$TEST_DIRECTORY/art/$ISA
@@ -617,6 +641,7 @@
              mkdir -p ${mkdir_locations} && \
              export LD_LIBRARY_PATH=$LD_LIBRARY_PATH && \
              export PATH=$ANDROID_ROOT/bin:$PATH && \
+             $profman_cmdline && \
              $dex2oat_cmdline && \
              $vdex_cmdline && \
              $strip_cmdline && \
@@ -693,13 +718,14 @@
     fi
 
     if [ "$DEV_MODE" = "y" ]; then
-      echo "mkdir -p ${mkdir_locations} && $dex2oat_cmdline && $vdex_cmdline && $strip_cmdline && $sync_cmdline && $cmdline"
+      echo "mkdir -p ${mkdir_locations} && $profman_cmdline && $dex2oat_cmdline && $vdex_cmdline && $strip_cmdline && $sync_cmdline && $cmdline"
     fi
 
     cd $ANDROID_BUILD_TOP
 
     rm -rf ${DEX_LOCATION}/dalvik-cache/
     mkdir -p ${mkdir_locations} || exit 1
+    $profman_cmdline || { echo "Profman failed." >&2 ; exit 2; }
     $dex2oat_cmdline || { echo "Dex2oat failed." >&2 ; exit 2; }
     $vdex_cmdline || { echo "Dex2oat failed." >&2 ; exit 2; }
     $strip_cmdline || { echo "Strip failed." >&2 ; exit 3; }
diff --git a/test/knownfailures.json b/test/knownfailures.json
index 784f49c..535b94f 100644
--- a/test/knownfailures.json
+++ b/test/knownfailures.json
@@ -26,11 +26,6 @@
         "bug": "http://b/28988206"
     },
     {
-        "test": "577-profile-foreign-dex",
-        "description": "Disable 577-profile-foreign-dex",
-        "bug": "http://b/27454772"
-    },
-    {
         "tests": ["002-sleep",
                   "053-wait-some",
                   "055-enum-performance",
@@ -106,6 +101,12 @@
                         "slowness of gcstress makes this bad."]
     },
     {
+        "test": "152-dead-large-object",
+        "variant": "gcstress",
+        "description": ["152-dead-large-object requires a heap larger than what gcstress uses."],
+        "bug": "http://b/35800768"
+    },
+    {
         "tests": ["908-gc-start-finish",
                   "913-heaps"],
         "variant": "gcstress",
@@ -124,11 +125,16 @@
                         "lot."]
     },
     {
-        "tests": ["964-default-iface-init-gen",
-                 "154-gc-loop"],
+        "test": "964-default-iface-init-gen",
         "variant": "gcstress"
     },
     {
+        "test": "154-gc-loop",
+        "variant": "gcstress | jit & debug",
+        "description": ["154-gc-loop depends GC not happening too often"],
+        "bug": "http://b/35917229"
+    },
+    {
         "test": "115-native-bridge",
         "variant": "target",
         "description": ["115-native-bridge setup is complicated. Need to",
@@ -293,7 +299,7 @@
         "tests": ["000-nop",
                   "134-nodex2oat-nofallback",
                   "147-stripped-dex-fallback",
-                 "595-profile-saving"],
+                  "595-profile-saving"],
         "description": "The doesn't compile anything",
         "env_vars": {"ART_TEST_BISECTION": "true"},
         "variant": "optimizing | regalloc_gc"
@@ -318,7 +324,7 @@
     },
     {
         "tests": ["115-native-bridge",
-                 "088-monitor-verification"],
+                  "088-monitor-verification"],
         "description": "The test assume they are always compiled.",
         "env_vars": {"ART_TEST_BISECTION": "true"},
         "variant": "optimizing | regalloc_gc"
@@ -331,7 +337,8 @@
         "variant": "optimizing | regalloc_gc"
     },
     {
-        "test": "537-checker-arraycopy",
+        "tests": ["537-checker-arraycopy",
+                  "641-checker-arraycopy"],
         "env_vars": {"ART_USE_READ_BARRIER": "true"},
         "variant": "interpreter | optimizing | regalloc_gc | jit"
     }
diff --git a/test/run-test b/test/run-test
index e808dee..1715423 100755
--- a/test/run-test
+++ b/test/run-test
@@ -80,7 +80,7 @@
 
 # ANDROID_HOST_OUT is not set in a build environment.
 if [ -z "$ANDROID_HOST_OUT" ]; then
-    export ANDROID_HOST_OUT=${OUT_DIR:-$ANDROID_BUILD_TOP/out/}host/linux-x86
+    export ANDROID_HOST_OUT=${OUT_DIR:-$ANDROID_BUILD_TOP/out}/host/linux-x86
 fi
 
 # If JACK_CLASSPATH is not set, assume it only contains core-libart.
@@ -247,6 +247,11 @@
         option="$1"
         run_args="${run_args} -Xcompiler-option $option"
         shift
+    elif [ "x$1" = "x--build-option" ]; then
+        shift
+        option="$1"
+        build_args="${build_args} $option"
+        shift
     elif [ "x$1" = "x--runtime-option" ]; then
         shift
         option="$1"
@@ -525,22 +530,6 @@
         err_echo "--no-image is only supported on the art runtime"
         exit 1
     fi
-    if [ "$target_mode" = "no" ]; then
-        framework="${ANDROID_HOST_OUT}/framework"
-        bpath_suffix="-hostdex"
-    else
-        framework="${android_root}/framework"
-        bpath_suffix=""
-    fi
-    # TODO If the target was compiled WITH_DEXPREOPT=true then these tests will
-    # fail since these jar files will be stripped.
-    bpath="${framework}/core-libart${bpath_suffix}.jar"
-    bpath="${bpath}:${framework}/core-oj${bpath_suffix}.jar"
-    bpath="${bpath}:${framework}/conscrypt${bpath_suffix}.jar"
-    bpath="${bpath}:${framework}/okhttp${bpath_suffix}.jar"
-    bpath="${bpath}:${framework}/bouncycastle${bpath_suffix}.jar"
-    # Pass down the bootclasspath
-    run_args="${run_args} --runtime-option -Xbootclasspath:${bpath}"
     run_args="${run_args} --no-image"
 fi
 
@@ -611,6 +600,7 @@
         echo "  Runtime Options:"
         echo "    -O                    Run non-debug rather than debug build (off by default)."
         echo "    -Xcompiler-option     Pass an option to the compiler."
+        echo "    --build-option        Pass an option to the build script."
         echo "    --runtime-option      Pass an option to the runtime."
         echo "    --debug               Wait for a debugger to attach."
         echo "    --debuggable          Whether to compile Java code for a debugger."
@@ -776,27 +766,14 @@
 
   run_args="${run_args} --testlib ${testlib}"
 
-# To cause tests to fail fast, limit the file sizes created by dx, dex2oat and ART output to 2MB.
-build_file_size_limit=2048
-run_file_size_limit=2048
-
-# Add tests requiring a higher ulimit to this list. Ulimits might need to be raised to deal with
-# large amounts of expected output or large generated files.
-if echo "$test_dir" | grep -Eq "(083|089|961|964|971)" > /dev/null; then
-  build_file_size_limit=5120
-  run_file_size_limit=5120
-fi
-if [ "$run_checker" = "yes" -a "$target_mode" = "yes" ]; then
-  # We will need to `adb pull` the .cfg output from the target onto the host to
-  # run checker on it. This file can be big.
-  build_file_size_limit=32768
-  run_file_size_limit=32768
-fi
-if [ ${USE_JACK} = "false" ]; then
-  # Set ulimit if we build with dx only, Jack can generate big temp files.
-  if ! ulimit -S "$build_file_size_limit"; then
-    err_echo "ulimit file size setting failed"
-  fi
+# To cause tests to fail fast, limit the file sizes created by dx, dex2oat and
+# ART output to approximately 128MB. This should be more than sufficient
+# for any test while still catching cases of runaway output.
+# Set a hard limit to encourage ART developers to increase the ulimit here if
+# needed to support a test case rather than resetting the limit in the run
+# script for the particular test in question.
+if ! ulimit -f -H 128000; then
+  err_echo "ulimit file size setting failed"
 fi
 
 good="no"
@@ -807,9 +784,6 @@
     build_exit="$?"
     echo "build exit status: $build_exit" 1>&2
     if [ "$build_exit" = '0' ]; then
-        if ! ulimit -S "$run_file_size_limit"; then
-          err_echo "ulimit file size setting failed"
-        fi
         echo "${test_dir}: running..." 1>&2
         "./${run}" $run_args "$@" 2>&1
         run_exit="$?"
@@ -835,9 +809,6 @@
     "./${build}" $build_args >"$build_output" 2>&1
     build_exit="$?"
     if [ "$build_exit" = '0' ]; then
-        if ! ulimit -S "$run_file_size_limit"; then
-          err_echo "ulimit file size setting failed"
-        fi
         echo "${test_dir}: running..." 1>&2
         "./${run}" $run_args "$@" >"$output" 2>&1
         if [ "$run_checker" = "yes" ]; then
@@ -872,9 +843,6 @@
     "./${build}" $build_args >"$build_output" 2>&1
     build_exit="$?"
     if [ "$build_exit" = '0' ]; then
-        if ! ulimit -S "$run_file_size_limit"; then
-          err_echo "ulimit file size setting failed"
-        fi
         echo "${test_dir}: running..." 1>&2
         "./${run}" $run_args "$@" >"$output" 2>&1
         run_exit="$?"
@@ -944,9 +912,6 @@
       echo "${test_dir}: not bisecting, checker test." 1>&2
     else
       # Increase file size limit, bisection search can generate large logfiles.
-      if ! ulimit -S unlimited; then
-        err_echo "ulimit file size setting failed"
-      fi
       echo "${test_dir}: bisecting..." 1>&2
       cwd=`pwd`
       maybe_device_mode=""
diff --git a/test/testrunner/env.py b/test/testrunner/env.py
index f327974..ed4b4a9 100644
--- a/test/testrunner/env.py
+++ b/test/testrunner/env.py
@@ -176,6 +176,10 @@
 
 ART_TEST_WITH_STRACE = getEnvBoolean('ART_TEST_DEBUG_GC', False)
 
+EXTRA_DISABLED_TESTS = set(env.get("ART_TEST_RUN_TEST_SKIP", "").split())
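+# Space-separated test names in ART_TEST_RUN_TEST_SKIP are skipped in every configuration.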
+
+ART_TEST_RUN_TEST_BUILD = getEnvBoolean('ART_TEST_RUN_TEST_BUILD', False)
+
 TARGET_2ND_ARCH = get_build_var('TARGET_2ND_ARCH')
 TARGET_ARCH = get_build_var('TARGET_ARCH')
 if TARGET_2ND_ARCH:
diff --git a/test/testrunner/run_build_test_target.py b/test/testrunner/run_build_test_target.py
new file mode 100755
index 0000000..4c519ae
--- /dev/null
+++ b/test/testrunner/run_build_test_target.py
@@ -0,0 +1,72 @@
+#!/usr/bin/env python
+#
+# Copyright 2017, The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Build and run go/ab/git_master-art-host target
+
+Provided with a target name, the script sets up the environment for
+building the test target by taking config information from
+target_config.py.
+
+If the target field is defined in the configuration for the target, it
+invokes `make` to build the target; otherwise, it assumes that it is a
+run-test target and invokes the testrunner.py script to build and run
+the run-tests.
+"""
+
+import argparse
+import os
+import subprocess
+import sys
+
+from target_config import target_config
+import env
+
+parser = argparse.ArgumentParser()
+parser.add_argument('build_target')
+parser.add_argument('-j', default='1', dest='n_threads')
+options = parser.parse_args()
+
+target = target_config[options.build_target]
+n_threads = options.n_threads
+custom_env = target.get('env', {})
+custom_env['SOONG_ALLOW_MISSING_DEPENDENCIES'] = 'true'
+print custom_env
+os.environ.update(custom_env)
+
+
+if target.get('target'):
+  build_command = 'make'
+  build_command += ' -j' + str(n_threads)
+  build_command += ' -C ' + env.ANDROID_BUILD_TOP
+  build_command += ' ' + target.get('target')
+  print build_command.split()
+  if subprocess.call(build_command.split()):
+    sys.exit(1)
+
+else:
+  run_test_command = [os.path.join(env.ANDROID_BUILD_TOP,
+                                   'art/test/testrunner/testrunner.py')]
+  run_test_command += target.get('flags', [])
+  run_test_command += ['-j', str(n_threads)]
+  run_test_command += ['-b']
+  run_test_command += ['--host']
+  run_test_command += ['--verbose']
+
+  print run_test_command
+  if subprocess.call(run_test_command):
+    sys.exit(1)
+
+sys.exit(0)
diff --git a/test/testrunner/target_config.py b/test/testrunner/target_config.py
new file mode 100644
index 0000000..1af2ae7
--- /dev/null
+++ b/test/testrunner/target_config.py
@@ -0,0 +1,265 @@
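+# Maps each buildbot target name to its configuration: an optional 'target' (a make
+# goal to build), optional 'flags' (arguments forwarded to testrunner.py), and 'env'
+# (environment overrides applied before building), as consumed by
+# run_build_test_target.py.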
+target_config = {
+    'art-test' : {
+        'flags' : [],
+        'env' : {
+            'ART_USE_READ_BARRIER' : 'false'
+        }
+    },
+    'art-interpreter' : {
+        'flags' : ['--interpreter'],
+        'env' : {
+            'ART_USE_READ_BARRIER' : 'false'
+        }
+    },
+    'art-interpreter-access-checks' : {
+        'flags' : ['--interp-ac'],
+        'env' : {
+            'ART_USE_READ_BARRIER' : 'false'
+        }
+    },
+    'art-jit' : {
+        'flags' : ['--jit'],
+        'env' : {
+            'ART_USE_READ_BARRIER' : 'false'
+        }
+    },
+    'art-gcstress-gcverify': {
+        'flags' : ['--gcstress',
+                   '--gcverify'],
+        'env' : {
+            'ART_USE_READ_BARRIER' : 'false',
+            'ART_DEFAULT_GC_TYPE' : 'SS'
+        }
+    },
+    'art-interpreter-gcstress' : {
+        'flags': ['--interpreter',
+                  '--gcstress'],
+        'env' : {
+            'ART_USE_READ_BARRIER' : 'false',
+            'ART_DEFAULT_GC_TYPE' : 'SS'
+        }
+    },
+    'art-optimizing-gcstress' : {
+        'flags': ['--gcstress',
+                  '--optimizing'],
+        'env' : {
+            'ART_USE_READ_BARRIER' : 'false',
+            'ART_DEFAULT_GC_TYPE' : 'SS'
+        }
+    },
+    'art-jit-gcstress' : {
+        'flags': ['--jit',
+                  '--gcstress'],
+        'env' : {
+            'ART_USE_READ_BARRIER' : 'false',
+            'ART_DEFAULT_GC_TYPE' : 'SS'
+        }
+    },
+    'art-read-barrier' : {
+        'flags': ['--interpreter',
+                  '--optimizing'],
+        'env' : {
+            'ART_USE_READ_BARRIER' : 'true',
+            'ART_HEAP_POISONING' : 'true'
+        }
+    },
+    'art-read-barrier-gcstress' : {
+        'flags' : ['--interpreter',
+                   '--optimizing',
+                   '--gcstress'],
+        'env' : {
+            'ART_USE_READ_BARRIER' : 'true',
+            'ART_HEAP_POISONING' : 'true'
+        }
+    },
+    'art-read-barrier-table-lookup' : {
+        'flags' : ['--interpreter',
+                   '--optimizing'],
+        'env' : {
+            'ART_USE_READ_BARRIER' : 'true',
+            'ART_READ_BARRIER_TYPE' : 'TABLELOOKUP',
+            'ART_HEAP_POISONING' : 'true'
+        }
+    },
+    'art-debug-gc' : {
+        'flags' : ['--interpreter',
+                   '--optimizing'],
+        'env' : {
+            'ART_TEST_DEBUG_GC' : 'true',
+            'ART_USE_READ_BARRIER' : 'false'
+        }
+    },
+    'art-ss-gc' : {
+        'flags' : ['--interpreter',
+                   '--optimizing',
+                   '--jit'],
+        'env' : {
+            'ART_DEFAULT_GC_TYPE' : 'SS',
+            'ART_USE_READ_BARRIER' : 'false'
+        }
+    },
+    'art-gss-gc' : {
+        'flags' : ['--interpreter',
+                   '--optimizing',
+                   '--jit'],
+        'env' : {
+            'ART_DEFAULT_GC_TYPE' : 'GSS',
+            'ART_USE_READ_BARRIER' : 'false'
+        }
+    },
+    'art-ss-gc-tlab' : {
+        'flags' : ['--interpreter',
+                   '--optimizing',
+                   '--jit'],
+        'env' : {
+            'ART_DEFAULT_GC_TYPE' : 'SS',
+            'ART_USE_TLAB' : 'true',
+            'ART_USE_READ_BARRIER' : 'false'
+        }
+    },
+    'art-gss-gc-tlab' : {
+        'flags' : ['--interpreter',
+                   '--optimizing',
+                   '--jit'],
+        'env' : {
+            'ART_DEFAULT_GC_TYPE' : 'GSS',
+            'ART_USE_TLAB' : 'true',
+            'ART_USE_READ_BARRIER' : 'false'
+        }
+    },
+    'art-tracing' : {
+        'flags' : ['--trace'],
+        'env' : {
+            'ART_USE_READ_BARRIER' : 'false'
+        }
+    },
+    'art-interpreter-tracing' : {
+        'flags' : ['--interpreter',
+                   '--trace'],
+        'env' : {
+            'ART_USE_READ_BARRIER' : 'false',
+        }
+    },
+    'art-forcecopy' : {
+        'flags' : ['--forcecopy'],
+        'env' : {
+            'ART_USE_READ_BARRIER' : 'false',
+        }
+    },
+    'art-no-prebuild' : {
+        'flags' : ['--no-prebuild'],
+        'env' : {
+            'ART_USE_READ_BARRIER' : 'false',
+        }
+    },
+    'art-no-image' : {
+        'flags' : ['--no-image'],
+        'env' : {
+            'ART_USE_READ_BARRIER' : 'false',
+        }
+    },
+    'art-interpreter-no-image' : {
+        'flags' : ['--interpreter',
+                   '--no-image'],
+        'env' : {
+            'ART_USE_READ_BARRIER' : 'false',
+        }
+    },
+    'art-relocate-no-patchoat' : {
+        'flags' : ['--relocate-npatchoat'],
+        'env' : {
+            'ART_USE_READ_BARRIER' : 'false',
+        }
+    },
+    'art-no-dex2oat' : {
+        'flags' : ['--no-dex2oat'],
+        'env' : {
+            'ART_USE_READ_BARRIER' : 'false',
+        }
+    },
+    'art-heap-poisoning' : {
+        'flags' : ['--interpreter',
+                   '--optimizing'],
+        'env' : {
+            'ART_USE_READ_BARRIER' : 'false',
+            'ART_HEAP_POISONING' : 'true'
+        }
+    },
+    'art-gtest' : {
+        'target' :  'test-art-host-gtest',
+        'env' : {
+            'ART_USE_READ_BARRIER' : 'true'
+        }
+    },
+    'art-gtest-read-barrier': {
+        'target' :  'test-art-host-gtest',
+        'env' : {
+            'ART_USE_READ_BARRIER' : 'true',
+            'ART_HEAP_POISONING' : 'true'
+        }
+    },
+    'art-gtest-read-barrier-table-lookup': {
+        'target' :  'test-art-host-gtest',
+        'env': {
+            'ART_USE_READ_BARRIER' : 'true',
+            'ART_READ_BARRIER_TYPE' : 'TABLELOOKUP',
+            'ART_HEAP_POISONING' : 'true'
+        }
+    },
+    'art-gtest-ss-gc': {
+        'target' :  'test-art-host-gtest',
+        'env': {
+            'ART_DEFAULT_GC_TYPE' : 'SS',
+            'ART_USE_READ_BARRIER' : 'false'
+        }
+    },
+    'art-gtest-gss-gc': {
+        'target' :  'test-art-host-gtest',
+        'env' : {
+            'ART_DEFAULT_GC_TYPE' : 'GSS',
+            'ART_USE_READ_BARRIER' : 'false'
+        }
+    },
+    'art-gtest-ss-gc-tlab': {
+        'target' :  'test-art-host-gtest',
+        'env': {
+            'ART_DEFAULT_GC_TYPE' : 'SS',
+            'ART_USE_TLAB' : 'true',
+            'ART_USE_READ_BARRIER' : 'false',
+        }
+    },
+    'art-gtest-gss-gc-tlab': {
+        'target' :  'test-art-host-gtest',
+        'env': {
+            'ART_DEFAULT_GC_TYPE' : 'GSS',
+            'ART_USE_TLAB' : 'true',
+            'ART_USE_READ_BARRIER' : 'false'
+        }
+    },
+    'art-gtest-debug-gc' : {
+        'target' :  'test-art-host-gtest',
+        'env' : {
+            'ART_TEST_DEBUG_GC' : 'true',
+            'ART_USE_READ_BARRIER' : 'false'
+        }
+    },
+    'art-gtest-valgrind32': {
+        'target' : 'valgrind-test-art-host32',
+        'env': {
+            'ART_USE_READ_BARRIER' : 'false'
+        }
+    },
+    'art-gtest-valgrind64': {
+        'target' : 'valgrind-test-art-host64',
+        'env': {
+            'ART_USE_READ_BARRIER' : 'false'
+        }
+    },
+    'art-gtest-heap-poisoning': {
+        'target' : 'valgrind-test-art-host64',
+        'env' : {
+            'ART_HEAP_POISONING' : 'true',
+            'ART_USE_READ_BARRIER' : 'false'
+        }
+    }
+}
diff --git a/test/testrunner/testrunner.py b/test/testrunner/testrunner.py
index 748ec31..be84f89 100755
--- a/test/testrunner/testrunner.py
+++ b/test/testrunner/testrunner.py
@@ -44,10 +44,10 @@
 In the end, the script will print the failed and skipped tests if any.
 
 """
+import argparse
 import fnmatch
 import itertools
 import json
-from optparse import OptionParser
 import os
 import re
 import subprocess
@@ -56,6 +56,7 @@
 import time
 
 import env
+from target_config import target_config
 
 TARGET_TYPES = set()
 RUN_TYPES = set()
@@ -183,10 +184,18 @@
   if env.ART_TEST_OPTIMIZING_GRAPH_COLOR:
     COMPILER_TYPES.add('regalloc_gc')
     OPTIMIZING_COMPILER_TYPES.add('regalloc_gc')
-  if env.ART_TEST_OPTIMIZING or not COMPILER_TYPES: # Default
+  if env.ART_TEST_OPTIMIZING:
     COMPILER_TYPES.add('optimizing')
     OPTIMIZING_COMPILER_TYPES.add('optimizing')
 
+  # By default we run all 'compiler' variants.
+  if not COMPILER_TYPES:
+    COMPILER_TYPES.add('optimizing')
+    COMPILER_TYPES.add('jit')
+    COMPILER_TYPES.add('interpreter')
+    COMPILER_TYPES.add('interp-ac')
+    OPTIMIZING_COMPILER_TYPES.add('optimizing')
+
   if env.ART_TEST_RUN_TEST_RELOCATE:
     RELOCATE_TYPES.add('relocate')
   if env.ART_TEST_RUN_TEST_RELOCATE_NO_PATCHOAT:
@@ -527,10 +536,10 @@
           test_name = ('%s...%s') % (
             test_name[:(allowed_test_length - 3)/2],
             test_name[-(allowed_test_length - 3)/2:])
-          info += ('%s %s %s') % (
-            progress_info,
-            test_name,
-            result_text)
+        info += ('%s %s %s') % (
+          progress_info,
+          test_name,
+          result_text)
     print_text(info)
   except Exception, e:
     print_text(('%s\n%s\n') % (test_name, str(e)))
@@ -596,6 +605,8 @@
   """
   if dry_run:
     return True
+  if test in env.EXTRA_DISABLED_TESTS:
+    return True
   variants_list = DISABLED_TEST_CONTAINER.get(test, {})
   for variants in variants_list:
     variants_present = True
@@ -640,12 +651,25 @@
     console_width = int(os.popen('stty size', 'r').read().split()[1])
     eraser_text = '\r' + ' ' * console_width + '\r'
     print_text(eraser_text)
+
+  # Prints information about the total tests run.
+  # E.g., "2/38 (5%) tests passed".
+  passed_test_count = total_test_count - len(skipped_tests) - len(failed_tests)
+  passed_test_information = ('%d/%d (%d%%) %s passed.\n') % (
+      passed_test_count,
+      total_test_count,
+      (passed_test_count*100)/total_test_count,
+      'tests' if passed_test_count != 1 else 'test')
+  print_text(passed_test_information)
+
+  # Prints the list of skipped tests, if any.
   if skipped_tests:
     print_text(COLOR_SKIP + 'SKIPPED TESTS' + COLOR_NORMAL + '\n')
     for test in skipped_tests:
       print_text(test + '\n')
     print_text('\n')
 
+  # Prints the list of failed tests, if any.
   if failed_tests:
     print_text(COLOR_ERROR + 'FAILED TESTS' + COLOR_NORMAL + '\n')
     for test in failed_tests:
@@ -703,6 +727,24 @@
   raise ValueError(test_name + " is not a valid test")
 
 
+def setup_env_for_build_target(build_target, parser, options):
+  """Setup environment for the build target
+
+  The method setup environment for the master-art-host targets.
+  """
+  os.environ.update(build_target['env'])
+  os.environ['SOONG_ALLOW_MISSING_DEPENDENCIES'] = 'true'
+  print_text('%s\n' % (str(os.environ)))
+
+  target_options = vars(parser.parse_args(build_target['flags']))
+  target_options['host'] = True
+  target_options['verbose'] = True
+  target_options['build'] = True
+  target_options['n_thread'] = options['n_thread']
+  target_options['dry_run'] = options['dry_run']
+
+  return target_options
+
 def parse_option():
   global verbose
   global dry_run
@@ -711,101 +753,116 @@
   global gdb
   global gdb_arg
 
-  parser = OptionParser()
-  parser.add_option('-t', '--test', dest='test', help='name of the test')
-  parser.add_option('-j', type='int', dest='n_thread')
+  parser = argparse.ArgumentParser(description="Runs all or a subset of the ART test suite.")
+  parser.add_argument('-t', '--test', dest='test', help='name of the test')
+  parser.add_argument('-j', type=int, dest='n_thread')
   for variant in TOTAL_VARIANTS_SET:
     flag = '--' + variant
     flag_dest = variant.replace('-', '_')
     if variant == '32' or variant == '64':
       flag_dest = 'n' + flag_dest
-    parser.add_option(flag, action='store_true', dest=flag_dest)
-  parser.add_option('--verbose', '-v', action='store_true', dest='verbose')
-  parser.add_option('--dry-run', action='store_true', dest='dry_run')
-  parser.add_option('-b', '--build-dependencies', action='store_true', dest='build')
-  parser.add_option('--gdb', action='store_true', dest='gdb')
-  parser.add_option('--gdb-arg', dest='gdb_arg')
+    parser.add_argument(flag, action='store_true', dest=flag_dest)
+  parser.add_argument('--verbose', '-v', action='store_true', dest='verbose')
+  parser.add_argument('--dry-run', action='store_true', dest='dry_run')
+  parser.add_argument("--skip", action="append", dest="skips", default=[],
+                      help="Skip the given test in all circumstances.")
+  parser.add_argument('--no-build-dependencies',
+                      action='store_false', dest='build',
+                      help="Don't build dependencies under any circumstances. This is the " +
+                           "behavior if ART_TEST_RUN_TEST_ALWAYS_BUILD is not set to 'true'.")
+  parser.add_argument('-b', '--build-dependencies',
+                      action='store_true', dest='build',
+                      help="Build dependencies under all circumstances. By default we will " +
+                           "not build dependencies unless ART_TEST_RUN_TEST_BUILD=true.")
+  parser.add_argument('--build-target', dest='build_target', help='master-art-host targets')
+  parser.set_defaults(build = env.ART_TEST_RUN_TEST_BUILD)
+  parser.add_argument('--gdb', action='store_true', dest='gdb')
+  parser.add_argument('--gdb-arg', dest='gdb_arg')
 
-  options = parser.parse_args()[0]
+  options = vars(parser.parse_args())
+  if options['build_target']:
+    options = setup_env_for_build_target(target_config[options['build_target']],
+                                         parser, options)
+
   test = ''
-  if options.test:
-    test = parse_test_name(options.test)
-  if options.pictest:
+  env.EXTRA_DISABLED_TESTS.update(set(options['skips']))
+  if options['test']:
+    test = parse_test_name(options['test'])
+  if options['pictest']:
     PICTEST_TYPES.add('pictest')
-  if options.ndebug:
+  if options['ndebug']:
     RUN_TYPES.add('ndebug')
-  if options.interp_ac:
+  if options['interp_ac']:
     COMPILER_TYPES.add('interp-ac')
-  if options.picimage:
+  if options['picimage']:
     IMAGE_TYPES.add('picimage')
-  if options.n64:
+  if options['n64']:
     ADDRESS_SIZES.add('64')
-  if options.interpreter:
+  if options['interpreter']:
     COMPILER_TYPES.add('interpreter')
-  if options.jni:
+  if options['jni']:
     JNI_TYPES.add('jni')
-  if options.relocate_npatchoat:
+  if options['relocate_npatchoat']:
     RELOCATE_TYPES.add('relocate-npatchoat')
-  if options.no_prebuild:
+  if options['no_prebuild']:
     PREBUILD_TYPES.add('no-prebuild')
-  if options.npictest:
+  if options['npictest']:
     PICTEST_TYPES.add('npictest')
-  if options.no_dex2oat:
+  if options['no_dex2oat']:
     PREBUILD_TYPES.add('no-dex2oat')
-  if options.jit:
+  if options['jit']:
     COMPILER_TYPES.add('jit')
-  if options.relocate:
+  if options['relocate']:
     RELOCATE_TYPES.add('relocate')
-  if options.ndebuggable:
+  if options['ndebuggable']:
     DEBUGGABLE_TYPES.add('ndebuggable')
-  if options.no_image:
+  if options['no_image']:
     IMAGE_TYPES.add('no-image')
-  if options.optimizing:
+  if options['optimizing']:
     COMPILER_TYPES.add('optimizing')
-  if options.trace:
+  if options['trace']:
     TRACE_TYPES.add('trace')
-  if options.gcstress:
+  if options['gcstress']:
     GC_TYPES.add('gcstress')
-  if options.no_relocate:
+  if options['no_relocate']:
     RELOCATE_TYPES.add('no-relocate')
-  if options.target:
+  if options['target']:
     TARGET_TYPES.add('target')
-  if options.forcecopy:
+  if options['forcecopy']:
     JNI_TYPES.add('forcecopy')
-  if options.n32:
+  if options['n32']:
     ADDRESS_SIZES.add('32')
-  if options.host:
+  if options['host']:
     TARGET_TYPES.add('host')
-  if options.gcverify:
+  if options['gcverify']:
     GC_TYPES.add('gcverify')
-  if options.debuggable:
+  if options['debuggable']:
     DEBUGGABLE_TYPES.add('debuggable')
-  if options.prebuild:
+  if options['prebuild']:
     PREBUILD_TYPES.add('prebuild')
-  if options.debug:
+  if options['debug']:
     RUN_TYPES.add('debug')
-  if options.checkjni:
+  if options['checkjni']:
     JNI_TYPES.add('checkjni')
-  if options.ntrace:
+  if options['ntrace']:
     TRACE_TYPES.add('ntrace')
-  if options.cms:
+  if options['cms']:
     GC_TYPES.add('cms')
-  if options.multipicimage:
+  if options['multipicimage']:
     IMAGE_TYPES.add('multipicimage')
-  if options.verbose:
+  if options['verbose']:
     verbose = True
-  if options.n_thread:
-    n_thread = max(1, options.n_thread)
-  if options.dry_run:
+  if options['n_thread']:
+    n_thread = max(1, options['n_thread'])
+  if options['dry_run']:
     dry_run = True
     verbose = True
-  if options.build:
-    build = True
-  if options.gdb:
+  build = options['build']
+  if options['gdb']:
     n_thread = 1
     gdb = True
-    if options.gdb_arg:
-      gdb_arg = options.gdb_arg
+    if options['gdb_arg']:
+      gdb_arg = options['gdb_arg']
 
   return test
 
diff --git a/test/ti-agent/common_helper.cc b/test/ti-agent/common_helper.cc
index ea6359e..6316a9c 100644
--- a/test/ti-agent/common_helper.cc
+++ b/test/ti-agent/common_helper.cc
@@ -25,7 +25,7 @@
 #include "art_method.h"
 #include "jni.h"
 #include "jni_internal.h"
-#include "openjdkjvmti/jvmti.h"
+#include "jvmti.h"
 #include "scoped_thread_state_change-inl.h"
 #include "ScopedLocalRef.h"
 #include "stack.h"
@@ -520,11 +520,14 @@
       LOG(FATAL) << "Could not load " << class_name;
     }
   }
+  BindFunctionsOnClass(jenv, env, klass.get());
+}
 
+void BindFunctionsOnClass(jvmtiEnv* jenv, JNIEnv* env, jclass klass) {
   // Use JVMTI to get the methods.
   jint method_count;
   jmethodID* methods;
-  jvmtiError methods_result = jenv->GetClassMethods(klass.get(), &method_count, &methods);
+  jvmtiError methods_result = jenv->GetClassMethods(klass, &method_count, &methods);
   if (methods_result != JVMTI_ERROR_NONE) {
     LOG(FATAL) << "Could not get methods";
   }
@@ -538,7 +541,7 @@
     }
     constexpr jint kNative = static_cast<jint>(kAccNative);
     if ((modifiers & kNative) != 0) {
-      BindMethod(jenv, env, klass.get(), methods[i]);
+      BindMethod(jenv, env, klass, methods[i]);
     }
   }
 
diff --git a/test/ti-agent/common_helper.h b/test/ti-agent/common_helper.h
index 0318501..f10356d 100644
--- a/test/ti-agent/common_helper.h
+++ b/test/ti-agent/common_helper.h
@@ -18,7 +18,7 @@
 #define ART_TEST_TI_AGENT_COMMON_HELPER_H_
 
 #include "jni.h"
-#include "openjdkjvmti/jvmti.h"
+#include "jvmti.h"
 #include "ScopedLocalRef.h"
 
 namespace art {
@@ -81,6 +81,7 @@
 //
 // This will abort on failure.
 void BindFunctions(jvmtiEnv* jvmti_env, JNIEnv* env, const char* class_name);
+void BindFunctionsOnClass(jvmtiEnv* jvmti_env, JNIEnv* env, jclass klass);
 
 }  // namespace art
 
diff --git a/test/ti-agent/common_load.cc b/test/ti-agent/common_load.cc
index 351857d..fddae3a 100644
--- a/test/ti-agent/common_load.cc
+++ b/test/ti-agent/common_load.cc
@@ -18,8 +18,6 @@
 
 #include <jni.h>
 #include <stdio.h>
-// TODO I don't know?
-#include "openjdkjvmti/jvmti.h"
 
 #include "art_method-inl.h"
 #include "base/logging.h"
diff --git a/test/ti-agent/common_load.h b/test/ti-agent/common_load.h
index d254421..e79a006 100644
--- a/test/ti-agent/common_load.h
+++ b/test/ti-agent/common_load.h
@@ -17,8 +17,7 @@
 #ifndef ART_TEST_TI_AGENT_COMMON_LOAD_H_
 #define ART_TEST_TI_AGENT_COMMON_LOAD_H_
 
-#include "jni.h"
-#include "openjdkjvmti/jvmti.h"
+#include "jvmti.h"
 
 namespace art {
 
diff --git a/tools/ahat/Android.mk b/tools/ahat/Android.mk
index 493eafb..f79377d 100644
--- a/tools/ahat/Android.mk
+++ b/tools/ahat/Android.mk
@@ -23,7 +23,6 @@
 LOCAL_SRC_FILES := $(call all-java-files-under, src)
 LOCAL_JAR_MANIFEST := src/manifest.txt
 LOCAL_JAVA_RESOURCE_FILES := \
-  $(LOCAL_PATH)/src/help.html \
   $(LOCAL_PATH)/src/style.css
 
 LOCAL_STATIC_JAVA_LIBRARIES := perflib-prebuilt guavalib trove-prebuilt
@@ -79,8 +78,9 @@
 # BUILD_HOST_DALVIK_JAVA_LIBRARY above.
 AHAT_TEST_DUMP_JAR := $(LOCAL_BUILT_MODULE)
 AHAT_TEST_DUMP_HPROF := $(intermediates.COMMON)/test-dump.hprof
+AHAT_TEST_DUMP_BASE_HPROF := $(intermediates.COMMON)/test-dump-base.hprof
 
-# Run ahat-test-dump.jar to generate test-dump.hprof
+# Run ahat-test-dump.jar to generate test-dump.hprof and test-dump-base.hprof
 AHAT_TEST_DUMP_DEPENDENCIES := \
   $(ART_HOST_EXECUTABLES) \
   $(ART_HOST_SHARED_LIBRARY_DEPENDENCIES) \
@@ -93,12 +93,19 @@
 $(AHAT_TEST_DUMP_HPROF): $(AHAT_TEST_DUMP_JAR) $(AHAT_TEST_DUMP_DEPENDENCIES)
 	$(PRIVATE_AHAT_TEST_ART) -cp $(PRIVATE_AHAT_TEST_DUMP_JAR) Main $@
 
+$(AHAT_TEST_DUMP_BASE_HPROF): PRIVATE_AHAT_TEST_ART := $(HOST_OUT_EXECUTABLES)/art
+$(AHAT_TEST_DUMP_BASE_HPROF): PRIVATE_AHAT_TEST_DUMP_JAR := $(AHAT_TEST_DUMP_JAR)
+$(AHAT_TEST_DUMP_BASE_HPROF): PRIVATE_AHAT_TEST_DUMP_DEPENDENCIES := $(AHAT_TEST_DUMP_DEPENDENCIES)
+$(AHAT_TEST_DUMP_BASE_HPROF): $(AHAT_TEST_DUMP_JAR) $(AHAT_TEST_DUMP_DEPENDENCIES)
+	$(PRIVATE_AHAT_TEST_ART) -cp $(PRIVATE_AHAT_TEST_DUMP_JAR) Main $@ --base
+
 .PHONY: ahat-test
 ahat-test: PRIVATE_AHAT_TEST_DUMP_HPROF := $(AHAT_TEST_DUMP_HPROF)
+ahat-test: PRIVATE_AHAT_TEST_DUMP_BASE_HPROF := $(AHAT_TEST_DUMP_BASE_HPROF)
 ahat-test: PRIVATE_AHAT_TEST_JAR := $(AHAT_TEST_JAR)
 ahat-test: PRIVATE_AHAT_PROGUARD_MAP := $(AHAT_TEST_DUMP_PROGUARD_MAP)
-ahat-test: $(AHAT_TEST_JAR) $(AHAT_TEST_DUMP_HPROF)
-	java -Dahat.test.dump.hprof=$(PRIVATE_AHAT_TEST_DUMP_HPROF) -Dahat.test.dump.map=$(PRIVATE_AHAT_PROGUARD_MAP) -jar $(PRIVATE_AHAT_TEST_JAR)
+ahat-test: $(AHAT_TEST_JAR) $(AHAT_TEST_DUMP_HPROF) $(AHAT_TEST_DUMP_BASE_HPROF)
+	java -enableassertions -Dahat.test.dump.hprof=$(PRIVATE_AHAT_TEST_DUMP_HPROF) -Dahat.test.dump.base.hprof=$(PRIVATE_AHAT_TEST_DUMP_BASE_HPROF) -Dahat.test.dump.map=$(PRIVATE_AHAT_PROGUARD_MAP) -jar $(PRIVATE_AHAT_TEST_JAR)
 
 # Clean up local variables.
 AHAT_TEST_DUMP_DEPENDENCIES :=
diff --git a/tools/ahat/README.txt b/tools/ahat/README.txt
index 8dfb4ab..133426f 100644
--- a/tools/ahat/README.txt
+++ b/tools/ahat/README.txt
@@ -1,22 +1,21 @@
 AHAT - Android Heap Analysis Tool
 
 Usage:
-  java -jar ahat.jar [-p port] [--proguard-map FILE] FILE
-    Launch an http server for viewing the given Android heap-dump FILE.
+  java -jar ahat.jar [OPTIONS] FILE
+    Launch an http server for viewing the given Android heap dump FILE.
 
-  Options:
+  OPTIONS:
     -p <port>
        Serve pages on the given port. Defaults to 7100.
     --proguard-map FILE
        Use the proguard map FILE to deobfuscate the heap dump.
+    --baseline FILE
+       Diff the heap dump against the given baseline heap dump FILE.
+    --baseline-proguard-map FILE
+       Use the proguard map FILE to deobfuscate the baseline heap dump.
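+
+  Example, diffing a heap dump against a baseline:
+    java -jar ahat.jar --baseline base.hprof current.hprof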
 
 TODO:
- * Have a way to diff two heap dumps.
-
- * Add more tips to the help page.
-   - Recommend how to start looking at a heap dump.
-   - Say how to enable allocation sites.
-   - Where to submit feedback, questions, and bug reports.
+ * Add a user guide.
  * Dim 'image' and 'zygote' heap sizes slightly? Why do we even show these?
  * Let user re-sort sites objects info by clicking column headers.
  * Let user re-sort "Objects" list.
@@ -49,9 +48,9 @@
    time.
  * That we don't show the 'extra' column in the DominatedList if we are
    showing all the instances.
- * That InstanceUtils.asString properly takes into account "offset" and
+ * That Instance.asString properly takes into account "offset" and
    "count" fields, if they are present.
- * InstanceUtils.getDexCacheLocation
+ * Instance.getDexCacheLocation
 
 Reported Issues:
  * Request to be able to sort tables by size.
@@ -76,7 +75,14 @@
  * Instance.isRoot and Instance.getRootTypes.
 
 Release History:
- 0.9 Pending
+ 1.1 Feb 21, 2017
+   Show java.lang.ref.Reference referents as "unreachable" instead of null.
+
+ 1.0 Dec 20, 2016
+   Add support for diffing two heap dumps.
+   Remove native allocations view.
+   Remove outdated help page.
+   Significant refactoring of ahat internals.
 
  0.8 Oct 18, 2016
    Show sample path from GC root with field names in place of dominator path.
diff --git a/tools/ahat/src/AhatSnapshot.java b/tools/ahat/src/AhatSnapshot.java
deleted file mode 100644
index ba8243f..0000000
--- a/tools/ahat/src/AhatSnapshot.java
+++ /dev/null
@@ -1,287 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.android.ahat;
-
-import com.android.tools.perflib.captures.MemoryMappedFileBuffer;
-import com.android.tools.perflib.heap.ClassObj;
-import com.android.tools.perflib.heap.Heap;
-import com.android.tools.perflib.heap.Instance;
-import com.android.tools.perflib.heap.ProguardMap;
-import com.android.tools.perflib.heap.RootObj;
-import com.android.tools.perflib.heap.RootType;
-import com.android.tools.perflib.heap.Snapshot;
-import com.android.tools.perflib.heap.StackFrame;
-import com.android.tools.perflib.heap.StackTrace;
-
-import com.google.common.collect.Lists;
-
-import gnu.trove.TObjectProcedure;
-
-import java.io.File;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Map;
-
-/**
- * A wrapper over the perflib snapshot that provides the behavior we use in
- * ahat.
- */
-class AhatSnapshot {
-  private final Snapshot mSnapshot;
-  private final List<Heap> mHeaps;
-
-  // Map from Instance to the list of Instances it immediately dominates.
-  private final Map<Instance, List<Instance>> mDominated
-    = new HashMap<Instance, List<Instance>>();
-
-  // Collection of objects whose immediate dominator is the SENTINEL_ROOT.
-  private final List<Instance> mRooted = new ArrayList<Instance>();
-
-  // Map from roots to their types.
-  // Instances are only included if they are roots, and the collection of root
-  // types is guaranteed to be non-empty.
-  private final Map<Instance, Collection<RootType>> mRoots
-    = new HashMap<Instance, Collection<RootType>>();
-
-  private final Site mRootSite = new Site("ROOT");
-  private final Map<Heap, Long> mHeapSizes = new HashMap<Heap, Long>();
-
-  private final List<InstanceUtils.NativeAllocation> mNativeAllocations
-    = new ArrayList<InstanceUtils.NativeAllocation>();
-
-  /**
-   * Create an AhatSnapshot from an hprof file.
-   */
-  public static AhatSnapshot fromHprof(File hprof, ProguardMap map) throws IOException {
-    Snapshot snapshot = Snapshot.createSnapshot(new MemoryMappedFileBuffer(hprof), map);
-    snapshot.computeDominators();
-    return new AhatSnapshot(snapshot);
-  }
-
-  /**
-   * Construct an AhatSnapshot for the given perflib snapshot.
-   * The user is responsible for calling snapshot.computeDominators before
-   * calling this AhatSnapshot constructor.
-   */
-  private AhatSnapshot(Snapshot snapshot) {
-    mSnapshot = snapshot;
-    mHeaps = new ArrayList<Heap>(mSnapshot.getHeaps());
-
-    final ClassObj javaLangClass = mSnapshot.findClass("java.lang.Class");
-    for (Heap heap : mHeaps) {
-      // Use a single element array for the total to act as a reference to a
-      // long.
-      final long[] total = new long[]{0};
-      TObjectProcedure<Instance> processInstance = new TObjectProcedure<Instance>() {
-        @Override
-        public boolean execute(Instance inst) {
-          Instance dominator = inst.getImmediateDominator();
-          if (dominator != null) {
-            total[0] += inst.getSize();
-
-            if (dominator == Snapshot.SENTINEL_ROOT) {
-              mRooted.add(inst);
-            }
-
-            // Properly label the class of a class object.
-            if (inst instanceof ClassObj && javaLangClass != null && inst.getClassObj() == null) {
-                inst.setClassId(javaLangClass.getId());
-            }
-
-            // Update dominated instances.
-            List<Instance> instances = mDominated.get(dominator);
-            if (instances == null) {
-              instances = new ArrayList<Instance>();
-              mDominated.put(dominator, instances);
-            }
-            instances.add(inst);
-
-            // Update sites.
-            List<StackFrame> path = Collections.emptyList();
-            StackTrace stack = getStack(inst);
-            int stackId = getStackTraceSerialNumber(stack);
-            if (stack != null) {
-              StackFrame[] frames = getStackFrames(stack);
-              if (frames != null && frames.length > 0) {
-                path = Lists.reverse(Arrays.asList(frames));
-              }
-            }
-            mRootSite.add(stackId, 0, path.iterator(), inst);
-
-            // Update native allocations.
-            InstanceUtils.NativeAllocation alloc = InstanceUtils.getNativeAllocation(inst);
-            if (alloc != null) {
-              mNativeAllocations.add(alloc);
-            }
-          }
-          return true;
-        }
-      };
-      for (Instance instance : heap.getClasses()) {
-        processInstance.execute(instance);
-      }
-      heap.forEachInstance(processInstance);
-      mHeapSizes.put(heap, total[0]);
-    }
-
-    // Record the roots and their types.
-    for (RootObj root : snapshot.getGCRoots()) {
-      Instance inst = root.getReferredInstance();
-      Collection<RootType> types = mRoots.get(inst);
-      if (types == null) {
-        types = new HashSet<RootType>();
-        mRoots.put(inst, types);
-      }
-      types.add(root.getRootType());
-    }
-  }
-
-  // Note: This method is exposed for testing purposes.
-  public ClassObj findClass(String name) {
-    return mSnapshot.findClass(name);
-  }
-
-  public Instance findInstance(long id) {
-    return mSnapshot.findInstance(id);
-  }
-
-  public int getHeapIndex(Heap heap) {
-    return mSnapshot.getHeapIndex(heap);
-  }
-
-  public Heap getHeap(String name) {
-    return mSnapshot.getHeap(name);
-  }
-
-  /**
-   * Returns a collection of instances whose immediate dominator is the
-   * SENTINEL_ROOT.
-   */
-  public List<Instance> getRooted() {
-    return mRooted;
-  }
-
-  /**
-   * Returns true if the given instance is a root.
-   */
-  public boolean isRoot(Instance inst) {
-    return mRoots.containsKey(inst);
-  }
-
-  /**
-   * Returns the list of root types for the given instance, or null if the
-   * instance is not a root.
-   */
-  public Collection<RootType> getRootTypes(Instance inst) {
-    return mRoots.get(inst);
-  }
-
-  public List<Heap> getHeaps() {
-    return mHeaps;
-  }
-
-  public Site getRootSite() {
-    return mRootSite;
-  }
-
-  /**
-   * Look up the site at which the given object was allocated.
-   */
-  public Site getSiteForInstance(Instance inst) {
-    Site site = mRootSite;
-    StackTrace stack = getStack(inst);
-    if (stack != null) {
-      StackFrame[] frames = getStackFrames(stack);
-      if (frames != null) {
-        List<StackFrame> path = Lists.reverse(Arrays.asList(frames));
-        site = mRootSite.getChild(path.iterator());
-      }
-    }
-    return site;
-  }
-
-  /**
-   * Return a list of those objects immediately dominated by the given
-   * instance.
-   */
-  public List<Instance> getDominated(Instance inst) {
-    return mDominated.get(inst);
-  }
-
-  /**
-   * Return the total size of reachable objects allocated on the given heap.
-   */
-  public long getHeapSize(Heap heap) {
-    return mHeapSizes.get(heap);
-  }
-
-  /**
-   * Return the class name for the given class object.
-   * classObj may be null, in which case "(class unknown)" is returned.
-   */
-  public static String getClassName(ClassObj classObj) {
-    if (classObj == null) {
-      return "(class unknown)";
-    }
-    return classObj.getClassName();
-  }
-
-  // Return the stack where the given instance was allocated.
-  private static StackTrace getStack(Instance inst) {
-    return inst.getStack();
-  }
-
-  // Return the list of stack frames for a stack trace.
-  private static StackFrame[] getStackFrames(StackTrace stack) {
-    return stack.getFrames();
-  }
-
-  // Return the serial number of the given stack trace.
-  private static int getStackTraceSerialNumber(StackTrace stack) {
-    return stack.getSerialNumber();
-  }
-
-  // Get the site associated with the given stack id and depth.
-  // Returns the root site if no such site found.
-  // depth of -1 means the full stack.
-  public Site getSite(int stackId, int depth) {
-    Site site = mRootSite;
-    StackTrace stack = mSnapshot.getStackTrace(stackId);
-    if (stack != null) {
-      StackFrame[] frames = getStackFrames(stack);
-      if (frames != null) {
-        List<StackFrame> path = Lists.reverse(Arrays.asList(frames));
-        if (depth >= 0) {
-          path = path.subList(0, depth);
-        }
-        site = mRootSite.getChild(path.iterator());
-      }
-    }
-    return site;
-  }
-
-  // Return a list of known native allocations in the snapshot.
-  public List<InstanceUtils.NativeAllocation> getNativeAllocations() {
-    return mNativeAllocations;
-  }
-}
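
The snapshot wrapper deleted here is not dropped from the tool: as the imports in the following hunks show, it reappears as com.android.ahat.heapdump.AhatSnapshot, with AhatInstance, AhatHeap and friends taking over the per-object queries. One pattern from the deleted constructor worth calling out is the dominator index behind getDominated(): every instance with a non-null immediate dominator is filed under that dominator. A minimal, self-contained sketch of that bookkeeping, with illustrative names rather than ahat's API:

    import java.util.ArrayList;
    import java.util.Collections;
    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;

    // Index objects under their immediate dominator, as the deleted
    // AhatSnapshot constructor did with mDominated.
    class DominatorIndex<T> {
      private final Map<T, List<T>> dominated = new HashMap<>();

      // Record that 'dominator' immediately dominates 'inst'.
      void add(T dominator, T inst) {
        // computeIfAbsent replaces the explicit get/null-check/put idiom.
        dominated.computeIfAbsent(dominator, k -> new ArrayList<>()).add(inst);
      }

      // Objects immediately dominated by 'dominator'; empty if none recorded.
      List<T> get(T dominator) {
        return dominated.getOrDefault(dominator, Collections.emptyList());
      }
    }
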
diff --git a/tools/ahat/src/BitmapHandler.java b/tools/ahat/src/BitmapHandler.java
index 0f567e3..836aef6 100644
--- a/tools/ahat/src/BitmapHandler.java
+++ b/tools/ahat/src/BitmapHandler.java
@@ -16,7 +16,8 @@
 
 package com.android.ahat;
 
-import com.android.tools.perflib.heap.Instance;
+import com.android.ahat.heapdump.AhatInstance;
+import com.android.ahat.heapdump.AhatSnapshot;
 import com.sun.net.httpserver.HttpExchange;
 import com.sun.net.httpserver.HttpHandler;
 import java.awt.image.BufferedImage;
@@ -38,9 +39,9 @@
       Query query = new Query(exchange.getRequestURI());
       long id = query.getLong("id", 0);
       BufferedImage bitmap = null;
-      Instance inst = mSnapshot.findInstance(id);
+      AhatInstance inst = mSnapshot.findInstance(id);
       if (inst != null) {
-        bitmap = InstanceUtils.asBitmap(inst);
+        bitmap = inst.asBitmap();
       }
 
       if (bitmap != null) {
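
Instead of routing through the static InstanceUtils.asBitmap helper (deleted later in this change), the handler now asks the instance itself. The remainder of the handler, outside this hunk, still has to turn the BufferedImage into an HTTP response; a plausible minimal shape for that step, using the standard javax.imageio API rather than quoting ahat's actual code:

    import com.sun.net.httpserver.HttpExchange;
    import java.awt.image.BufferedImage;
    import java.io.IOException;
    import java.io.OutputStream;
    import javax.imageio.ImageIO;

    // Send a BufferedImage back to the client as a PNG.
    static void sendPng(HttpExchange exchange, BufferedImage bitmap) throws IOException {
      exchange.getResponseHeaders().add("Content-Type", "image/png");
      exchange.sendResponseHeaders(200, 0);  // 0: response length not known up front
      try (OutputStream out = exchange.getResponseBody()) {
        ImageIO.write(bitmap, "png", out);
      }
    }
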
diff --git a/tools/ahat/src/Column.java b/tools/ahat/src/Column.java
index b7f2829..819e586 100644
--- a/tools/ahat/src/Column.java
+++ b/tools/ahat/src/Column.java
@@ -22,14 +22,24 @@
 class Column {
   public DocString heading;
   public Align align;
+  public boolean visible;
 
   public static enum Align {
     LEFT, RIGHT
   };
 
-  public Column(DocString heading, Align align) {
+  public Column(DocString heading, Align align, boolean visible) {
     this.heading = heading;
     this.align = align;
+    this.visible = visible;
+  }
+
+  public Column(String heading, Align align, boolean visible) {
+    this(DocString.text(heading), align, visible);
+  }
+
+  public Column(DocString heading, Align align) {
+    this(heading, align, true);
   }
 
   /**
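
The new visible flag lets a table declare all of its columns up front and decide per page whether each one renders, instead of building different column lists for diffed and undiffed snapshots; the two-argument constructor keeps existing call sites unchanged by defaulting to visible. The HeapTable changes below use it exactly this way:

    // From HeapTable.render below: the delta column always exists, but it
    // is only rendered when the snapshot was diffed against a baseline.
    subcols.add(new Column("Δ", Column.Align.RIGHT, snapshot.isDiffed()));
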
diff --git a/tools/ahat/src/DocString.java b/tools/ahat/src/DocString.java
index 19666de..c6303c8 100644
--- a/tools/ahat/src/DocString.java
+++ b/tools/ahat/src/DocString.java
@@ -53,7 +53,6 @@
   public static DocString link(URI uri, DocString content) {
     DocString doc = new DocString();
     return doc.appendLink(uri, content);
-
   }
 
   /**
@@ -86,6 +85,78 @@
     return this;
   }
 
+  /**
+   * Adorn the given string to indicate it represents something added relative
+   * to a baseline.
+   */
+  public static DocString added(DocString str) {
+    DocString string = new DocString();
+    string.mStringBuilder.append("<span class=\"added\">");
+    string.mStringBuilder.append(str.html());
+    string.mStringBuilder.append("</span>");
+    return string;
+  }
+
+  /**
+   * Adorn the given string to indicate it represents something added relative
+   * to a baseline.
+   */
+  public static DocString added(String str) {
+    return added(text(str));
+  }
+
+  /**
+   * Adorn the given string to indicate it represents something removed relative
+   * to a baseline.
+   */
+  public static DocString removed(DocString str) {
+    DocString string = new DocString();
+    string.mStringBuilder.append("<span class=\"removed\">");
+    string.mStringBuilder.append(str.html());
+    string.mStringBuilder.append("</span>");
+    return string;
+  }
+
+  /**
+   * Adorn the given string to indicate it represents something removed relative
+   * to a baseline.
+   */
+  public static DocString removed(String str) {
+    return removed(text(str));
+  }
+
+  /**
+   * Standard formatted DocString for describing a change in size relative to
+   * a baseline.
+   * @param noCurrent - whether no current object exists.
+   * @param noBaseline - whether no baseline object exists.
+   * @param current - the size of the current object.
+   * @param baseline - the size of the baseline object.
+   */
+  public static DocString delta(boolean noCurrent, boolean noBaseline,
+      long current, long baseline) {
+    DocString doc = new DocString();
+    return doc.appendDelta(noCurrent, noBaseline, current, baseline);
+  }
+
+  /**
+   * Standard formatted DocString for describing a change in size relative to
+   * a baseline.
+   */
+  public DocString appendDelta(boolean noCurrent, boolean noBaseline,
+      long current, long baseline) {
+    if (noCurrent) {
+      append(removed(format("%+,14d", 0 - baseline)));
+    } else if (noBaseline) {
+      append(added("new"));
+    } else if (current > baseline) {
+      append(added(format("%+,14d", current - baseline)));
+    } else if (current < baseline) {
+      append(removed(format("%+,14d", current - baseline)));
+    }
+    return this;
+  }
+
   public DocString appendLink(URI uri, DocString content) {
     mStringBuilder.append("<a href=\"");
     mStringBuilder.append(uri.toASCIIString());
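
Spelled out, appendDelta distinguishes four cases, printing magnitudes with "%+,14d" (explicit sign, thousands grouping, padded to 14 characters). With the padding trimmed, the outcomes look like this:

    // DocString.delta(noCurrent, noBaseline, current, baseline):
    //   delta(false, true,  5120,    0)  ->  "new"     (added style)
    //   delta(true,  false,    0, 4096)  ->  "-4,096"  (removed style)
    //   delta(false, false, 5120, 4096)  ->  "+1,024"  (added style)
    //   delta(false, false, 4096, 5120)  ->  "-1,024"  (removed style)
    //   delta(false, false, 4096, 4096)  ->  ""        (nothing appended)
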
diff --git a/tools/ahat/src/DominatedList.java b/tools/ahat/src/DominatedList.java
index 7a673f5..f73e3ca 100644
--- a/tools/ahat/src/DominatedList.java
+++ b/tools/ahat/src/DominatedList.java
@@ -16,8 +16,10 @@
 
 package com.android.ahat;
 
-import com.android.tools.perflib.heap.Heap;
-import com.android.tools.perflib.heap.Instance;
+import com.android.ahat.heapdump.AhatHeap;
+import com.android.ahat.heapdump.AhatInstance;
+import com.android.ahat.heapdump.AhatSnapshot;
+import com.android.ahat.heapdump.Sort;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Collections;
@@ -39,39 +41,32 @@
    * @param instances the collection of instances to generate a list for
    */
   public static void render(final AhatSnapshot snapshot,
-      Doc doc, Query query, String id, Collection<Instance> instances) {
-    List<Instance> insts = new ArrayList<Instance>(instances);
+      Doc doc, Query query, String id, Collection<AhatInstance> instances) {
+    List<AhatInstance> insts = new ArrayList<AhatInstance>(instances);
     Collections.sort(insts, Sort.defaultInstanceCompare(snapshot));
-    HeapTable.render(doc, query, id, new TableConfig(snapshot), snapshot, insts);
+    HeapTable.render(doc, query, id, new TableConfig(), snapshot, insts);
   }
 
-  private static class TableConfig implements HeapTable.TableConfig<Instance> {
-    AhatSnapshot mSnapshot;
-
-    public TableConfig(AhatSnapshot snapshot) {
-      mSnapshot = snapshot;
-    }
-
+  private static class TableConfig implements HeapTable.TableConfig<AhatInstance> {
     @Override
     public String getHeapsDescription() {
       return "Bytes Retained by Heap";
     }
 
     @Override
-    public long getSize(Instance element, Heap heap) {
-      int index = mSnapshot.getHeapIndex(heap);
-      return element.getRetainedSize(index);
+    public long getSize(AhatInstance element, AhatHeap heap) {
+      return element.getRetainedSize(heap);
     }
 
     @Override
-    public List<HeapTable.ValueConfig<Instance>> getValueConfigs() {
-      HeapTable.ValueConfig<Instance> value = new HeapTable.ValueConfig<Instance>() {
+    public List<HeapTable.ValueConfig<AhatInstance>> getValueConfigs() {
+      HeapTable.ValueConfig<AhatInstance> value = new HeapTable.ValueConfig<AhatInstance>() {
         public String getDescription() {
           return "Object";
         }
 
-        public DocString render(Instance element) {
-          return Value.render(mSnapshot, element);
+        public DocString render(AhatInstance element) {
+          return Summarizer.summarize(element);
         }
       };
       return Collections.singletonList(value);
diff --git a/tools/ahat/src/HeapTable.java b/tools/ahat/src/HeapTable.java
index 5b84048..9abbe4a 100644
--- a/tools/ahat/src/HeapTable.java
+++ b/tools/ahat/src/HeapTable.java
@@ -16,7 +16,9 @@
 
 package com.android.ahat;
 
-import com.android.tools.perflib.heap.Heap;
+import com.android.ahat.heapdump.AhatHeap;
+import com.android.ahat.heapdump.AhatSnapshot;
+import com.android.ahat.heapdump.Diffable;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
@@ -39,21 +41,31 @@
    */
   public interface TableConfig<T> {
     String getHeapsDescription();
-    long getSize(T element, Heap heap);
+    long getSize(T element, AhatHeap heap);
     List<ValueConfig<T>> getValueConfigs();
   }
 
+  private static DocString sizeString(long size, boolean isPlaceHolder) {
+    DocString string = new DocString();
+    if (isPlaceHolder) {
+      string.append(DocString.removed("del"));
+    } else if (size != 0) {
+      string.appendFormat("%,14d", size);
+    }
+    return string;
+  }
+
   /**
    * Render the table to the given document.
    * @param query - The page query.
    * @param id - A unique identifier for the table on the page.
    */
-  public static <T> void render(Doc doc, Query query, String id,
+  public static <T extends Diffable<T>> void render(Doc doc, Query query, String id,
       TableConfig<T> config, AhatSnapshot snapshot, List<T> elements) {
     // Only show the heaps that have non-zero entries.
-    List<Heap> heaps = new ArrayList<Heap>();
-    for (Heap heap : snapshot.getHeaps()) {
-      if (hasNonZeroEntry(snapshot, heap, config, elements)) {
+    List<AhatHeap> heaps = new ArrayList<AhatHeap>();
+    for (AhatHeap heap : snapshot.getHeaps()) {
+      if (hasNonZeroEntry(heap, config, elements)) {
         heaps.add(heap);
       }
     }
@@ -61,14 +73,14 @@
     List<ValueConfig<T>> values = config.getValueConfigs();
 
     // Print the heap and values descriptions.
-    boolean showTotal = heaps.size() > 1;
     List<Column> subcols = new ArrayList<Column>();
-    for (Heap heap : heaps) {
+    for (AhatHeap heap : heaps) {
       subcols.add(new Column(heap.getName(), Column.Align.RIGHT));
+      subcols.add(new Column("Δ", Column.Align.RIGHT, snapshot.isDiffed()));
     }
-    if (showTotal) {
-      subcols.add(new Column("Total", Column.Align.RIGHT));
-    }
+    boolean showTotal = heaps.size() > 1;
+    subcols.add(new Column("Total", Column.Align.RIGHT, showTotal));
+    subcols.add(new Column("Δ", Column.Align.RIGHT, showTotal && snapshot.isDiffed()));
     List<Column> cols = new ArrayList<Column>();
     for (ValueConfig value : values) {
       cols.add(new Column(value.getDescription()));
@@ -79,16 +91,20 @@
     SubsetSelector<T> selector = new SubsetSelector(query, id, elements);
     ArrayList<DocString> vals = new ArrayList<DocString>();
     for (T elem : selector.selected()) {
+      T base = elem.getBaseline();
       vals.clear();
       long total = 0;
-      for (Heap heap : heaps) {
+      long basetotal = 0;
+      for (AhatHeap heap : heaps) {
         long size = config.getSize(elem, heap);
+        long basesize = config.getSize(base, heap.getBaseline());
         total += size;
-        vals.add(size == 0 ? DocString.text("") : DocString.format("%,14d", size));
+        basetotal += basesize;
+        vals.add(sizeString(size, elem.isPlaceHolder()));
+        vals.add(DocString.delta(elem.isPlaceHolder(), base.isPlaceHolder(), size, basesize));
       }
-      if (showTotal) {
-        vals.add(total == 0 ? DocString.text("") : DocString.format("%,14d", total));
-      }
+      vals.add(sizeString(total, elem.isPlaceHolder()));
+      vals.add(DocString.delta(elem.isPlaceHolder(), base.isPlaceHolder(), total, basetotal));
 
       for (ValueConfig<T> value : values) {
         vals.add(value.render(elem));
@@ -99,27 +115,36 @@
     // Print a summary of the remaining entries if there are any.
     List<T> remaining = selector.remaining();
     if (!remaining.isEmpty()) {
-      Map<Heap, Long> summary = new HashMap<Heap, Long>();
-      for (Heap heap : heaps) {
+      Map<AhatHeap, Long> summary = new HashMap<AhatHeap, Long>();
+      Map<AhatHeap, Long> basesummary = new HashMap<AhatHeap, Long>();
+      for (AhatHeap heap : heaps) {
         summary.put(heap, 0L);
+        basesummary.put(heap, 0L);
       }
 
       for (T elem : remaining) {
-        for (Heap heap : heaps) {
-          summary.put(heap, summary.get(heap) + config.getSize(elem, heap));
+        for (AhatHeap heap : heaps) {
+          long size = config.getSize(elem, heap);
+          summary.put(heap, summary.get(heap) + size);
+
+          long basesize = config.getSize(elem.getBaseline(), heap.getBaseline());
+          basesummary.put(heap, basesummary.get(heap) + basesize);
         }
       }
 
       vals.clear();
       long total = 0;
-      for (Heap heap : heaps) {
+      long basetotal = 0;
+      for (AhatHeap heap : heaps) {
         long size = summary.get(heap);
+        long basesize = basesummary.get(heap);
         total += size;
-        vals.add(DocString.format("%,14d", size));
+        basetotal += basesize;
+        vals.add(sizeString(size, false));
+        vals.add(DocString.delta(false, false, size, basesize));
       }
-      if (showTotal) {
-        vals.add(DocString.format("%,14d", total));
-      }
+      vals.add(sizeString(total, false));
+      vals.add(DocString.delta(false, false, total, basetotal));
 
       for (ValueConfig<T> value : values) {
         vals.add(DocString.text("..."));
@@ -131,11 +156,13 @@
   }
 
   // Returns true if the given heap has a non-zero size entry.
-  public static <T> boolean hasNonZeroEntry(AhatSnapshot snapshot, Heap heap,
+  public static <T extends Diffable<T>> boolean hasNonZeroEntry(AhatHeap heap,
       TableConfig<T> config, List<T> elements) {
-    if (snapshot.getHeapSize(heap) > 0) {
+    AhatHeap baseheap = heap.getBaseline();
+    if (heap.getSize() > 0 || baseheap.getSize() > 0) {
       for (T element : elements) {
-        if (config.getSize(element, heap) > 0) {
+        if (config.getSize(element, heap) > 0 ||
+            config.getSize(element.getBaseline(), baseheap) > 0) {
           return true;
         }
       }
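
The new <T extends Diffable<T>> bound is what makes the paired size and delta cells possible: every table element can name its counterpart in the baseline dump, with placeholder objects standing in where no counterpart exists. The real interface lives in com.android.ahat.heapdump; judging only from the calls made above, a minimal form would be:

    // Plausible minimal shape of the Diffable contract HeapTable relies on;
    // the actual interface in com.android.ahat.heapdump may carry more.
    public interface Diffable<T> {
      T getBaseline();          // this element's counterpart in the baseline dump
      boolean isPlaceHolder();  // true if this element stands in for a missing one
    }
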
diff --git a/tools/ahat/src/HelpHandler.java b/tools/ahat/src/HelpHandler.java
deleted file mode 100644
index 8de3c85..0000000
--- a/tools/ahat/src/HelpHandler.java
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.android.ahat;
-
-import com.google.common.io.ByteStreams;
-import com.sun.net.httpserver.HttpExchange;
-import com.sun.net.httpserver.HttpHandler;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.PrintStream;
-
-/**
- * HelpHandler.
- *
- * HttpHandler to show the help page.
- */
-class HelpHandler implements HttpHandler {
-
-  @Override
-  public void handle(HttpExchange exchange) throws IOException {
-    ClassLoader loader = HelpHandler.class.getClassLoader();
-    exchange.getResponseHeaders().add("Content-Type", "text/html;charset=utf-8");
-    exchange.sendResponseHeaders(200, 0);
-    PrintStream ps = new PrintStream(exchange.getResponseBody());
-    HtmlDoc doc = new HtmlDoc(ps, DocString.text("ahat"), DocString.uri("style.css"));
-    doc.menu(Menu.getMenu());
-
-    InputStream is = loader.getResourceAsStream("help.html");
-    if (is == null) {
-      ps.println("No help available.");
-    } else {
-      ByteStreams.copy(is, ps);
-    }
-
-    doc.close();
-    ps.close();
-  }
-}
diff --git a/tools/ahat/src/HtmlDoc.java b/tools/ahat/src/HtmlDoc.java
index 5ccbacb..5a22fc7 100644
--- a/tools/ahat/src/HtmlDoc.java
+++ b/tools/ahat/src/HtmlDoc.java
@@ -86,19 +86,27 @@
     mCurrentTableColumns = columns;
     ps.println("<table>");
     for (int i = 0; i < columns.length - 1; i++) {
-      ps.format("<th>%s</th>", columns[i].heading.html());
+      if (columns[i].visible) {
+        ps.format("<th>%s</th>", columns[i].heading.html());
+      }
     }
 
     // Align the last header to the left so it's easier to see if the last
     // column is very wide.
-    ps.format("<th align=\"left\">%s</th>", columns[columns.length - 1].heading.html());
+    if (columns[columns.length - 1].visible) {
+      ps.format("<th align=\"left\">%s</th>", columns[columns.length - 1].heading.html());
+    }
   }
 
   @Override
   public void table(DocString description, List<Column> subcols, List<Column> cols) {
     mCurrentTableColumns = new Column[subcols.size() + cols.size()];
     int j = 0;
+    int visibleSubCols = 0;
     for (Column col : subcols) {
+      if (col.visible) {
+        visibleSubCols++;
+      }
       mCurrentTableColumns[j] = col;
       j++;
     }
@@ -108,21 +116,27 @@
     }
 
     ps.println("<table>");
-    ps.format("<tr><th colspan=\"%d\">%s</th>", subcols.size(), description.html());
+    ps.format("<tr><th colspan=\"%d\">%s</th>", visibleSubCols, description.html());
     for (int i = 0; i < cols.size() - 1; i++) {
-      ps.format("<th rowspan=\"2\">%s</th>", cols.get(i).heading.html());
+      if (cols.get(i).visible) {
+        ps.format("<th rowspan=\"2\">%s</th>", cols.get(i).heading.html());
+      }
     }
     if (!cols.isEmpty()) {
       // Align the last column header to the left so it can still be seen if
       // the last column is very wide.
-      ps.format("<th align=\"left\" rowspan=\"2\">%s</th>",
-          cols.get(cols.size() - 1).heading.html());
+      Column col = cols.get(cols.size() - 1);
+      if (col.visible) {
+        ps.format("<th align=\"left\" rowspan=\"2\">%s</th>", col.heading.html());
+      }
     }
     ps.println("</tr>");
 
     ps.print("<tr>");
     for (Column subcol : subcols) {
-      ps.format("<th>%s</th>", subcol.heading.html());
+      if (subcol.visible) {
+        ps.format("<th>%s</th>", subcol.heading.html());
+      }
     }
     ps.println("</tr>");
   }
@@ -141,11 +155,13 @@
 
     ps.print("<tr>");
     for (int i = 0; i < values.length; i++) {
+      if (mCurrentTableColumns[i].visible) {
       ps.print("<td");
-      if (mCurrentTableColumns[i].align == Column.Align.RIGHT) {
-        ps.print(" align=\"right\"");
+        if (mCurrentTableColumns[i].align == Column.Align.RIGHT) {
+          ps.print(" align=\"right\"");
+        }
+        ps.format(">%s</td>", values[i].html());
       }
-      ps.format(">%s</td>", values[i].html());
     }
     ps.println("</tr>");
   }
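
Note the invariant this relies on: row() still expects one value per declared column, visible or not. Callers therefore always supply the delta cells, and HtmlDoc drops the <th>/<td> for any hidden column at print time, so value arrays never need reindexing. A hypothetical three-column table, with sizeCell, deltaCell and objectCell standing in for DocString values:

    doc.table(new Column("Size", Column.Align.RIGHT),
              new Column("Δ", Column.Align.RIGHT, false),  // hidden on this page
              new Column("Object"));
    doc.row(sizeCell, deltaCell, objectCell);  // deltaCell accepted, never printed
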
diff --git a/tools/ahat/src/InstanceUtils.java b/tools/ahat/src/InstanceUtils.java
deleted file mode 100644
index a062afd..0000000
--- a/tools/ahat/src/InstanceUtils.java
+++ /dev/null
@@ -1,457 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.android.ahat;
-
-import com.android.tools.perflib.heap.ArrayInstance;
-import com.android.tools.perflib.heap.ClassInstance;
-import com.android.tools.perflib.heap.ClassObj;
-import com.android.tools.perflib.heap.Field;
-import com.android.tools.perflib.heap.Heap;
-import com.android.tools.perflib.heap.Instance;
-import com.android.tools.perflib.heap.RootObj;
-import com.android.tools.perflib.heap.Type;
-
-import java.awt.image.BufferedImage;
-import java.nio.charset.StandardCharsets;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.List;
-import java.util.Map;
-
-/**
- * Utilities for extracting information from hprof instances.
- */
-class InstanceUtils {
-  /**
-   * Returns true if the given instance is an instance of a class with the
-   * given name.
-   */
-  private static boolean isInstanceOfClass(Instance inst, String className) {
-    ClassObj cls = (inst == null) ? null : inst.getClassObj();
-    return (cls != null && className.equals(cls.getClassName()));
-  }
-
-  /**
-   * Read the byte[] value from an hprof Instance.
-   * Returns null if the instance is not a byte array.
-   */
-  private static byte[] asByteArray(Instance inst) {
-    if (!(inst instanceof ArrayInstance)) {
-      return null;
-    }
-
-    ArrayInstance array = (ArrayInstance) inst;
-    if (array.getArrayType() != Type.BYTE) {
-      return null;
-    }
-
-    Object[] objs = array.getValues();
-    byte[] bytes = new byte[objs.length];
-    for (int i = 0; i < objs.length; i++) {
-      Byte b = (Byte) objs[i];
-      bytes[i] = b.byteValue();
-    }
-    return bytes;
-  }
-
-
-  /**
-   * Read the string value from an hprof Instance.
-   * Returns null if the object can't be interpreted as a string.
-   */
-  public static String asString(Instance inst) {
-    return asString(inst, -1);
-  }
-
-  /**
-   * Read the string value from an hprof Instance.
-   * Returns null if the object can't be interpreted as a string.
-   * The returned string is truncated to maxChars characters.
-   * If maxChars is negative, the returned string is not truncated.
-   */
-  public static String asString(Instance inst, int maxChars) {
-    // The inst object could either be a java.lang.String or a char[]. If it
-    // is a char[], use that directly as the value, otherwise use the value
-    // field of the string object. The field accesses for count and offset
-    // later on will work okay regardless of what type the inst object is.
-    boolean isString = isInstanceOfClass(inst, "java.lang.String");
-    Object value = isString ? getField(inst, "value") : inst;
-
-    if (!(value instanceof ArrayInstance)) {
-      return null;
-    }
-
-    ArrayInstance chars = (ArrayInstance) value;
-    int numChars = chars.getLength();
-    int offset = getIntField(inst, "offset", 0);
-    int count = getIntField(inst, "count", numChars);
-
-    // With string compression enabled, the array type can be BYTE but in that case
-    // offset must be 0 and count must match numChars.
-    if (isString && (chars.getArrayType() == Type.BYTE) && (offset == 0) && (count == numChars)) {
-      int length = (0 <= maxChars && maxChars < numChars) ? maxChars : numChars;
-      return new String(chars.asRawByteArray(/* offset */ 0, length), StandardCharsets.US_ASCII);
-    }
-    if (chars.getArrayType() != Type.CHAR) {
-      return null;
-    }
-    if (count == 0) {
-      return "";
-    }
-    if (0 <= maxChars && maxChars < count) {
-      count = maxChars;
-    }
-
-    int end = offset + count - 1;
-    if (offset >= 0 && offset < numChars && end >= 0 && end < numChars) {
-      return new String(chars.asCharArray(offset, count));
-    }
-    return null;
-  }
-
-  /**
-   * Read the bitmap data for the given android.graphics.Bitmap object.
-   * Returns null if the object isn't for android.graphics.Bitmap or the
-   * bitmap data couldn't be read.
-   */
-  public static BufferedImage asBitmap(Instance inst) {
-    if (!isInstanceOfClass(inst, "android.graphics.Bitmap")) {
-      return null;
-    }
-
-    Integer width = getIntField(inst, "mWidth", null);
-    if (width == null) {
-      return null;
-    }
-
-    Integer height = getIntField(inst, "mHeight", null);
-    if (height == null) {
-      return null;
-    }
-
-    byte[] buffer = getByteArrayField(inst, "mBuffer");
-    if (buffer == null) {
-      return null;
-    }
-
-    // Convert the raw data to an image
-    // Convert BGRA to ABGR
-    int[] abgr = new int[height * width];
-    for (int i = 0; i < abgr.length; i++) {
-      abgr[i] = (
-          (((int) buffer[i * 4 + 3] & 0xFF) << 24)
-          + (((int) buffer[i * 4 + 0] & 0xFF) << 16)
-          + (((int) buffer[i * 4 + 1] & 0xFF) << 8)
-          + ((int) buffer[i * 4 + 2] & 0xFF));
-    }
-
-    BufferedImage bitmap = new BufferedImage(
-        width, height, BufferedImage.TYPE_4BYTE_ABGR);
-    bitmap.setRGB(0, 0, width, height, abgr, 0, width);
-    return bitmap;
-  }
-
-  /**
-   * Read a field of an instance.
-   * Returns null if the field value is null or if the field couldn't be read.
-   */
-  public static Object getField(Instance inst, String fieldName) {
-    if (!(inst instanceof ClassInstance)) {
-      return null;
-    }
-
-    ClassInstance clsinst = (ClassInstance) inst;
-    Object value = null;
-    int count = 0;
-    for (ClassInstance.FieldValue field : clsinst.getValues()) {
-      if (fieldName.equals(field.getField().getName())) {
-        value = field.getValue();
-        count++;
-      }
-    }
-    return count == 1 ? value : null;
-  }
-
-  /**
-   * Read a reference field of an instance.
-   * Returns null if the field value is null, or if the field couldn't be read.
-   */
-  public static Instance getRefField(Instance inst, String fieldName) {
-    Object value = getField(inst, fieldName);
-    if (!(value instanceof Instance)) {
-      return null;
-    }
-    return (Instance) value;
-  }
-
-  /**
-   * Read an int field of an instance.
-   * The field is assumed to be an int type.
-   * Returns <code>def</code> if the field value is not an int or could not be
-   * read.
-   */
-  private static Integer getIntField(Instance inst, String fieldName, Integer def) {
-    Object value = getField(inst, fieldName);
-    if (!(value instanceof Integer)) {
-      return def;
-    }
-    return (Integer) value;
-  }
-
-  /**
-   * Read a long field of an instance.
-   * The field is assumed to be a long type.
-   * Returns <code>def</code> if the field value is not a long or could not
-   * be read.
-   */
-  private static Long getLongField(Instance inst, String fieldName, Long def) {
-    Object value = getField(inst, fieldName);
-    if (!(value instanceof Long)) {
-      return def;
-    }
-    return (Long) value;
-  }
-
-  /**
-   * Read the given field from the given instance.
-   * The field is assumed to be a byte[] field.
-   * Returns null if the field value is null, not a byte[] or could not be read.
-   */
-  private static byte[] getByteArrayField(Instance inst, String fieldName) {
-    Object value = getField(inst, fieldName);
-    if (!(value instanceof Instance)) {
-      return null;
-    }
-    return asByteArray((Instance) value);
-  }
-
-  // Return the bitmap instance associated with this object, or null if there
-  // is none. This works for android.graphics.Bitmap instances and their
-  // underlying Byte[] instances.
-  public static Instance getAssociatedBitmapInstance(Instance inst) {
-    ClassObj cls = inst.getClassObj();
-    if (cls == null) {
-      return null;
-    }
-
-    if ("android.graphics.Bitmap".equals(cls.getClassName())) {
-      return inst;
-    }
-
-    if (inst instanceof ArrayInstance) {
-      ArrayInstance array = (ArrayInstance) inst;
-      if (array.getArrayType() == Type.BYTE && inst.getHardReverseReferences().size() == 1) {
-        Instance ref = inst.getHardReverseReferences().get(0);
-        ClassObj clsref = ref.getClassObj();
-        if (clsref != null && "android.graphics.Bitmap".equals(clsref.getClassName())) {
-          return ref;
-        }
-      }
-    }
-    return null;
-  }
-
-  private static boolean isJavaLangRefReference(Instance inst) {
-    ClassObj cls = (inst == null) ? null : inst.getClassObj();
-    while (cls != null) {
-      if ("java.lang.ref.Reference".equals(cls.getClassName())) {
-        return true;
-      }
-      cls = cls.getSuperClassObj();
-    }
-    return false;
-  }
-
-  public static Instance getReferent(Instance inst) {
-    if (isJavaLangRefReference(inst)) {
-      return getRefField(inst, "referent");
-    }
-    return null;
-  }
-
-  /**
-   * Assuming inst represents a DexCache object, return the dex location for
-   * that dex cache. Returns null if the given instance doesn't represent a
-   * DexCache object or the location could not be found.
-   * If maxChars is non-negative, the returned location is truncated to
-   * maxChars in length.
-   */
-  public static String getDexCacheLocation(Instance inst, int maxChars) {
-    if (isInstanceOfClass(inst, "java.lang.DexCache")) {
-      Instance location = getRefField(inst, "location");
-      if (location != null) {
-        return asString(location, maxChars);
-      }
-    }
-    return null;
-  }
-
-  public static class NativeAllocation {
-    public long size;
-    public Heap heap;
-    public long pointer;
-    public Instance referent;
-
-    public NativeAllocation(long size, Heap heap, long pointer, Instance referent) {
-      this.size = size;
-      this.heap = heap;
-      this.pointer = pointer;
-      this.referent = referent;
-    }
-  }
-
-  /**
-   * Assuming inst represents a NativeAllocation, return information about the
-   * native allocation. Returns null if the given instance doesn't represent a
-   * native allocation.
-   */
-  public static NativeAllocation getNativeAllocation(Instance inst) {
-    if (!isInstanceOfClass(inst, "libcore.util.NativeAllocationRegistry$CleanerThunk")) {
-      return null;
-    }
-
-    Long pointer = InstanceUtils.getLongField(inst, "nativePtr", null);
-    if (pointer == null) {
-      return null;
-    }
-
-    // Search for the registry field of inst.
-    // Note: We know inst is an instance of ClassInstance because we already
-    // read the nativePtr field from it.
-    Instance registry = null;
-    for (ClassInstance.FieldValue field : ((ClassInstance) inst).getValues()) {
-      Object fieldValue = field.getValue();
-      if (fieldValue instanceof Instance) {
-        Instance fieldInst = (Instance) fieldValue;
-        if (isInstanceOfClass(fieldInst, "libcore.util.NativeAllocationRegistry")) {
-          registry = fieldInst;
-          break;
-        }
-      }
-    }
-
-    if (registry == null) {
-      return null;
-    }
-
-    Long size = InstanceUtils.getLongField(registry, "size", null);
-    if (size == null) {
-      return null;
-    }
-
-    Instance referent = null;
-    for (Instance ref : inst.getHardReverseReferences()) {
-      if (isInstanceOfClass(ref, "sun.misc.Cleaner")) {
-        referent = InstanceUtils.getReferent(ref);
-        if (referent != null) {
-          break;
-        }
-      }
-    }
-
-    if (referent == null) {
-      return null;
-    }
-    return new NativeAllocation(size, inst.getHeap(), pointer, referent);
-  }
-
-  public static class PathElement {
-    public final Instance instance;
-    public final String field;
-    public boolean isDominator;
-
-    public PathElement(Instance instance, String field) {
-      this.instance = instance;
-      this.field = field;
-      this.isDominator = false;
-    }
-  }
-
-  /**
-   * Returns a sample path from a GC root to this instance.
-   * The given instance is included as the last element of the path with an
-   * empty field description.
-   */
-  public static List<PathElement> getPathFromGcRoot(Instance inst) {
-    List<PathElement> path = new ArrayList<PathElement>();
-
-    Instance dom = inst;
-    for (PathElement elem = new PathElement(inst, ""); elem != null;
-        elem = getNextPathElementToGcRoot(elem.instance)) {
-      if (elem.instance == dom) {
-        elem.isDominator = true;
-        dom = dom.getImmediateDominator();
-      }
-      path.add(elem);
-    }
-    Collections.reverse(path);
-    return path;
-  }
-
-  /**
-   * Returns the next instance to GC root from this object and a string
-   * description of which field of that object refers to the given instance.
-   * Returns null if the given instance has no next instance to the gc root.
-   */
-  private static PathElement getNextPathElementToGcRoot(Instance inst) {
-    Instance parent = inst.getNextInstanceToGcRoot();
-    if (parent == null || parent instanceof RootObj) {
-      return null;
-    }
-
-    // Search the parent for the reference to the child.
-    // TODO: This seems terribly inefficient. Can we use data structures to
-    // help us here?
-    String description = ".???";
-    if (parent instanceof ArrayInstance) {
-      ArrayInstance array = (ArrayInstance)parent;
-      Object[] values = array.getValues();
-      for (int i = 0; i < values.length; i++) {
-        if (values[i] instanceof Instance) {
-          Instance ref = (Instance)values[i];
-          if (ref.getId() == inst.getId()) {
-            description = String.format("[%d]", i);
-            break;
-          }
-        }
-      }
-    } else if (parent instanceof ClassObj) {
-      ClassObj cls = (ClassObj)parent;
-      for (Map.Entry<Field, Object> entries : cls.getStaticFieldValues().entrySet()) {
-        if (entries.getValue() instanceof Instance) {
-          Instance ref = (Instance)entries.getValue();
-          if (ref.getId() == inst.getId()) {
-            description = "." + entries.getKey().getName();
-            break;
-          }
-        }
-      }
-    } else if (parent instanceof ClassInstance) {
-      ClassInstance obj = (ClassInstance)parent;
-      for (ClassInstance.FieldValue fields : obj.getValues()) {
-        if (fields.getValue() instanceof Instance) {
-          Instance ref = (Instance)fields.getValue();
-          if (ref.getId() == inst.getId()) {
-            description = "." + fields.getField().getName();
-            break;
-          }
-        }
-      }
-    }
-    return new PathElement(parent, description);
-  }
-}
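
Most of these helpers resurface as methods on the new heapdump types: asBitmap() and asString() on AhatInstance (see the BitmapHandler hunk above), getRootTypes() on AhatInstance returning plain strings (see ObjectHandler below), and PathElement as its own class in com.android.ahat.heapdump. The dominator-marking walk in getPathFromGcRoot deserves one extra sentence: every path from a GC root to an object passes through each of the object's dominators in order, so walking a single sample path while advancing a cursor up the immediate-dominator chain whenever it matches flags exactly those path elements that are dominators.

    // Worked example of that marking. Suppose the sampled path (before the
    // final reverse) visits inst, c, b, a, and the immediate-dominator
    // chain of inst is inst -> b -> a. With the cursor 'dom' starting at inst:
    //   elem = inst : matches dom, mark, dom = b
    //   elem = c    : no match (c is on this path, but not on every path)
    //   elem = b    : matches dom, mark, dom = a
    //   elem = a    : matches dom, mark
    // so inst, b and a get isDominator = true, and c does not.
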
diff --git a/tools/ahat/src/Main.java b/tools/ahat/src/Main.java
index c79b578..b8552fe 100644
--- a/tools/ahat/src/Main.java
+++ b/tools/ahat/src/Main.java
@@ -16,6 +16,8 @@
 
 package com.android.ahat;
 
+import com.android.ahat.heapdump.AhatSnapshot;
+import com.android.ahat.heapdump.Diff;
 import com.android.tools.perflib.heap.ProguardMap;
 import com.sun.net.httpserver.HttpServer;
 import java.io.File;
@@ -29,15 +31,18 @@
 public class Main {
 
   public static void help(PrintStream out) {
-    out.println("java -jar ahat.jar [-p port] [--proguard-map FILE] FILE");
-    out.println("  Launch an http server for viewing "
-        + "the given Android heap-dump FILE.");
+    out.println("java -jar ahat.jar [OPTIONS] FILE");
+    out.println("  Launch an http server for viewing the given Android heap dump FILE.");
     out.println("");
-    out.println("Options:");
+    out.println("OPTIONS:");
     out.println("  -p <port>");
     out.println("     Serve pages on the given port. Defaults to 7100.");
     out.println("  --proguard-map FILE");
     out.println("     Use the proguard map FILE to deobfuscate the heap dump.");
+    out.println("  --baseline FILE");
+    out.println("     Diff the heap dump against the given baseline heap dump FILE.");
+    out.println("  --baseline-proguard-map FILE");
+    out.println("     Use the proguard map FILE to deobfuscate the baseline heap dump.");
     out.println("");
   }
 
@@ -51,7 +56,9 @@
     }
 
     File hprof = null;
+    File hprofbase = null;
     ProguardMap map = new ProguardMap();
+    ProguardMap mapbase = new ProguardMap();
     for (int i = 0; i < args.length; i++) {
       if ("-p".equals(args[i]) && i + 1 < args.length) {
         i++;
@@ -64,6 +71,22 @@
           System.out.println("Unable to read proguard map: " + ex);
           System.out.println("The proguard map will not be used.");
         }
+      } else if ("--baseline-proguard-map".equals(args[i]) && i + 1 < args.length) {
+        i++;
+        try {
+          mapbase.readFromFile(new File(args[i]));
+        } catch (IOException|ParseException ex) {
+          System.out.println("Unable to read baselline proguard map: " + ex);
+          System.out.println("The proguard map will not be used.");
+        }
+      } else if ("--baseline".equals(args[i]) && i + 1 < args.length) {
+        i++;
+        if (hprofbase != null) {
+          System.err.println("multiple baseline heap dumps.");
+          help(System.err);
+          return;
+        }
+        hprofbase = new File(args[i]);
       } else {
         if (hprof != null) {
           System.err.println("multiple input files.");
@@ -88,17 +111,25 @@
 
     System.out.println("Processing hprof file...");
     AhatSnapshot ahat = AhatSnapshot.fromHprof(hprof, map);
-    server.createContext("/", new AhatHttpHandler(new OverviewHandler(ahat, hprof)));
+
+    if (hprofbase != null) {
+      System.out.println("Processing baseline hprof file...");
+      AhatSnapshot base = AhatSnapshot.fromHprof(hprofbase, mapbase);
+
+      System.out.println("Diffing hprof files...");
+      Diff.snapshots(ahat, base);
+    }
+
+    server.createContext("/", new AhatHttpHandler(new OverviewHandler(ahat, hprof, hprofbase)));
     server.createContext("/rooted", new AhatHttpHandler(new RootedHandler(ahat)));
     server.createContext("/object", new AhatHttpHandler(new ObjectHandler(ahat)));
     server.createContext("/objects", new AhatHttpHandler(new ObjectsHandler(ahat)));
     server.createContext("/site", new AhatHttpHandler(new SiteHandler(ahat)));
-    server.createContext("/native", new AhatHttpHandler(new NativeAllocationsHandler(ahat)));
     server.createContext("/bitmap", new BitmapHandler(ahat));
-    server.createContext("/help", new HelpHandler());
     server.createContext("/style.css", new StaticHandler("style.css", "text/css"));
     server.setExecutor(Executors.newFixedThreadPool(1));
     System.out.println("Server started on localhost:" + port);
+
     server.start();
   }
 }
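
Put together, a diff session now looks like the following, with the file names as placeholders: ahat loads both dumps, Diff.snapshots wires up the baselines, and the pages gain the Δ columns introduced above.

    java -jar ahat.jar -p 7100 \
        --proguard-map current-mapping.txt \
        --baseline before.hprof \
        --baseline-proguard-map baseline-mapping.txt \
        after.hprof
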
diff --git a/tools/ahat/src/Menu.java b/tools/ahat/src/Menu.java
index 232b849..6d38dc5 100644
--- a/tools/ahat/src/Menu.java
+++ b/tools/ahat/src/Menu.java
@@ -25,11 +25,7 @@
       .append(" - ")
       .appendLink(DocString.uri("rooted"), DocString.text("rooted"))
       .append(" - ")
-      .appendLink(DocString.uri("sites"), DocString.text("allocations"))
-      .append(" - ")
-      .appendLink(DocString.uri("native"), DocString.text("native"))
-      .append(" - ")
-      .appendLink(DocString.uri("help"), DocString.text("help"));
+      .appendLink(DocString.uri("sites"), DocString.text("allocations"));
 
   /**
    * Returns the menu as a DocString.
diff --git a/tools/ahat/src/NativeAllocationsHandler.java b/tools/ahat/src/NativeAllocationsHandler.java
deleted file mode 100644
index 17407e1..0000000
--- a/tools/ahat/src/NativeAllocationsHandler.java
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.android.ahat;
-
-import java.io.IOException;
-import java.util.Collections;
-import java.util.Comparator;
-import java.util.List;
-
-class NativeAllocationsHandler implements AhatHandler {
-  private static final String ALLOCATIONS_ID = "allocations";
-
-  private AhatSnapshot mSnapshot;
-
-  public NativeAllocationsHandler(AhatSnapshot snapshot) {
-    mSnapshot = snapshot;
-  }
-
-  @Override
-  public void handle(Doc doc, Query query) throws IOException {
-    List<InstanceUtils.NativeAllocation> allocs = mSnapshot.getNativeAllocations();
-
-    doc.title("Registered Native Allocations");
-
-    doc.section("Overview");
-    long totalSize = 0;
-    for (InstanceUtils.NativeAllocation alloc : allocs) {
-      totalSize += alloc.size;
-    }
-    doc.descriptions();
-    doc.description(DocString.text("Number of Registered Native Allocations"),
-        DocString.format("%,14d", allocs.size()));
-    doc.description(DocString.text("Total Size of Registered Native Allocations"),
-        DocString.format("%,14d", totalSize));
-    doc.end();
-
-    doc.section("List of Allocations");
-    if (allocs.isEmpty()) {
-      doc.println(DocString.text("(none)"));
-    } else {
-      doc.table(
-          new Column("Size", Column.Align.RIGHT),
-          new Column("Heap"),
-          new Column("Native Pointer"),
-          new Column("Referent"));
-      Comparator<InstanceUtils.NativeAllocation> compare
-        = new Sort.WithPriority<InstanceUtils.NativeAllocation>(
-            new Sort.NativeAllocationByHeapName(),
-            new Sort.NativeAllocationBySize());
-      Collections.sort(allocs, compare);
-      SubsetSelector<InstanceUtils.NativeAllocation> selector
-        = new SubsetSelector(query, ALLOCATIONS_ID, allocs);
-      for (InstanceUtils.NativeAllocation alloc : selector.selected()) {
-        doc.row(
-            DocString.format("%,14d", alloc.size),
-            DocString.text(alloc.heap.getName()),
-            DocString.format("0x%x", alloc.pointer),
-            Value.render(mSnapshot, alloc.referent));
-      }
-
-      // Print a summary of the remaining entries if there are any.
-      List<InstanceUtils.NativeAllocation> remaining = selector.remaining();
-      if (!remaining.isEmpty()) {
-        long total = 0;
-        for (InstanceUtils.NativeAllocation alloc : remaining) {
-          total += alloc.size;
-        }
-
-        doc.row(
-            DocString.format("%,14d", total),
-            DocString.text("..."),
-            DocString.text("..."),
-            DocString.text("..."));
-      }
-
-      doc.end();
-      selector.render(doc);
-    }
-  }
-}
-
diff --git a/tools/ahat/src/ObjectHandler.java b/tools/ahat/src/ObjectHandler.java
index 78aac17..2e0ae6e 100644
--- a/tools/ahat/src/ObjectHandler.java
+++ b/tools/ahat/src/ObjectHandler.java
@@ -16,22 +16,23 @@
 
 package com.android.ahat;
 
-import com.android.tools.perflib.heap.ArrayInstance;
-import com.android.tools.perflib.heap.ClassInstance;
-import com.android.tools.perflib.heap.ClassObj;
-import com.android.tools.perflib.heap.Field;
-import com.android.tools.perflib.heap.Heap;
-import com.android.tools.perflib.heap.Instance;
-import com.android.tools.perflib.heap.RootType;
+import com.android.ahat.heapdump.AhatArrayInstance;
+import com.android.ahat.heapdump.AhatClassInstance;
+import com.android.ahat.heapdump.AhatClassObj;
+import com.android.ahat.heapdump.AhatHeap;
+import com.android.ahat.heapdump.AhatInstance;
+import com.android.ahat.heapdump.AhatSnapshot;
+import com.android.ahat.heapdump.Diff;
+import com.android.ahat.heapdump.FieldValue;
+import com.android.ahat.heapdump.PathElement;
+import com.android.ahat.heapdump.Site;
+import com.android.ahat.heapdump.Value;
 import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.List;
-import java.util.Map;
+import java.util.Objects;
 
-import static com.android.ahat.InstanceUtils.PathElement;
 
 class ObjectHandler implements AhatHandler {
 
@@ -53,35 +54,43 @@
   @Override
   public void handle(Doc doc, Query query) throws IOException {
     long id = query.getLong("id", 0);
-    Instance inst = mSnapshot.findInstance(id);
+    AhatInstance inst = mSnapshot.findInstance(id);
     if (inst == null) {
       doc.println(DocString.format("No object with id %08xl", id));
       return;
     }
+    AhatInstance base = inst.getBaseline();
 
-    doc.title("Object %08x", inst.getUniqueId());
-    doc.big(Value.render(mSnapshot, inst));
+    doc.title("Object %08x", inst.getId());
+    doc.big(Summarizer.summarize(inst));
 
     printAllocationSite(doc, query, inst);
     printGcRootPath(doc, query, inst);
 
     doc.section("Object Info");
-    ClassObj cls = inst.getClassObj();
+    AhatClassObj cls = inst.getClassObj();
     doc.descriptions();
-    doc.description(DocString.text("Class"), Value.render(mSnapshot, cls));
-    doc.description(DocString.text("Size"), DocString.format("%d", inst.getSize()));
-    doc.description(
-        DocString.text("Retained Size"),
-        DocString.format("%d", inst.getTotalRetainedSize()));
+    doc.description(DocString.text("Class"), Summarizer.summarize(cls));
+
+    DocString sizeDescription = DocString.format("%,14d ", inst.getSize());
+    sizeDescription.appendDelta(false, base.isPlaceHolder(),
+        inst.getSize(), base.getSize());
+    doc.description(DocString.text("Size"), sizeDescription);
+
+    DocString rsizeDescription = DocString.format("%,14d ", inst.getTotalRetainedSize());
+    rsizeDescription.appendDelta(false, base.isPlaceHolder(),
+        inst.getTotalRetainedSize(), base.getTotalRetainedSize());
+    doc.description(DocString.text("Retained Size"), rsizeDescription);
+
     doc.description(DocString.text("Heap"), DocString.text(inst.getHeap().getName()));
 
-    Collection<RootType> rootTypes = mSnapshot.getRootTypes(inst);
+    Collection<String> rootTypes = inst.getRootTypes();
     if (rootTypes != null) {
       DocString types = new DocString();
       String comma = "";
-      for (RootType type : rootTypes) {
+      for (String type : rootTypes) {
         types.append(comma);
-        types.append(type.getName());
+        types.append(type);
         comma = ", ";
       }
       doc.description(DocString.text("Root Types"), types);
@@ -90,112 +99,146 @@
     doc.end();
 
     printBitmap(doc, inst);
-    if (inst instanceof ClassInstance) {
-      printClassInstanceFields(doc, query, mSnapshot, (ClassInstance)inst);
-    } else if (inst instanceof ArrayInstance) {
-      printArrayElements(doc, query, mSnapshot, (ArrayInstance)inst);
-    } else if (inst instanceof ClassObj) {
-      printClassInfo(doc, query, mSnapshot, (ClassObj)inst);
+    if (inst.isClassInstance()) {
+      printClassInstanceFields(doc, query, inst.asClassInstance());
+    } else if (inst.isArrayInstance()) {
+      printArrayElements(doc, query, inst.asArrayInstance());
+    } else if (inst.isClassObj()) {
+      printClassInfo(doc, query, inst.asClassObj());
     }
-    printReferences(doc, query, mSnapshot, inst);
+    printReferences(doc, query, inst);
     printDominatedObjects(doc, query, inst);
   }
 
-  private static void printClassInstanceFields(
-      Doc doc, Query query, AhatSnapshot snapshot, ClassInstance inst) {
+  private static void printClassInstanceFields(Doc doc, Query query, AhatClassInstance inst) {
     doc.section("Fields");
-    doc.table(new Column("Type"), new Column("Name"), new Column("Value"));
-    SubsetSelector<ClassInstance.FieldValue> selector
-      = new SubsetSelector(query, INSTANCE_FIELDS_ID, inst.getValues());
-    for (ClassInstance.FieldValue field : selector.selected()) {
-      doc.row(
-          DocString.text(field.getField().getType().toString()),
-          DocString.text(field.getField().getName()),
-          Value.render(snapshot, field.getValue()));
+    AhatInstance base = inst.getBaseline();
+    List<FieldValue> fields = inst.getInstanceFields();
+    if (!base.isPlaceHolder()) {
+      Diff.fields(fields, base.asClassInstance().getInstanceFields());
     }
-    doc.end();
+    SubsetSelector<FieldValue> selector = new SubsetSelector(query, INSTANCE_FIELDS_ID, fields);
+    printFields(doc, inst != base && !base.isPlaceHolder(), selector.selected());
     selector.render(doc);
   }
 
-  private static void printArrayElements(
-      Doc doc, Query query, AhatSnapshot snapshot, ArrayInstance array) {
+  private static void printArrayElements(Doc doc, Query query, AhatArrayInstance array) {
     doc.section("Array Elements");
-    doc.table(new Column("Index", Column.Align.RIGHT), new Column("Value"));
-    List<Object> elements = Arrays.asList(array.getValues());
-    SubsetSelector<Object> selector = new SubsetSelector(query, ARRAY_ELEMENTS_ID, elements);
+    AhatInstance base = array.getBaseline();
+    boolean diff = array.getBaseline() != array && !base.isPlaceHolder();
+    doc.table(
+        new Column("Index", Column.Align.RIGHT),
+        new Column("Value"),
+        new Column("Δ", Column.Align.LEFT, diff));
+
+    List<Value> elements = array.getValues();
+    SubsetSelector<Value> selector = new SubsetSelector(query, ARRAY_ELEMENTS_ID, elements);
     int i = 0;
-    for (Object elem : selector.selected()) {
-      doc.row(DocString.format("%d", i), Value.render(snapshot, elem));
+    for (Value current : selector.selected()) {
+      DocString delta = new DocString();
+      if (diff) {
+        Value previous = Value.getBaseline(base.asArrayInstance().getValue(i));
+        if (!Objects.equals(current, previous)) {
+          delta.append("was ");
+          delta.append(Summarizer.summarize(previous));
+        }
+      }
+      doc.row(DocString.format("%d", i), Summarizer.summarize(current), delta);
       i++;
     }
     doc.end();
     selector.render(doc);
   }
 
-  private static void printClassInfo(
-      Doc doc, Query query, AhatSnapshot snapshot, ClassObj clsobj) {
+  private static void printFields(Doc doc, boolean diff, List<FieldValue> fields) {
+    doc.table(
+        new Column("Type"),
+        new Column("Name"),
+        new Column("Value"),
+        new Column("Δ", Column.Align.LEFT, diff));
+
+    for (FieldValue field : fields) {
+      Value current = field.getValue();
+      DocString value;
+      if (field.isPlaceHolder()) {
+        value = DocString.removed("del");
+      } else {
+        value = Summarizer.summarize(current);
+      }
+
+      DocString delta = new DocString();
+      FieldValue basefield = field.getBaseline();
+      if (basefield.isPlaceHolder()) {
+        delta.append(DocString.added("new"));
+      } else {
+        Value previous = Value.getBaseline(basefield.getValue());
+        if (!Objects.equals(current, previous)) {
+          delta.append("was ");
+          delta.append(Summarizer.summarize(previous));
+        }
+      }
+      doc.row(DocString.text(field.getType()), DocString.text(field.getName()), value, delta);
+    }
+    doc.end();
+  }
+
+  private static void printClassInfo(Doc doc, Query query, AhatClassObj clsobj) {
     doc.section("Class Info");
     doc.descriptions();
     doc.description(DocString.text("Super Class"),
-        Value.render(snapshot, clsobj.getSuperClassObj()));
+        Summarizer.summarize(clsobj.getSuperClassObj()));
     doc.description(DocString.text("Class Loader"),
-        Value.render(snapshot, clsobj.getClassLoader()));
+        Summarizer.summarize(clsobj.getClassLoader()));
     doc.end();
 
     doc.section("Static Fields");
-    doc.table(new Column("Type"), new Column("Name"), new Column("Value"));
-    List<Map.Entry<Field, Object>> fields
-      = new ArrayList<Map.Entry<Field, Object>>(clsobj.getStaticFieldValues().entrySet());
-    SubsetSelector<Map.Entry<Field, Object>> selector
-      = new SubsetSelector(query, STATIC_FIELDS_ID, fields);
-    for (Map.Entry<Field, Object> field : selector.selected()) {
-      doc.row(
-          DocString.text(field.getKey().getType().toString()),
-          DocString.text(field.getKey().getName()),
-          Value.render(snapshot, field.getValue()));
+    AhatInstance base = clsobj.getBaseline();
+    List<FieldValue> fields = clsobj.getStaticFieldValues();
+    if (!base.isPlaceHolder()) {
+      Diff.fields(fields, base.asClassObj().getStaticFieldValues());
     }
-    doc.end();
+    SubsetSelector<FieldValue> selector = new SubsetSelector(query, STATIC_FIELDS_ID, fields);
+    printFields(doc, clsobj != base && !base.isPlaceHolder(), selector.selected());
     selector.render(doc);
   }
 
-  private static void printReferences(
-      Doc doc, Query query, AhatSnapshot snapshot, Instance inst) {
+  private static void printReferences(Doc doc, Query query, AhatInstance inst) {
     doc.section("Objects with References to this Object");
     if (inst.getHardReverseReferences().isEmpty()) {
       doc.println(DocString.text("(none)"));
     } else {
       doc.table(new Column("Object"));
-      List<Instance> references = inst.getHardReverseReferences();
-      SubsetSelector<Instance> selector = new SubsetSelector(query, HARD_REFS_ID, references);
-      for (Instance ref : selector.selected()) {
-        doc.row(Value.render(snapshot, ref));
+      List<AhatInstance> references = inst.getHardReverseReferences();
+      SubsetSelector<AhatInstance> selector = new SubsetSelector(query, HARD_REFS_ID, references);
+      for (AhatInstance ref : selector.selected()) {
+        doc.row(Summarizer.summarize(ref));
       }
       doc.end();
       selector.render(doc);
     }
 
-    if (inst.getSoftReverseReferences() != null) {
+    if (!inst.getSoftReverseReferences().isEmpty()) {
       doc.section("Objects with Soft References to this Object");
       doc.table(new Column("Object"));
-      List<Instance> references = inst.getSoftReverseReferences();
-      SubsetSelector<Instance> selector = new SubsetSelector(query, SOFT_REFS_ID, references);
-      for (Instance ref : selector.selected()) {
-        doc.row(Value.render(snapshot, ref));
+      List<AhatInstance> references = inst.getSoftReverseReferences();
+      SubsetSelector<AhatInstance> selector = new SubsetSelector(query, SOFT_REFS_ID, references);
+      for (AhatInstance ref : selector.selected()) {
+        doc.row(Summarizer.summarize(ref));
       }
       doc.end();
       selector.render(doc);
     }
   }
 
-  private void printAllocationSite(Doc doc, Query query, Instance inst) {
+  private void printAllocationSite(Doc doc, Query query, AhatInstance inst) {
     doc.section("Allocation Site");
-    Site site = mSnapshot.getSiteForInstance(inst);
+    Site site = inst.getSite();
     SitePrinter.printSite(mSnapshot, doc, query, ALLOCATION_SITE_ID, site);
   }
 
   // Draw the bitmap corresponding to this instance if there is one.
-  private static void printBitmap(Doc doc, Instance inst) {
-    Instance bitmap = InstanceUtils.getAssociatedBitmapInstance(inst);
+  private static void printBitmap(Doc doc, AhatInstance inst) {
+    AhatInstance bitmap = inst.getAssociatedBitmapInstance();
     if (bitmap != null) {
       doc.section("Bitmap Image");
       doc.println(DocString.image(
@@ -203,25 +246,25 @@
     }
   }
 
-  private void printGcRootPath(Doc doc, Query query, Instance inst) {
+  private void printGcRootPath(Doc doc, Query query, AhatInstance inst) {
     doc.section("Sample Path from GC Root");
-    List<PathElement> path = InstanceUtils.getPathFromGcRoot(inst);
+    List<PathElement> path = inst.getPathFromGcRoot();
 
-    // Add 'null' as a marker for the root.
-    path.add(0, null);
+    // Add a dummy PathElement as a marker for the root.
+    final PathElement root = new PathElement(null, null);
+    path.add(0, root);
 
     HeapTable.TableConfig<PathElement> table = new HeapTable.TableConfig<PathElement>() {
       public String getHeapsDescription() {
         return "Bytes Retained by Heap (Dominators Only)";
       }
 
-      public long getSize(PathElement element, Heap heap) {
-        if (element == null) {
-          return mSnapshot.getHeapSize(heap);
+      public long getSize(PathElement element, AhatHeap heap) {
+        if (element == root) {
+          return heap.getSize();
         }
         if (element.isDominator) {
-          int index = mSnapshot.getHeapIndex(heap);
-          return element.instance.getRetainedSize(index);
+          return element.instance.getRetainedSize(heap);
         }
         return 0;
       }
@@ -233,11 +276,11 @@
           }
 
           public DocString render(PathElement element) {
-            if (element == null) {
+            if (element == root) {
               return DocString.link(DocString.uri("rooted"), DocString.text("ROOT"));
             } else {
-              DocString label = DocString.text(" → ");
-              label.append(Value.render(mSnapshot, element.instance));
+              DocString label = DocString.text("→ ");
+              label.append(Summarizer.summarize(element.instance));
               label.append(element.field);
               return label;
             }
@@ -249,9 +292,9 @@
     HeapTable.render(doc, query, DOMINATOR_PATH_ID, table, mSnapshot, path);
   }
 
-  public void printDominatedObjects(Doc doc, Query query, Instance inst) {
+  public void printDominatedObjects(Doc doc, Query query, AhatInstance inst) {
     doc.section("Immediately Dominated Objects");
-    List<Instance> instances = mSnapshot.getDominated(inst);
+    List<AhatInstance> instances = inst.getDominated();
     if (instances != null) {
       DominatedList.render(mSnapshot, doc, query, DOMINATED_OBJECTS_ID, instances);
     } else {
diff --git a/tools/ahat/src/ObjectsHandler.java b/tools/ahat/src/ObjectsHandler.java
index 4cfb0a5..3062d23 100644
--- a/tools/ahat/src/ObjectsHandler.java
+++ b/tools/ahat/src/ObjectsHandler.java
@@ -16,7 +16,10 @@
 
 package com.android.ahat;
 
-import com.android.tools.perflib.heap.Instance;
+import com.android.ahat.heapdump.AhatInstance;
+import com.android.ahat.heapdump.AhatSnapshot;
+import com.android.ahat.heapdump.Site;
+import com.android.ahat.heapdump.Sort;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collections;
@@ -33,17 +36,16 @@
 
   @Override
   public void handle(Doc doc, Query query) throws IOException {
-    int stackId = query.getInt("stack", 0);
+    int id = query.getInt("id", 0);
     int depth = query.getInt("depth", 0);
     String className = query.get("class", null);
     String heapName = query.get("heap", null);
-    Site site = mSnapshot.getSite(stackId, depth);
+    Site site = mSnapshot.getSite(id, depth);
 
-    List<Instance> insts = new ArrayList<Instance>();
-    for (Instance inst : site.getObjects()) {
+    List<AhatInstance> insts = new ArrayList<AhatInstance>();
+    for (AhatInstance inst : site.getObjects()) {
       if ((heapName == null || inst.getHeap().getName().equals(heapName))
-          && (className == null
-            || AhatSnapshot.getClassName(inst.getClassObj()).equals(className))) {
+          && (className == null || inst.getClassName().equals(className))) {
         insts.add(inst);
       }
     }
@@ -51,16 +53,22 @@
     Collections.sort(insts, Sort.defaultInstanceCompare(mSnapshot));
 
     doc.title("Objects");
+
     doc.table(
         new Column("Size", Column.Align.RIGHT),
+        new Column("Δ", Column.Align.RIGHT, mSnapshot.isDiffed()),
         new Column("Heap"),
         new Column("Object"));
-    SubsetSelector<Instance> selector = new SubsetSelector(query, OBJECTS_ID, insts);
-    for (Instance inst : selector.selected()) {
+
+    SubsetSelector<AhatInstance> selector = new SubsetSelector(query, OBJECTS_ID, insts);
+    for (AhatInstance inst : selector.selected()) {
+      AhatInstance base = inst.getBaseline();
       doc.row(
-          DocString.format("%,d", inst.getSize()),
+          DocString.format("%,14d", inst.getSize()),
+          DocString.delta(inst.isPlaceHolder(), base.isPlaceHolder(),
+            inst.getSize(), base.getSize()),
           DocString.text(inst.getHeap().getName()),
-          Value.render(mSnapshot, inst));
+          Summarizer.summarize(inst));
     }
     doc.end();
     selector.render(doc);
diff --git a/tools/ahat/src/OverviewHandler.java b/tools/ahat/src/OverviewHandler.java
index 0dbad7e..ea305c4 100644
--- a/tools/ahat/src/OverviewHandler.java
+++ b/tools/ahat/src/OverviewHandler.java
@@ -16,9 +16,11 @@
 
 package com.android.ahat;
 
-import com.android.tools.perflib.heap.Heap;
-import java.io.IOException;
+import com.android.ahat.heapdump.AhatHeap;
+import com.android.ahat.heapdump.AhatSnapshot;
+import com.android.ahat.heapdump.Diffable;
 import java.io.File;
+import java.io.IOException;
 import java.util.Collections;
 import java.util.List;
 
@@ -28,10 +30,12 @@
 
   private AhatSnapshot mSnapshot;
   private File mHprof;
+  private File mBaseHprof;
 
-  public OverviewHandler(AhatSnapshot snapshot, File hprof) {
+  public OverviewHandler(AhatSnapshot snapshot, File hprof, File basehprof) {
     mSnapshot = snapshot;
     mHprof = hprof;
+    mBaseHprof = basehprof;
   }
 
   @Override
@@ -44,42 +48,40 @@
         DocString.text("ahat version"),
         DocString.format("ahat-%s", OverviewHandler.class.getPackage().getImplementationVersion()));
     doc.description(DocString.text("hprof file"), DocString.text(mHprof.toString()));
+    if (mBaseHprof != null) {
+      doc.description(DocString.text("baseline hprof file"), DocString.text(mBaseHprof.toString()));
+    }
     doc.end();
 
     doc.section("Heap Sizes");
     printHeapSizes(doc, query);
 
-    List<InstanceUtils.NativeAllocation> allocs = mSnapshot.getNativeAllocations();
-    if (!allocs.isEmpty()) {
-      doc.section("Registered Native Allocations");
-      long totalSize = 0;
-      for (InstanceUtils.NativeAllocation alloc : allocs) {
-        totalSize += alloc.size;
-      }
-      doc.descriptions();
-      doc.description(DocString.text("Number of Registered Native Allocations"),
-          DocString.format("%,14d", allocs.size()));
-      doc.description(DocString.text("Total Size of Registered Native Allocations"),
-          DocString.format("%,14d", totalSize));
-      doc.end();
-    }
-
     doc.big(Menu.getMenu());
   }
 
-  private void printHeapSizes(Doc doc, Query query) {
-    List<Object> dummy = Collections.singletonList(null);
+  private static class TableElem implements Diffable<TableElem> {
+    @Override public TableElem getBaseline() {
+      return this;
+    }
 
-    HeapTable.TableConfig<Object> table = new HeapTable.TableConfig<Object>() {
+    @Override public boolean isPlaceHolder() {
+      return false;
+    }
+  }
+
+  private void printHeapSizes(Doc doc, Query query) {
+    List<TableElem> dummy = Collections.singletonList(new TableElem());
+
+    HeapTable.TableConfig<TableElem> table = new HeapTable.TableConfig<TableElem>() {
       public String getHeapsDescription() {
         return "Bytes Retained by Heap";
       }
 
-      public long getSize(Object element, Heap heap) {
-        return mSnapshot.getHeapSize(heap);
+      public long getSize(TableElem element, AhatHeap heap) {
+        return heap.getSize();
       }
 
-      public List<HeapTable.ValueConfig<Object>> getValueConfigs() {
+      public List<HeapTable.ValueConfig<TableElem>> getValueConfigs() {
         return Collections.emptyList();
       }
     };
diff --git a/tools/ahat/src/RootedHandler.java b/tools/ahat/src/RootedHandler.java
index ec3272f..26451a3 100644
--- a/tools/ahat/src/RootedHandler.java
+++ b/tools/ahat/src/RootedHandler.java
@@ -16,6 +16,7 @@
 
 package com.android.ahat;
 
+import com.android.ahat.heapdump.AhatSnapshot;
 import java.io.IOException;
 
 class RootedHandler implements AhatHandler {
diff --git a/tools/ahat/src/Site.java b/tools/ahat/src/Site.java
deleted file mode 100644
index dbb84f6..0000000
--- a/tools/ahat/src/Site.java
+++ /dev/null
@@ -1,199 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.android.ahat;
-
-import com.android.tools.perflib.heap.ClassObj;
-import com.android.tools.perflib.heap.Heap;
-import com.android.tools.perflib.heap.Instance;
-import com.android.tools.perflib.heap.StackFrame;
-
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-
-class Site {
-  // The site that this site was directly called from.
-  // mParent is null for the root site.
-  private Site mParent;
-
-  // A description of the Site. Currently this is used to uniquely identify a
-  // site within its parent.
-  private String mName;
-
-  // To identify this site, we pick one stack trace where we have seen the
-  // site. mStackId is the id for that stack trace, and mStackDepth is the
-  // depth of this site in that stack trace.
-  // For the root site, mStackId is 0 and mStackDepth is 0.
-  private int mStackId;
-  private int mStackDepth;
-
-  // Mapping from heap name to the total size of objects allocated in this
-  // site (including child sites) on the given heap.
-  private Map<String, Long> mSizesByHeap;
-
-  // Mapping from child site name to child site.
-  private Map<String, Site> mChildren;
-
-  // List of all objects allocated in this site (including child sites).
-  private List<Instance> mObjects;
-  private List<ObjectsInfo> mObjectsInfos;
-  private Map<Heap, Map<ClassObj, ObjectsInfo>> mObjectsInfoMap;
-
-  public static class ObjectsInfo {
-    public Heap heap;
-    public ClassObj classObj;
-    public long numInstances;
-    public long numBytes;
-
-    public ObjectsInfo(Heap heap, ClassObj classObj, long numInstances, long numBytes) {
-      this.heap = heap;
-      this.classObj = classObj;
-      this.numInstances = numInstances;
-      this.numBytes = numBytes;
-    }
-  }
-
-  /**
-   * Construct a root site.
-   */
-  public Site(String name) {
-    this(null, name, 0, 0);
-  }
-
-  public Site(Site parent, String name, int stackId, int stackDepth) {
-    mParent = parent;
-    mName = name;
-    mStackId = stackId;
-    mStackDepth = stackDepth;
-    mSizesByHeap = new HashMap<String, Long>();
-    mChildren = new HashMap<String, Site>();
-    mObjects = new ArrayList<Instance>();
-    mObjectsInfos = new ArrayList<ObjectsInfo>();
-    mObjectsInfoMap = new HashMap<Heap, Map<ClassObj, ObjectsInfo>>();
-  }
-
-  /**
-   * Add an instance to this site.
-   * Returns the site at which the instance was allocated.
-   */
-  public Site add(int stackId, int stackDepth, Iterator<StackFrame> path, Instance inst) {
-    mObjects.add(inst);
-
-    String heap = inst.getHeap().getName();
-    mSizesByHeap.put(heap, getSize(heap) + inst.getSize());
-
-    Map<ClassObj, ObjectsInfo> classToObjectsInfo = mObjectsInfoMap.get(inst.getHeap());
-    if (classToObjectsInfo == null) {
-      classToObjectsInfo = new HashMap<ClassObj, ObjectsInfo>();
-      mObjectsInfoMap.put(inst.getHeap(), classToObjectsInfo);
-    }
-
-    ObjectsInfo info = classToObjectsInfo.get(inst.getClassObj());
-    if (info == null) {
-      info = new ObjectsInfo(inst.getHeap(), inst.getClassObj(), 0, 0);
-      mObjectsInfos.add(info);
-      classToObjectsInfo.put(inst.getClassObj(), info);
-    }
-
-    info.numInstances++;
-    info.numBytes += inst.getSize();
-
-    if (path.hasNext()) {
-      String next = path.next().toString();
-      Site child = mChildren.get(next);
-      if (child == null) {
-        child = new Site(this, next, stackId, stackDepth + 1);
-        mChildren.put(next, child);
-      }
-      return child.add(stackId, stackDepth + 1, path, inst);
-    } else {
-      return this;
-    }
-  }
-
-  // Get the size of a site for a specific heap.
-  public long getSize(String heap) {
-    Long val = mSizesByHeap.get(heap);
-    if (val == null) {
-      return 0;
-    }
-    return val;
-  }
-
-  /**
-   * Get the list of objects allocated under this site. Includes objects
-   * allocated in children sites.
-   */
-  public Collection<Instance> getObjects() {
-    return mObjects;
-  }
-
-  public List<ObjectsInfo> getObjectsInfos() {
-    return mObjectsInfos;
-  }
-
-  // Get the combined size of the site for all heaps.
-  public long getTotalSize() {
-    long size = 0;
-    for (Long val : mSizesByHeap.values()) {
-      size += val;
-    }
-    return size;
-  }
-
-  /**
-   * Return the site this site was called from.
-   * Returns null for the root site.
-   */
-  public Site getParent() {
-    return mParent;
-  }
-
-  public String getName() {
-    return mName;
-  }
-
-  // Returns the hprof id of a stack this site appears on.
-  public int getStackId() {
-    return mStackId;
-  }
-
-  // Returns the stack depth of this site in the stack whose id is returned
-  // by getStackId().
-  public int getStackDepth() {
-    return mStackDepth;
-  }
-
-  List<Site> getChildren() {
-    return new ArrayList<Site>(mChildren.values());
-  }
-
-  // Get the child at the given path relative to this site.
-  // Returns null if no such child found.
-  Site getChild(Iterator<StackFrame> path) {
-    if (path.hasNext()) {
-      String next = path.next().toString();
-      Site child = mChildren.get(next);
-      return (child == null) ? null : child.getChild(path);
-    } else {
-      return this;
-    }
-  }
-}
diff --git a/tools/ahat/src/SiteHandler.java b/tools/ahat/src/SiteHandler.java
index 839e220..febf171 100644
--- a/tools/ahat/src/SiteHandler.java
+++ b/tools/ahat/src/SiteHandler.java
@@ -16,7 +16,10 @@
 
 package com.android.ahat;
 
-import com.android.tools.perflib.heap.Heap;
+import com.android.ahat.heapdump.AhatHeap;
+import com.android.ahat.heapdump.AhatSnapshot;
+import com.android.ahat.heapdump.Site;
+import com.android.ahat.heapdump.Sort;
 import java.io.IOException;
 import java.util.Collections;
 import java.util.Comparator;
@@ -35,11 +38,13 @@
 
   @Override
   public void handle(Doc doc, Query query) throws IOException {
-    int stackId = query.getInt("stack", 0);
-    int depth = query.getInt("depth", -1);
-    Site site = mSnapshot.getSite(stackId, depth);
+    int id = query.getInt("id", 0);
+    int depth = query.getInt("depth", 0);
+    Site site = mSnapshot.getSite(id, depth);
 
-    doc.title("Site %s", site.getName());
+    doc.title("Site");
+    doc.big(Summarizer.summarize(site));
+
     doc.section("Allocation Site");
     SitePrinter.printSite(mSnapshot, doc, query, ALLOCATION_SITE_ID, site);
 
@@ -48,15 +53,14 @@
     if (children.isEmpty()) {
       doc.println(DocString.text("(none)"));
     } else {
-      Collections.sort(children, new Sort.SiteBySize("app"));
-
+      Collections.sort(children, Sort.defaultSiteCompare(mSnapshot));
       HeapTable.TableConfig<Site> table = new HeapTable.TableConfig<Site>() {
         public String getHeapsDescription() {
           return "Reachable Bytes Allocated on Heap";
         }
 
-        public long getSize(Site element, Heap heap) {
-          return element.getSize(heap.getName());
+        public long getSize(Site element, AhatHeap heap) {
+          return element.getSize(heap);
         }
 
         public List<HeapTable.ValueConfig<Site>> getValueConfigs() {
@@ -66,10 +70,7 @@
             }
 
             public DocString render(Site element) {
-              return DocString.link(
-                  DocString.formattedUri("site?stack=%d&depth=%d",
-                    element.getStackId(), element.getStackDepth()),
-                  DocString.text(element.getName()));
+              return Summarizer.summarize(element);
             }
           };
           return Collections.singletonList(value);
@@ -79,29 +80,36 @@
     }
 
     doc.section("Objects Allocated");
+
     doc.table(
         new Column("Reachable Bytes Allocated", Column.Align.RIGHT),
+        new Column("Δ", Column.Align.RIGHT, mSnapshot.isDiffed()),
         new Column("Instances", Column.Align.RIGHT),
+        new Column("Δ", Column.Align.RIGHT, mSnapshot.isDiffed()),
         new Column("Heap"),
         new Column("Class"));
+
     List<Site.ObjectsInfo> infos = site.getObjectsInfos();
     Comparator<Site.ObjectsInfo> compare = new Sort.WithPriority<Site.ObjectsInfo>(
-        new Sort.ObjectsInfoByHeapName(),
-        new Sort.ObjectsInfoBySize(),
-        new Sort.ObjectsInfoByClassName());
+        Sort.OBJECTS_INFO_BY_HEAP_NAME,
+        Sort.OBJECTS_INFO_BY_SIZE,
+        Sort.OBJECTS_INFO_BY_CLASS_NAME);
     Collections.sort(infos, compare);
     SubsetSelector<Site.ObjectsInfo> selector
       = new SubsetSelector(query, OBJECTS_ALLOCATED_ID, infos);
     for (Site.ObjectsInfo info : selector.selected()) {
-      String className = AhatSnapshot.getClassName(info.classObj);
+      Site.ObjectsInfo baseinfo = info.getBaseline();
+      String className = info.getClassName();
       doc.row(
           DocString.format("%,14d", info.numBytes),
+          DocString.delta(false, false, info.numBytes, baseinfo.numBytes),
           DocString.link(
-            DocString.formattedUri("objects?stack=%d&depth=%d&heap=%s&class=%s",
-                site.getStackId(), site.getStackDepth(), info.heap.getName(), className),
+            DocString.formattedUri("objects?id=%d&depth=%d&heap=%s&class=%s",
+              site.getId(), site.getDepth(), info.heap.getName(), className),
             DocString.format("%,14d", info.numInstances)),
+          DocString.delta(false, false, info.numInstances, baseinfo.numInstances),
           DocString.text(info.heap.getName()),
-          Value.render(mSnapshot, info.classObj));
+          Summarizer.summarize(info.classObj));
     }
     doc.end();
     selector.render(doc);
diff --git a/tools/ahat/src/SitePrinter.java b/tools/ahat/src/SitePrinter.java
index 2c06b47..21ca2de 100644
--- a/tools/ahat/src/SitePrinter.java
+++ b/tools/ahat/src/SitePrinter.java
@@ -16,7 +16,9 @@
 
 package com.android.ahat;
 
-import com.android.tools.perflib.heap.Heap;
+import com.android.ahat.heapdump.AhatHeap;
+import com.android.ahat.heapdump.AhatSnapshot;
+import com.android.ahat.heapdump.Site;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
@@ -35,8 +37,8 @@
         return "Reachable Bytes Allocated on Heap";
       }
 
-      public long getSize(Site element, Heap heap) {
-        return element.getSize(heap.getName());
+      public long getSize(Site element, AhatHeap heap) {
+        return element.getSize(heap);
       }
 
       public List<HeapTable.ValueConfig<Site>> getValueConfigs() {
@@ -50,11 +52,7 @@
             if (element.getParent() != null) {
               str.append("→ ");
             }
-            str.appendLink(
-                DocString.formattedUri("site?stack=%d&depth=%d",
-                    element.getStackId(), element.getStackDepth()),
-                DocString.text(element.getName()));
-            return str;
+            return str.append(Summarizer.summarize(element));
           }
         };
         return Collections.singletonList(value);
diff --git a/tools/ahat/src/Sort.java b/tools/ahat/src/Sort.java
deleted file mode 100644
index 8a3d9f2..0000000
--- a/tools/ahat/src/Sort.java
+++ /dev/null
@@ -1,208 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.android.ahat;
-
-import com.android.tools.perflib.heap.Heap;
-import com.android.tools.perflib.heap.Instance;
-
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Comparator;
-import java.util.Iterator;
-import java.util.List;
-
-/**
- * Provides Comparators and helper functions for sorting Instances, Sites, and
- * other things.
- *
- * Note: The Comparators defined here impose orderings that are inconsistent
- * with equals. They should not be used for element lookup or search. They
- * should only be used for showing elements to the user in different orders.
- */
-class Sort {
-  /**
-   * Compare instances by their instance id.
-   * This sorts instances from smaller id to larger id.
-   */
-  public static class InstanceById implements Comparator<Instance> {
-    @Override
-    public int compare(Instance a, Instance b) {
-      return Long.compare(a.getId(), b.getId());
-    }
-  }
-
-  /**
-   * Compare instances by their total retained size.
-   * Different instances with the same total retained size are considered
-   * equal for the purposes of comparison.
-   * This sorts instances from larger retained size to smaller retained size.
-   */
-  public static class InstanceByTotalRetainedSize implements Comparator<Instance> {
-    @Override
-    public int compare(Instance a, Instance b) {
-      return Long.compare(b.getTotalRetainedSize(), a.getTotalRetainedSize());
-    }
-  }
-
-  /**
-   * Compare instances by their retained size for a given heap index.
-   * Different instances with the same total retained size are considered
-   * equal for the purposes of comparison.
-   * This sorts instances from larger retained size to smaller retained size.
-   */
-  public static class InstanceByHeapRetainedSize implements Comparator<Instance> {
-    private int mIndex;
-
-    public InstanceByHeapRetainedSize(AhatSnapshot snapshot, Heap heap) {
-      mIndex = snapshot.getHeapIndex(heap);
-    }
-
-    public InstanceByHeapRetainedSize(int heapIndex) {
-      mIndex = heapIndex;
-    }
-
-    @Override
-    public int compare(Instance a, Instance b) {
-      return Long.compare(b.getRetainedSize(mIndex), a.getRetainedSize(mIndex));
-    }
-  }
-
-  /**
-   * Compare objects based on a list of comparators, giving priority to the
-   * earlier comparators in the list.
-   */
-  public static class WithPriority<T> implements Comparator<T> {
-    private List<Comparator<T>> mComparators;
-
-    public WithPriority(Comparator<T>... comparators) {
-      mComparators = Arrays.asList(comparators);
-    }
-
-    public WithPriority(List<Comparator<T>> comparators) {
-      mComparators = comparators;
-    }
-
-    @Override
-    public int compare(T a, T b) {
-      int res = 0;
-      Iterator<Comparator<T>> iter = mComparators.iterator();
-      while (res == 0 && iter.hasNext()) {
-        res = iter.next().compare(a, b);
-      }
-      return res;
-    }
-  }
-
-  public static Comparator<Instance> defaultInstanceCompare(AhatSnapshot snapshot) {
-    List<Comparator<Instance>> comparators = new ArrayList<Comparator<Instance>>();
-
-    // Priority goes to the app heap, if we can find one.
-    Heap appHeap = snapshot.getHeap("app");
-    if (appHeap != null) {
-      comparators.add(new InstanceByHeapRetainedSize(snapshot, appHeap));
-    }
-
-    // Next is by total retained size.
-    comparators.add(new InstanceByTotalRetainedSize());
-    return new WithPriority<Instance>(comparators);
-  }
-
-  /**
-   * Compare Sites by the size of objects allocated on a given heap.
-   * Different object infos with the same size on the given heap are
-   * considered equal for the purposes of comparison.
-   * This sorts sites from larger size to smaller size.
-   */
-  public static class SiteBySize implements Comparator<Site> {
-    String mHeap;
-
-    public SiteBySize(String heap) {
-      mHeap = heap;
-    }
-
-    @Override
-    public int compare(Site a, Site b) {
-      return Long.compare(b.getSize(mHeap), a.getSize(mHeap));
-    }
-  }
-
-  /**
-   * Compare Site.ObjectsInfo by their size.
-   * Different object infos with the same total retained size are considered
-   * equal for the purposes of comparison.
-   * This sorts object infos from larger retained size to smaller size.
-   */
-  public static class ObjectsInfoBySize implements Comparator<Site.ObjectsInfo> {
-    @Override
-    public int compare(Site.ObjectsInfo a, Site.ObjectsInfo b) {
-      return Long.compare(b.numBytes, a.numBytes);
-    }
-  }
-
-  /**
-   * Compare Site.ObjectsInfo by heap name.
-   * Different object infos with the same heap name are considered equal for
-   * the purposes of comparison.
-   */
-  public static class ObjectsInfoByHeapName implements Comparator<Site.ObjectsInfo> {
-    @Override
-    public int compare(Site.ObjectsInfo a, Site.ObjectsInfo b) {
-      return a.heap.getName().compareTo(b.heap.getName());
-    }
-  }
-
-  /**
-   * Compare Site.ObjectsInfo by class name.
-   * Different object infos with the same class name are considered equal for
-   * the purposes of comparison.
-   */
-  public static class ObjectsInfoByClassName implements Comparator<Site.ObjectsInfo> {
-    @Override
-    public int compare(Site.ObjectsInfo a, Site.ObjectsInfo b) {
-      String aName = AhatSnapshot.getClassName(a.classObj);
-      String bName = AhatSnapshot.getClassName(b.classObj);
-      return aName.compareTo(bName);
-    }
-  }
-
-  /**
-   * Compare AhatSnapshot.NativeAllocation by heap name.
-   * Different allocations with the same heap name are considered equal for
-   * the purposes of comparison.
-   */
-  public static class NativeAllocationByHeapName
-      implements Comparator<InstanceUtils.NativeAllocation> {
-    @Override
-    public int compare(InstanceUtils.NativeAllocation a, InstanceUtils.NativeAllocation b) {
-      return a.heap.getName().compareTo(b.heap.getName());
-    }
-  }
-
-  /**
-   * Compare InstanceUtils.NativeAllocation by their size.
-   * Different allocations with the same size are considered equal for the
-   * purposes of comparison.
-   * This sorts allocations from larger size to smaller size.
-   */
-  public static class NativeAllocationBySize implements Comparator<InstanceUtils.NativeAllocation> {
-    @Override
-    public int compare(InstanceUtils.NativeAllocation a, InstanceUtils.NativeAllocation b) {
-      return Long.compare(b.size, a.size);
-    }
-  }
-}
-
diff --git a/tools/ahat/src/StaticHandler.java b/tools/ahat/src/StaticHandler.java
index fb7049d..b2805d6 100644
--- a/tools/ahat/src/StaticHandler.java
+++ b/tools/ahat/src/StaticHandler.java
@@ -17,10 +17,10 @@
 package com.android.ahat;
 
 import com.google.common.io.ByteStreams;
-import com.sun.net.httpserver.HttpHandler;
 import com.sun.net.httpserver.HttpExchange;
-import java.io.InputStream;
+import com.sun.net.httpserver.HttpHandler;
 import java.io.IOException;
+import java.io.InputStream;
 import java.io.OutputStream;
 import java.io.PrintStream;
 
diff --git a/tools/ahat/src/Summarizer.java b/tools/ahat/src/Summarizer.java
new file mode 100644
index 0000000..016eab4
--- /dev/null
+++ b/tools/ahat/src/Summarizer.java
@@ -0,0 +1,143 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.ahat;
+
+import com.android.ahat.heapdump.AhatInstance;
+import com.android.ahat.heapdump.Site;
+import com.android.ahat.heapdump.Value;
+import java.net.URI;
+
+/**
+ * Class for generating a DocString summary of an instance or value.
+ */
+class Summarizer {
+
+  // For string literals, we limit the number of characters we show to
+  // kMaxChars in case the string is really long.
+  private static final int kMaxChars = 200;
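+  // Illustrative example (length assumed): a 300-character string value
+  // renders as its first 200 characters followed by "..." rather than a
+  // closing quote, keeping summaries bounded in size.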
+
+  /**
+   * Creates a DocString representing a summary of the given instance.
+   */
+  public static DocString summarize(AhatInstance inst) {
+    DocString formatted = new DocString();
+    if (inst == null) {
+      formatted.append("null");
+      return formatted;
+    }
+
+    // Annotate new objects as new.
+    if (inst.getBaseline().isPlaceHolder()) {
+      formatted.append(DocString.added("new "));
+    }
+
+    // Annotate deleted objects as deleted.
+    if (inst.isPlaceHolder()) {
+      formatted.append(DocString.removed("del "));
+    }
+
+    // Annotate unreachable objects as such.
+    if (!inst.isReachable()) {
+      formatted.append("unreachable ");
+    }
+
+    // Annotate roots as roots.
+    if (inst.isRoot()) {
+      formatted.append("root ");
+    }
+
+    // Annotate classes as classes.
+    DocString linkText = new DocString();
+    if (inst.isClassObj()) {
+      linkText.append("class ");
+    }
+
+    linkText.append(inst.toString());
+
+    if (inst.isPlaceHolder()) {
+      // Don't make links to placeholder objects.
+      formatted.append(linkText);
+    } else {
+      URI objTarget = DocString.formattedUri("object?id=%d", inst.getId());
+      formatted.appendLink(objTarget, linkText);
+    }
+
+    // Annotate Strings with their values.
+    String stringValue = inst.asString(kMaxChars);
+    if (stringValue != null) {
+      formatted.appendFormat(" \"%s", stringValue);
+      formatted.append(kMaxChars == stringValue.length() ? "..." : "\"");
+    }
+
+    // Annotate Reference with its referent.
+    AhatInstance referent = inst.getReferent();
+    if (referent != null) {
+      formatted.append(" for ");
+
+      // It should not be possible for a referent to refer back to the
+      // reference object, even indirectly, so there shouldn't be any issues
+      // with infinite recursion here.
+      formatted.append(summarize(referent));
+    }
+
+    // Annotate DexCache with its location.
+    String dexCacheLocation = inst.getDexCacheLocation(kMaxChars);
+    if (dexCacheLocation != null) {
+      formatted.appendFormat(" for %s", dexCacheLocation);
+      if (kMaxChars == dexCacheLocation.length()) {
+        formatted.append("...");
+      }
+    }
+
+    // Annotate bitmaps with a thumbnail.
+    AhatInstance bitmap = inst.getAssociatedBitmapInstance();
+    if (bitmap != null) {
+      URI uri = DocString.formattedUri("bitmap?id=%d", bitmap.getId());
+      formatted.appendThumbnail(uri, "bitmap image");
+    }
+    return formatted;
+  }
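+
+  // Illustrative example (id assumed): a reachable string instance with id
+  // 42 is summarized as a link to "object?id=42" whose text is
+  // inst.toString(), e.g. java.lang.String@0000002a, followed by the quoted
+  // string value; "new ", "del ", "unreachable " and "root " prefixes are
+  // added as appropriate.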
+
+  /**
+   * Creates a DocString summarizing the given value.
+   */
+  public static DocString summarize(Value value) {
+    if (value == null) {
+      return DocString.text("null");
+    }
+    if (value.isAhatInstance()) {
+      return summarize(value.asAhatInstance());
+    }
+    return DocString.text(value.toString());
+  }
+
+  /**
+   * Creates a DocString summarizing the given site.
+   */
+  public static DocString summarize(Site site) {
+    DocString text = DocString.text(site.getMethodName());
+    text.append(site.getSignature());
+    text.append(" - ");
+    text.append(site.getFilename());
+    if (site.getLineNumber() > 0) {
+      text.append(":").append(Integer.toString(site.getLineNumber()));
+    }
+    URI uri = DocString.formattedUri("site?id=%d&depth=%d", site.getId(), site.getDepth());
+    return DocString.link(uri, text);
+  }
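+
+  // Illustrative example (values assumed): a site whose method is
+  // "toString", signature "()Ljava/lang/String;", file "String.java" and
+  // line 42 renders as the text
+  //   toString()Ljava/lang/String; - String.java:42
+  // linked to "site?id=<id>&depth=<depth>".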
+}
diff --git a/tools/ahat/src/Value.java b/tools/ahat/src/Value.java
deleted file mode 100644
index 847692b..0000000
--- a/tools/ahat/src/Value.java
+++ /dev/null
@@ -1,107 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.android.ahat;
-
-import com.android.tools.perflib.heap.ClassObj;
-import com.android.tools.perflib.heap.Instance;
-import java.net.URI;
-
-/**
- * Class to render an hprof value to a DocString.
- */
-class Value {
-
-  // For string literals, we limit the number of characters we show to
-  // kMaxChars in case the string is really long.
-  private static int kMaxChars = 200;
-
-  /**
-   * Create a DocString representing a summary of the given instance.
-   */
-  private static DocString renderInstance(AhatSnapshot snapshot, Instance inst) {
-    DocString formatted = new DocString();
-    if (inst == null) {
-      formatted.append("(null)");
-      return formatted;
-    }
-
-    // Annotate roots as roots.
-    if (snapshot.isRoot(inst)) {
-      formatted.append("(root) ");
-    }
-
-
-    // Annotate classes as classes.
-    DocString link = new DocString();
-    if (inst instanceof ClassObj) {
-      link.append("class ");
-    }
-
-    link.append(inst.toString());
-
-    URI objTarget = DocString.formattedUri("object?id=%d", inst.getId());
-    formatted.appendLink(objTarget, link);
-
-    // Annotate Strings with their values.
-    String stringValue = InstanceUtils.asString(inst, kMaxChars);
-    if (stringValue != null) {
-      formatted.appendFormat(" \"%s", stringValue);
-      formatted.append(kMaxChars == stringValue.length() ? "..." : "\"");
-    }
-
-    // Annotate Reference with its referent
-    Instance referent = InstanceUtils.getReferent(inst);
-    if (referent != null) {
-      formatted.append(" for ");
-
-      // It should not be possible for a referent to refer back to the
-      // reference object, even indirectly, so there shouldn't be any issues
-      // with infinite recursion here.
-      formatted.append(renderInstance(snapshot, referent));
-    }
-
-    // Annotate DexCache with its location.
-    String dexCacheLocation = InstanceUtils.getDexCacheLocation(inst, kMaxChars);
-    if (dexCacheLocation != null) {
-      formatted.appendFormat(" for %s", dexCacheLocation);
-      if (kMaxChars == dexCacheLocation.length()) {
-        formatted.append("...");
-      }
-    }
-
-
-    // Annotate bitmaps with a thumbnail.
-    Instance bitmap = InstanceUtils.getAssociatedBitmapInstance(inst);
-    String thumbnail = "";
-    if (bitmap != null) {
-      URI uri = DocString.formattedUri("bitmap?id=%d", bitmap.getId());
-      formatted.appendThumbnail(uri, "bitmap image");
-    }
-    return formatted;
-  }
-
-  /**
-   * Create a DocString summarizing the given value.
-   */
-  public static DocString render(AhatSnapshot snapshot, Object val) {
-    if (val instanceof Instance) {
-      return renderInstance(snapshot, (Instance)val);
-    } else {
-      return DocString.format("%s", val);
-    }
-  }
-}
diff --git a/tools/ahat/src/heapdump/AhatArrayInstance.java b/tools/ahat/src/heapdump/AhatArrayInstance.java
new file mode 100644
index 0000000..d88cf94
--- /dev/null
+++ b/tools/ahat/src/heapdump/AhatArrayInstance.java
@@ -0,0 +1,229 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.ahat.heapdump;
+
+import com.android.tools.perflib.heap.ArrayInstance;
+import com.android.tools.perflib.heap.Instance;
+import java.nio.charset.StandardCharsets;
+import java.util.AbstractList;
+import java.util.List;
+
+public class AhatArrayInstance extends AhatInstance {
+  // To save space, we store byte, character, and object arrays directly as
+  // byte, character, and AhatInstance arrays respectively. This is especially
+  // important for large byte arrays, such as bitmaps. All other array types
+  // are stored as an array of objects, though we could potentially save space
+  // by specializing those too. mValues is a list view of the underlying
+  // array.
+  private List<Value> mValues;
+  private byte[] mByteArray;    // null if not a byte array.
+  private char[] mCharArray;    // null if not a char array.
+
+  public AhatArrayInstance(long id) {
+    super(id);
+  }
+
+  @Override void initialize(AhatSnapshot snapshot, Instance inst) {
+    super.initialize(snapshot, inst);
+
+    ArrayInstance array = (ArrayInstance)inst;
+    switch (array.getArrayType()) {
+      case OBJECT:
+        Object[] objects = array.getValues();
+        final AhatInstance[] insts = new AhatInstance[objects.length];
+        for (int i = 0; i < objects.length; i++) {
+          if (objects[i] != null) {
+            Instance ref = (Instance)objects[i];
+            insts[i] = snapshot.findInstance(ref.getId());
+            if (ref.getNextInstanceToGcRoot() == inst) {
+              String field = "[" + Integer.toString(i) + "]";
+              insts[i].setNextInstanceToGcRoot(this, field);
+            }
+          }
+        }
+        mValues = new AbstractList<Value>() {
+          @Override public int size() {
+            return insts.length;
+          }
+
+          @Override public Value get(int index) {
+            AhatInstance obj = insts[index];
+            return obj == null ? null : new Value(obj);
+          }
+        };
+        break;
+
+      case CHAR:
+        final char[] chars = array.asCharArray(0, array.getLength());
+        mCharArray = chars;
+        mValues = new AbstractList<Value>() {
+          @Override public int size() {
+            return chars.length;
+          }
+
+          @Override public Value get(int index) {
+            return new Value(chars[index]);
+          }
+        };
+        break;
+
+      case BYTE:
+        final byte[] bytes = array.asRawByteArray(0, array.getLength());
+        mByteArray = bytes;
+        mValues = new AbstractList<Value>() {
+          @Override public int size() {
+            return bytes.length;
+          }
+
+          @Override public Value get(int index) {
+            return new Value(bytes[index]);
+          }
+        };
+        break;
+
+      default:
+        final Object[] values = array.getValues();
+        mValues = new AbstractList<Value>() {
+          @Override public int size() {
+            return values.length;
+          }
+
+          @Override public Value get(int index) {
+            Object obj = values[index];
+            return obj == null ? null : new Value(obj);
+          }
+        };
+        break;
+    }
+  }
+
+  /**
+   * Returns the length of the array.
+   */
+  public int getLength() {
+    return mValues.size();
+  }
+
+  /**
+   * Returns the array's values.
+   */
+  public List<Value> getValues() {
+    return mValues;
+  }
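+
+  // Illustrative note: getValues() is a lazy list view over the underlying
+  // storage; for a char array holding "hi" it has size 2 and wraps 'h' and
+  // 'i' in Value objects on demand, so no separate Value[] is allocated.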
+
+  /**
+   * Returns the object at the given index of this array.
+   */
+  public Value getValue(int index) {
+    return mValues.get(index);
+  }
+
+  @Override public boolean isArrayInstance() {
+    return true;
+  }
+
+  @Override public AhatArrayInstance asArrayInstance() {
+    return this;
+  }
+
+  @Override public String asString(int maxChars) {
+    return asString(0, getLength(), maxChars);
+  }
+
+  /**
+   * Returns the String value associated with this array.
+   * Only char arrays are considered to have an associated String value.
+   */
+  String asString(int offset, int count, int maxChars) {
+    if (mCharArray == null) {
+      return null;
+    }
+
+    if (count == 0) {
+      return "";
+    }
+    int numChars = mCharArray.length;
+    if (0 <= maxChars && maxChars < count) {
+      count = maxChars;
+    }
+
+    int end = offset + count - 1;
+    if (offset >= 0 && offset < numChars && end >= 0 && end < numChars) {
+      return new String(mCharArray, offset, count);
+    }
+    return null;
+  }
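+
+  // Illustrative example (contents assumed): for a char array holding
+  // "hello", asString(1, 3, -1) returns "ell"; with maxChars = 2 the count
+  // is clamped and the result is "el". Out-of-range offsets return null.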
+
+  /**
+   * Returns the ASCII String value associated with this array.
+   * Only byte arrays are considered to have an associated ASCII String value.
+   */
+  String asAsciiString(int offset, int count, int maxChars) {
+    if (mByteArray == null) {
+      return null;
+    }
+
+    if (count == 0) {
+      return "";
+    }
+    int numChars = mByteArray.length;
+    if (0 <= maxChars && maxChars < count) {
+      count = maxChars;
+    }
+
+    int end = offset + count - 1;
+    if (offset >= 0 && offset < numChars && end >= 0 && end < numChars) {
+      return new String(mByteArray, offset, count, StandardCharsets.US_ASCII);
+    }
+    return null;
+  }
+
+  /**
+   * Returns the String value associated with this array. Byte arrays are
+   * treated as ASCII-encoded strings.
+   */
+  String asMaybeCompressedString(int offset, int count, int maxChars) {
+    String str = asString(offset, count, maxChars);
+    if (str == null) {
+      str = asAsciiString(offset, count, maxChars);
+    }
+    return str;
+  }
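+
+  // Note: this presumably mirrors ART's compressed strings, whose backing
+  // array is an ASCII byte[] rather than a char[]; the char interpretation
+  // is tried first and the byte interpretation used as a fallback.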
+
+  @Override public AhatInstance getAssociatedBitmapInstance() {
+    if (mByteArray != null) {
+      List<AhatInstance> refs = getHardReverseReferences();
+      if (refs.size() == 1) {
+        AhatInstance ref = refs.get(0);
+        return ref.getAssociatedBitmapInstance();
+      }
+    }
+    return null;
+  }
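+
+  // Heuristic note: a byte[] is treated as bitmap data only when exactly
+  // one object holds a hard reference to it, in which case the decision is
+  // delegated to that referrer (e.g. an android.graphics.Bitmap).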
+
+  @Override public String toString() {
+    String className = getClassName();
+    if (className.endsWith("[]")) {
+      className = className.substring(0, className.length() - 2);
+    }
+    return String.format("%s[%d]@%08x", className, mValues.size(), getId());
+  }
+
+  byte[] asByteArray() {
+    return mByteArray;
+  }
+}
diff --git a/tools/ahat/src/heapdump/AhatClassInstance.java b/tools/ahat/src/heapdump/AhatClassInstance.java
new file mode 100644
index 0000000..273530a
--- /dev/null
+++ b/tools/ahat/src/heapdump/AhatClassInstance.java
@@ -0,0 +1,224 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.ahat.heapdump;
+
+import com.android.tools.perflib.heap.ClassInstance;
+import com.android.tools.perflib.heap.Instance;
+import java.awt.image.BufferedImage;
+import java.util.Arrays;
+import java.util.List;
+
+public class AhatClassInstance extends AhatInstance {
+  private FieldValue[] mFieldValues;
+
+  public AhatClassInstance(long id) {
+    super(id);
+  }
+
+  @Override void initialize(AhatSnapshot snapshot, Instance inst) {
+    super.initialize(snapshot, inst);
+
+    ClassInstance classInst = (ClassInstance)inst;
+    List<ClassInstance.FieldValue> fieldValues = classInst.getValues();
+    mFieldValues = new FieldValue[fieldValues.size()];
+    for (int i = 0; i < mFieldValues.length; i++) {
+      ClassInstance.FieldValue field = fieldValues.get(i);
+      String name = field.getField().getName();
+      String type = field.getField().getType().toString();
+      Value value = snapshot.getValue(field.getValue());
+
+      mFieldValues[i] = new FieldValue(name, type, value);
+
+      if (field.getValue() instanceof Instance) {
+        Instance ref = (Instance)field.getValue();
+        if (ref.getNextInstanceToGcRoot() == inst) {
+          value.asAhatInstance().setNextInstanceToGcRoot(this, "." + name);
+        }
+      }
+    }
+  }
+
+  @Override public Value getField(String fieldName) {
+    for (FieldValue field : mFieldValues) {
+      if (fieldName.equals(field.getName())) {
+        return field.getValue();
+      }
+    }
+    return null;
+  }
+
+  @Override public AhatInstance getRefField(String fieldName) {
+    Value value = getField(fieldName);
+    return value == null ? null : value.asAhatInstance();
+  }
+
+  /**
+   * Read an int field of this instance.
+   * The field is assumed to be an int type.
+   * Returns <code>def</code> if the field value is not an int or could not be
+   * read.
+   */
+  private Integer getIntField(String fieldName, Integer def) {
+    Value value = getField(fieldName);
+    if (value == null || !value.isInteger()) {
+      return def;
+    }
+    return value.asInteger();
+  }
+
+  /**
+   * Read a long field of this instance.
+   * The field is assumed to be a long type.
+   * Returns <code>def</code> if the field value is not a long or could not
+   * be read.
+   */
+  private Long getLongField(String fieldName, Long def) {
+    Value value = getField(fieldName);
+    if (value == null || !value.isLong()) {
+      return def;
+    }
+    return value.asLong();
+  }
+
+  /**
+   * Returns the list of class instance fields for this instance.
+   */
+  public List<FieldValue> getInstanceFields() {
+    return Arrays.asList(mFieldValues);
+  }
+
+  /**
+   * Returns true if this is an instance of a class with the given name.
+   */
+  private boolean isInstanceOfClass(String className) {
+    AhatClassObj cls = getClassObj();
+    while (cls != null) {
+      if (className.equals(cls.getName())) {
+        return true;
+      }
+      cls = cls.getSuperClassObj();
+    }
+    return false;
+  }
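+
+  // For example, an android.graphics.Bitmap instance satisfies both
+  // isInstanceOfClass("android.graphics.Bitmap") and
+  // isInstanceOfClass("java.lang.Object"), since the superclass chain is
+  // walked; interfaces are not considered.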
+
+  @Override public String asString(int maxChars) {
+    if (!isInstanceOfClass("java.lang.String")) {
+      return null;
+    }
+
+    Value value = getField("value");
+    if (value == null || !value.isAhatInstance()) {
+      return null;
+    }
+
+    AhatInstance inst = value.asAhatInstance();
+    if (inst.isArrayInstance()) {
+      AhatArrayInstance chars = inst.asArrayInstance();
+      int numChars = chars.getLength();
+      int count = getIntField("count", numChars);
+      int offset = getIntField("offset", 0);
+      return chars.asMaybeCompressedString(offset, count, maxChars);
+    }
+    return null;
+  }
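+
+  // Illustrative example (field values assumed): a java.lang.String whose
+  // "value" field is the char array "hello" with count = 5 and offset = 0
+  // yields "hello"; with offset = 1 and count = 3 it would yield "ell".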
+
+  @Override public AhatInstance getReferent() {
+    if (isInstanceOfClass("java.lang.ref.Reference")) {
+      return getRefField("referent");
+    }
+    return null;
+  }
+
+  @Override public String getDexCacheLocation(int maxChars) {
+    if (isInstanceOfClass("java.lang.DexCache")) {
+      AhatInstance location = getRefField("location");
+      if (location != null) {
+        return location.asString(maxChars);
+      }
+    }
+    return null;
+  }
+
+  @Override public AhatInstance getAssociatedBitmapInstance() {
+    if (isInstanceOfClass("android.graphics.Bitmap")) {
+      return this;
+    }
+    return null;
+  }
+
+  @Override public boolean isClassInstance() {
+    return true;
+  }
+
+  @Override public AhatClassInstance asClassInstance() {
+    return this;
+  }
+
+  @Override public String toString() {
+    return String.format("%s@%08x", getClassName(), getId());
+  }
+
+  /**
+   * Read the given field from this instance.
+   * The field is assumed to be a byte[] field.
+   * Returns null if the field value is null, not a byte[] or could not be read.
+   */
+  private byte[] getByteArrayField(String fieldName) {
+    Value value = getField(fieldName);
+    if (value == null || !value.isAhatInstance()) {
+      return null;
+    }
+    return value.asAhatInstance().asByteArray();
+  }
+
+  public BufferedImage asBitmap() {
+    if (!isInstanceOfClass("android.graphics.Bitmap")) {
+      return null;
+    }
+
+    Integer width = getIntField("mWidth", null);
+    if (width == null) {
+      return null;
+    }
+
+    Integer height = getIntField("mHeight", null);
+    if (height == null) {
+      return null;
+    }
+
+    byte[] buffer = getByteArrayField("mBuffer");
+    if (buffer == null) {
+      return null;
+    }
+
+    // Convert the raw data to an image
+    // Convert BGRA to ABGR
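+    // Each pixel occupies four consecutive bytes of mBuffer; the loop
+    // repacks them into one int per pixel with buffer[i * 4 + 3] in the
+    // high byte, the layout setRGB expects below.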
+    int[] abgr = new int[height * width];
+    for (int i = 0; i < abgr.length; i++) {
+      abgr[i] = (
+          (((int) buffer[i * 4 + 3] & 0xFF) << 24)
+          + (((int) buffer[i * 4 + 0] & 0xFF) << 16)
+          + (((int) buffer[i * 4 + 1] & 0xFF) << 8)
+          + ((int) buffer[i * 4 + 2] & 0xFF));
+    }
+
+    BufferedImage bitmap = new BufferedImage(
+        width, height, BufferedImage.TYPE_4BYTE_ABGR);
+    bitmap.setRGB(0, 0, width, height, abgr, 0, width);
+    return bitmap;
+  }
+}
diff --git a/tools/ahat/src/heapdump/AhatClassObj.java b/tools/ahat/src/heapdump/AhatClassObj.java
new file mode 100644
index 0000000..c5ade1d
--- /dev/null
+++ b/tools/ahat/src/heapdump/AhatClassObj.java
@@ -0,0 +1,115 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.ahat.heapdump;
+
+import com.android.tools.perflib.heap.ClassObj;
+import com.android.tools.perflib.heap.Field;
+import com.android.tools.perflib.heap.Instance;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+
+public class AhatClassObj extends AhatInstance {
+  private String mClassName;
+  private AhatClassObj mSuperClassObj;
+  private AhatInstance mClassLoader;
+  private FieldValue[] mStaticFieldValues;
+
+  public AhatClassObj(long id) {
+    super(id);
+  }
+
+  @Override void initialize(AhatSnapshot snapshot, Instance inst) {
+    super.initialize(snapshot, inst);
+
+    ClassObj classObj = (ClassObj)inst;
+    mClassName = classObj.getClassName();
+
+    ClassObj superClassObj = classObj.getSuperClassObj();
+    if (superClassObj != null) {
+      mSuperClassObj = snapshot.findClassObj(superClassObj.getId());
+    }
+
+    Instance loader = classObj.getClassLoader();
+    if (loader != null) {
+      mClassLoader = snapshot.findInstance(loader.getId());
+    }
+
+    Collection<Map.Entry<Field, Object>> fieldValues = classObj.getStaticFieldValues().entrySet();
+    mStaticFieldValues = new FieldValue[fieldValues.size()];
+    int index = 0;
+    for (Map.Entry<Field, Object> field : fieldValues) {
+      String name = field.getKey().getName();
+      String type = field.getKey().getType().toString();
+      Value value = snapshot.getValue(field.getValue());
+      mStaticFieldValues[index++] = new FieldValue(name, type, value);
+
+      if (field.getValue() instanceof Instance) {
+        Instance ref = (Instance)field.getValue();
+        if (ref.getNextInstanceToGcRoot() == inst) {
+          value.asAhatInstance().setNextInstanceToGcRoot(this, "." + name);
+        }
+      }
+    }
+  }
+
+  /**
+   * Returns the name of the class this is a class object for.
+   */
+  public String getName() {
+    return mClassName;
+  }
+
+  /**
+   * Returns the superclass of this class object.
+   */
+  public AhatClassObj getSuperClassObj() {
+    return mSuperClassObj;
+  }
+
+  /**
+   * Returns the class loader of this class object.
+   */
+  public AhatInstance getClassLoader() {
+    return mClassLoader;
+  }
+
+  /**
+   * Returns the static field values for this class object.
+   */
+  public List<FieldValue> getStaticFieldValues() {
+    return Arrays.asList(mStaticFieldValues);
+  }
+
+  @Override public boolean isClassObj() {
+    return true;
+  }
+
+  @Override public AhatClassObj asClassObj() {
+    return this;
+  }
+
+  @Override public String toString() {
+    return mClassName;
+  }
+
+  @Override AhatInstance newPlaceHolderInstance() {
+    return new AhatPlaceHolderClassObj(this);
+  }
+}
+
diff --git a/tools/ahat/src/heapdump/AhatField.java b/tools/ahat/src/heapdump/AhatField.java
new file mode 100644
index 0000000..a25ee28
--- /dev/null
+++ b/tools/ahat/src/heapdump/AhatField.java
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.ahat.heapdump;
+
+public class AhatField {
+  private final String mName;
+  private final String mType;
+
+  public AhatField(String name, String type) {
+    mName = name;
+    mType = type;
+  }
+
+  /**
+   * Returns the name of the field.
+   */
+  public String getName() {
+    return mName;
+  }
+
+  /**
+   * Returns a description of the type of the field.
+   */
+  public String getType() {
+    return mType;
+  }
+}
+
diff --git a/tools/ahat/src/heapdump/AhatHeap.java b/tools/ahat/src/heapdump/AhatHeap.java
new file mode 100644
index 0000000..c39adc4
--- /dev/null
+++ b/tools/ahat/src/heapdump/AhatHeap.java
@@ -0,0 +1,89 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.ahat.heapdump;
+
+public class AhatHeap implements Diffable<AhatHeap> {
+  private String mName;
+  private long mSize = 0;
+  private int mIndex;
+  private AhatHeap mBaseline;
+  private boolean mIsPlaceHolder = false;
+
+  AhatHeap(String name, int index) {
+    mName = name;
+    mIndex = index;
+    mBaseline = this;
+  }
+
+  /**
+   * Construct a place holder heap.
+   */
+  private AhatHeap(String name, AhatHeap baseline) {
+    mName = name;
+    mIndex = -1;
+    mBaseline = baseline;
+    baseline.setBaseline(this);
+    mIsPlaceHolder = true;
+  }
+
+  /**
+   * Construct a new place holder heap that has the given baseline heap.
+   */
+  static AhatHeap newPlaceHolderHeap(String name, AhatHeap baseline) {
+    return new AhatHeap(name, baseline);
+  }
+
+  void addToSize(long increment) {
+    mSize += increment;
+  }
+
+  /**
+   * Returns a unique index for this heap, between 0 and the total number of
+   * heaps in this snapshot, or -1 if this is a placeholder heap.
+   */
+  int getIndex() {
+    return mIndex;
+  }
+
+  /**
+   * Returns the name of this heap.
+   */
+  public String getName() {
+    return mName;
+  }
+
+  /**
+   * Returns the total number of bytes allocated on this heap.
+   */
+  public long getSize() {
+    return mSize;
+  }
+
+  void setBaseline(AhatHeap baseline) {
+    mBaseline = baseline;
+  }
+
+  @Override
+  public AhatHeap getBaseline() {
+    return mBaseline;
+  }
+
+  @Override
+  public boolean isPlaceHolder() {
+    return mIsPlaceHolder;
+  }
+}
diff --git a/tools/ahat/src/heapdump/AhatInstance.java b/tools/ahat/src/heapdump/AhatInstance.java
new file mode 100644
index 0000000..e6b9c00
--- /dev/null
+++ b/tools/ahat/src/heapdump/AhatInstance.java
@@ -0,0 +1,455 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.ahat.heapdump;
+
+import com.android.tools.perflib.heap.ClassObj;
+import com.android.tools.perflib.heap.Instance;
+import com.android.tools.perflib.heap.RootObj;
+import java.awt.image.BufferedImage;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.List;
+
+public abstract class AhatInstance implements Diffable<AhatInstance> {
+  private long mId;
+  private long mSize;
+  private long mTotalRetainedSize;
+  private long[] mRetainedSizes;      // Retained size indexed by heap index
+  private boolean mIsReachable;
+  private AhatHeap mHeap;
+  private AhatInstance mImmediateDominator;
+  private AhatInstance mNextInstanceToGcRoot;
+  private String mNextInstanceToGcRootField = "???";
+  private AhatClassObj mClassObj;
+  private AhatInstance[] mHardReverseReferences;
+  private AhatInstance[] mSoftReverseReferences;
+  private Site mSite;
+
+  // If this instance is a root, mRootTypes contains a set of the root types.
+  // If this instance is not a root, mRootTypes is null.
+  private List<String> mRootTypes;
+
+  // List of instances this instance immediately dominates.
+  private List<AhatInstance> mDominated = new ArrayList<AhatInstance>();
+
+  private AhatInstance mBaseline;
+
+  public AhatInstance(long id) {
+    mId = id;
+    mBaseline = this;
+  }
+
+  /**
+   * Initializes this AhatInstance based on the given perflib instance.
+   * The AhatSnapshot should be used to look up AhatInstances and AhatHeaps.
+   * There is no guarantee that the AhatInstances returned by
+   * snapshot.findInstance have been initialized yet.
+   */
+  void initialize(AhatSnapshot snapshot, Instance inst) {
+    mId = inst.getId();
+    mSize = inst.getSize();
+    mTotalRetainedSize = inst.getTotalRetainedSize();
+    mIsReachable = inst.isReachable();
+
+    List<AhatHeap> heaps = snapshot.getHeaps();
+    mRetainedSizes = new long[heaps.size()];
+    for (AhatHeap heap : heaps) {
+      mRetainedSizes[heap.getIndex()] = inst.getRetainedSize(heap.getIndex());
+    }
+
+    mHeap = snapshot.getHeap(inst.getHeap().getName());
+
+    Instance dom = inst.getImmediateDominator();
+    if (dom == null || dom instanceof RootObj) {
+      mImmediateDominator = null;
+    } else {
+      mImmediateDominator = snapshot.findInstance(dom.getId());
+      mImmediateDominator.mDominated.add(this);
+    }
+
+    ClassObj clsObj = inst.getClassObj();
+    if (clsObj != null) {
+      mClassObj = snapshot.findClassObj(clsObj.getId());
+    }
+
+    // A couple of notes about reverse references:
+    // * perflib sometimes returns unreachable reverse references. If
+    //   snapshot.findInstance returns null, it means the reverse reference is
+    //   not reachable, so we filter it out.
+    // * We store the references as AhatInstance[] instead of
+    //   ArrayList<AhatInstance> because it saves a lot of space and helps
+    //   with performance when there are a lot of AhatInstances.
+    ArrayList<AhatInstance> ahatRefs = new ArrayList<AhatInstance>();
+    for (Instance ref : inst.getHardReverseReferences()) {
+      AhatInstance ahat = snapshot.findInstance(ref.getId());
+      if (ahat != null) {
+        ahatRefs.add(ahat);
+      }
+    }
+    mHardReverseReferences = new AhatInstance[ahatRefs.size()];
+    ahatRefs.toArray(mHardReverseReferences);
+
+    List<Instance> refs = inst.getSoftReverseReferences();
+    ahatRefs.clear();
+    if (refs != null) {
+      for (Instance ref : refs) {
+        AhatInstance ahat = snapshot.findInstance(ref.getId());
+        if (ahat != null) {
+          ahatRefs.add(ahat);
+        }
+      }
+    }
+    mSoftReverseReferences = new AhatInstance[ahatRefs.size()];
+    ahatRefs.toArray(mSoftReverseReferences);
+  }
+
+  /**
+   * Returns a unique identifier for the instance.
+   */
+  public long getId() {
+    return mId;
+  }
+
+  /**
+   * Returns the shallow number of bytes this object takes up.
+   */
+  public long getSize() {
+    return mSize;
+  }
+
+  /**
+   * Returns the number of bytes belonging to the given heap that this instance
+   * retains.
+   */
+  public long getRetainedSize(AhatHeap heap) {
+    int index = heap.getIndex();
+    return 0 <= index && index < mRetainedSizes.length ? mRetainedSizes[index] : 0;
+  }
+
+  /**
+   * Returns the total number of bytes this instance retains.
+   */
+  public long getTotalRetainedSize() {
+    return mTotalRetainedSize;
+  }
+
+  /**
+   * Returns whether this object is strongly-reachable.
+   */
+  public boolean isReachable() {
+    return mIsReachable;
+  }
+
+  /**
+   * Returns the heap that this instance is allocated on.
+   */
+  public AhatHeap getHeap() {
+    return mHeap;
+  }
+
+  /**
+   * Returns true if this instance is marked as a root instance.
+   */
+  public boolean isRoot() {
+    return mRootTypes != null;
+  }
+
+  /**
+   * Marks this instance as being a root of the given type.
+   */
+  void addRootType(String type) {
+    if (mRootTypes == null) {
+      mRootTypes = new ArrayList<String>();
+    }
+    if (!mRootTypes.contains(type)) {
+      mRootTypes.add(type);
+    }
+  }
+
+  /**
+   * Returns a list of string descriptions of the root types of this object.
+   * Returns null if this object is not a root.
+   */
+  public Collection<String> getRootTypes() {
+    return mRootTypes;
+  }
+
+  /**
+   * Returns the immediate dominator of this instance.
+   * Returns null if this is a root instance.
+   */
+  public AhatInstance getImmediateDominator() {
+    return mImmediateDominator;
+  }
+
+  /**
+   * Returns a list of those objects immediately dominated by this instance.
+   */
+  public List<AhatInstance> getDominated() {
+    return mDominated;
+  }
+
+  /**
+   * Returns the site where this instance was allocated.
+   */
+  public Site getSite() {
+    return mSite;
+  }
+
+  /**
+   * Sets the allocation site of this instance.
+   */
+  void setSite(Site site) {
+    mSite = site;
+  }
+
+  /**
+   * Returns true if this instance is a class object.
+   */
+  public boolean isClassObj() {
+    // Overridden by AhatClassObj.
+    return false;
+  }
+
+  /**
+   * Returns this as an AhatClassObj if this is an AhatClassObj.
+   * Returns null if this is not an AhatClassObj.
+   */
+  public AhatClassObj asClassObj() {
+    // Overridden by AhatClassObj.
+    return null;
+  }
+
+  /**
+   * Returns the class object instance for the class of this object.
+   */
+  public AhatClassObj getClassObj() {
+    return mClassObj;
+  }
+
+  /**
+   * Returns the name of the class this object belongs to.
+   */
+  public String getClassName() {
+    AhatClassObj classObj = getClassObj();
+    return classObj == null ? "???" : classObj.getName();
+  }
+
+  /**
+   * Returns true if this instance is an array instance.
+   */
+  public boolean isArrayInstance() {
+    // Overridden by AhatArrayInstance.
+    return false;
+  }
+
+  /**
+   * Returns this as an AhatArrayInstance if this is an AhatArrayInstance.
+   * Returns null if this is not an AhatArrayInstance.
+   */
+  public AhatArrayInstance asArrayInstance() {
+    // Overridden by AhatArrayInstance.
+    return null;
+  }
+
+  /**
+   * Returns true if this instance is a class instance.
+   */
+  public boolean isClassInstance() {
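+    // Overridden by AhatClassInstance.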
+    return false;
+  }
+
+  /**
+   * Returns this as an AhatClassInstance if this is an AhatClassInstance.
+   * Returns null if this is not an AhatClassInstance.
+   */
+  public AhatClassInstance asClassInstance() {
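+    // Overridden by AhatClassInstance.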
+    return null;
+  }
+
+  /**
+   * Return the referent associated with this instance.
+   * This is relevant for instances of java.lang.ref.Reference.
+   * Returns null if the instance has no referent associated with it.
+   */
+  public AhatInstance getReferent() {
+    // Overridden by AhatClassInstance.
+    return null;
+  }
+
+  /**
+   * Returns a list of objects with hard references to this object.
+   */
+  public List<AhatInstance> getHardReverseReferences() {
+    return Arrays.asList(mHardReverseReferences);
+  }
+
+  /**
+   * Returns a list of objects with soft references to this object.
+   */
+  public List<AhatInstance> getSoftReverseReferences() {
+    return Arrays.asList(mSoftReverseReferences);
+  }
+
+  /**
+   * Returns the value of a field of an instance.
+   * Returns null if the field value is null, the field couldn't be read, or
+   * there are multiple fields with the same name.
+   */
+  public Value getField(String fieldName) {
+    // Overridden by AhatClassInstance.
+    return null;
+  }
+
+  /**
+   * Reads a reference field of this instance.
+   * Returns null if the field value is null, or if the field couldn't be read.
+   */
+  public AhatInstance getRefField(String fieldName) {
+    // Overridden by AhatClassInstance.
+    return null;
+  }
+
+  /**
+   * Assuming this instance represents a DexCache object, return the dex
+   * location for that dex cache. Returns null if this instance doesn't
+   * represent a DexCache object or the location could not be found.
+   * If maxChars is non-negative, the returned location is truncated to
+   * maxChars in length.
+   */
+  public String getDexCacheLocation(int maxChars) {
+    return null;
+  }
+
+  /**
+   * Return the bitmap instance associated with this object, or null if there
+   * is none. This works for android.graphics.Bitmap instances and their
+   * underlying byte[] instances.
+   */
+  public AhatInstance getAssociatedBitmapInstance() {
+    return null;
+  }
+
+  /**
+   * Read the string value from this instance.
+   * Returns null if this object can't be interpreted as a string.
+   * The returned string is truncated to maxChars characters.
+   * If maxChars is negative, the returned string is not truncated.
+   */
+  public String asString(int maxChars) {
+    // By default instances can't be interpreted as a string. This method is
+    // overridden by AhatClassInstance and AhatArrayInstance for those cases
+    // when an instance can be interpreted as a string.
+    return null;
+  }
+
+  /**
+   * Reads the string value from this instance, without truncation.
+   * Returns null if this instance can't be interpreted as a string.
+   */
+  public String asString() {
+    return asString(-1);
+  }
+
+  /**
+   * Return the bitmap associated with this instance, if any.
+   * This is relevant for instances of android.graphics.Bitmap and byte[].
+   * Returns null if there is no bitmap associated with this instance.
+   */
+  public BufferedImage asBitmap() {
+    return null;
+  }
+
+  /**
+   * Returns a sample path from a GC root to this instance.
+   * This instance is included as the last element of the path with an empty
+   * field description.
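+   *
+   * For example, a returned path might conceptually read:
+   *   root .mTable -> map .table -> entry .value -> this
+   * (an illustrative sketch; the actual instances and field descriptions
+   * depend on the heap dump).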
+   */
+  public List<PathElement> getPathFromGcRoot() {
+    List<PathElement> path = new ArrayList<PathElement>();
+
+    AhatInstance dom = this;
+    for (PathElement elem = new PathElement(this, ""); elem != null;
+        elem = getNextPathElementToGcRoot(elem.instance)) {
+      if (elem.instance.equals(dom)) {
+        elem.isDominator = true;
+        dom = dom.getImmediateDominator();
+      }
+      path.add(elem);
+    }
+    Collections.reverse(path);
+    return path;
+  }
+
+  /**
+   * Returns the next instance on the path to the GC root from the given
+   * instance, along with a description of which field of that next instance
+   * refers to the given instance.
+   * Returns null if the given instance has no next instance to the GC root.
+   */
+  private static PathElement getNextPathElementToGcRoot(AhatInstance inst) {
+    AhatInstance parent = inst.mNextInstanceToGcRoot;
+    if (parent == null) {
+      return null;
+    }
+    return new PathElement(parent, inst.mNextInstanceToGcRootField);
+  }
+
+  void setNextInstanceToGcRoot(AhatInstance inst, String field) {
+    mNextInstanceToGcRoot = inst;
+    mNextInstanceToGcRootField = field;
+  }
+
+  /** Returns a human-readable identifier for this object.
+   * For class objects, the string is the class name.
+   * For class instances, the string is the class name followed by '@' and the
+   * hex id of the instance.
+   * For array instances, the string is the array type followed by the size in
+   * square brackets, followed by '@' and the hex id of the instance.
+   */
+  @Override public abstract String toString();
+
+  /**
+   * Returns the byte[] value of this instance.
+   * Returns null if the instance is not a byte array.
+   */
+  byte[] asByteArray() {
+    return null;
+  }
+
+  public void setBaseline(AhatInstance baseline) {
+    mBaseline = baseline;
+  }
+
+  @Override public AhatInstance getBaseline() {
+    return mBaseline;
+  }
+
+  @Override public boolean isPlaceHolder() {
+    return false;
+  }
+
+  /**
+   * Returns a new place holder instance corresponding to this instance.
+   */
+  AhatInstance newPlaceHolderInstance() {
+    return new AhatPlaceHolderInstance(this);
+  }
+}
diff --git a/tools/ahat/src/heapdump/AhatPlaceHolderClassObj.java b/tools/ahat/src/heapdump/AhatPlaceHolderClassObj.java
new file mode 100644
index 0000000..c6ad87f
--- /dev/null
+++ b/tools/ahat/src/heapdump/AhatPlaceHolderClassObj.java
@@ -0,0 +1,71 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.ahat.heapdump;
+
+/**
+ * PlaceHolder instance to take the place of a real AhatClassObj for
+ * the purposes of displaying diffs.
+ *
+ * This should be created through a call to newPlaceHolderInstance().
+ */
+public class AhatPlaceHolderClassObj extends AhatClassObj {
+  AhatPlaceHolderClassObj(AhatClassObj baseline) {
+    super(-1);
+    setBaseline(baseline);
+    baseline.setBaseline(this);
+  }
+
+  @Override public long getSize() {
+    return 0;
+  }
+
+  @Override public long getRetainedSize(AhatHeap heap) {
+    return 0;
+  }
+
+  @Override public long getTotalRetainedSize() {
+    return 0;
+  }
+
+  @Override public AhatHeap getHeap() {
+    return getBaseline().getHeap().getBaseline();
+  }
+
+  @Override public String getClassName() {
+    return getBaseline().getClassName();
+  }
+
+  @Override public String toString() {
+    return getBaseline().toString();
+  }
+
+  @Override public boolean isPlaceHolder() {
+    return true;
+  }
+
+  @Override public String getName() {
+    return getBaseline().asClassObj().getName();
+  }
+
+  @Override public AhatClassObj getSuperClassObj() {
+    return getBaseline().asClassObj().getSuperClassObj().getBaseline().asClassObj();
+  }
+
+  @Override public AhatInstance getClassLoader() {
+    return getBaseline().asClassObj().getClassLoader().getBaseline();
+  }
+}
diff --git a/tools/ahat/src/heapdump/AhatPlaceHolderInstance.java b/tools/ahat/src/heapdump/AhatPlaceHolderInstance.java
new file mode 100644
index 0000000..9412eae
--- /dev/null
+++ b/tools/ahat/src/heapdump/AhatPlaceHolderInstance.java
@@ -0,0 +1,63 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.ahat.heapdump;
+
+/**
+ * Generic PlaceHolder instance to take the place of a real AhatInstance for
+ * the purposes of displaying diffs.
+ *
+ * This should be created through a call to AhatInstance.newPlaceHolderInstance().
+ */
+public class AhatPlaceHolderInstance extends AhatInstance {
+  AhatPlaceHolderInstance(AhatInstance baseline) {
+    super(-1);
+    setBaseline(baseline);
+    baseline.setBaseline(this);
+  }
+
+  @Override public long getSize() {
+    return 0;
+  }
+
+  @Override public long getRetainedSize(AhatHeap heap) {
+    return 0;
+  }
+
+  @Override public long getTotalRetainedSize() {
+    return 0;
+  }
+
+  @Override public AhatHeap getHeap() {
+    return getBaseline().getHeap().getBaseline();
+  }
+
+  @Override public String getClassName() {
+    return getBaseline().getClassName();
+  }
+
+  @Override public String asString(int maxChars) {
+    return getBaseline().asString(maxChars);
+  }
+
+  @Override public String toString() {
+    return getBaseline().toString();
+  }
+
+  @Override public boolean isPlaceHolder() {
+    return true;
+  }
+}
diff --git a/tools/ahat/src/heapdump/AhatSnapshot.java b/tools/ahat/src/heapdump/AhatSnapshot.java
new file mode 100644
index 0000000..20b85da
--- /dev/null
+++ b/tools/ahat/src/heapdump/AhatSnapshot.java
@@ -0,0 +1,293 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.ahat.heapdump;
+
+import com.android.tools.perflib.captures.DataBuffer;
+import com.android.tools.perflib.captures.MemoryMappedFileBuffer;
+import com.android.tools.perflib.heap.ArrayInstance;
+import com.android.tools.perflib.heap.ClassInstance;
+import com.android.tools.perflib.heap.ClassObj;
+import com.android.tools.perflib.heap.Heap;
+import com.android.tools.perflib.heap.Instance;
+import com.android.tools.perflib.heap.ProguardMap;
+import com.android.tools.perflib.heap.RootObj;
+import com.android.tools.perflib.heap.Snapshot;
+import com.android.tools.perflib.heap.StackFrame;
+import com.android.tools.perflib.heap.StackTrace;
+import gnu.trove.TObjectProcedure;
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+public class AhatSnapshot implements Diffable<AhatSnapshot> {
+  private final Site mRootSite = new Site("ROOT");
+
+  // Collection of objects whose immediate dominator is the SENTINEL_ROOT.
+  private final List<AhatInstance> mRooted = new ArrayList<AhatInstance>();
+
+  // List of all ahat instances stored in increasing order by id.
+  private final List<AhatInstance> mInstances = new ArrayList<AhatInstance>();
+
+  // Map from class name to class object.
+  private final Map<String, AhatClassObj> mClasses = new HashMap<String, AhatClassObj>();
+
+  private final List<AhatHeap> mHeaps = new ArrayList<AhatHeap>();
+
+  private AhatSnapshot mBaseline = this;
+
+  /**
+   * Create an AhatSnapshot from an hprof file.
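+   *
+   * A minimal usage sketch (file name illustrative, assuming an empty
+   * ProguardMap is acceptable when no mapping file is available):
+   *   AhatSnapshot snapshot = AhatSnapshot.fromHprof(
+   *       new File("heap.hprof"), new ProguardMap());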
+   */
+  public static AhatSnapshot fromHprof(File hprof, ProguardMap map) throws IOException {
+    return fromDataBuffer(new MemoryMappedFileBuffer(hprof), map);
+  }
+
+  /**
+   * Create an AhatSnapshot from an in-memory data buffer.
+   */
+  public static AhatSnapshot fromDataBuffer(DataBuffer buffer, ProguardMap map) throws IOException {
+    AhatSnapshot snapshot = new AhatSnapshot(buffer, map);
+
+    // Request a GC now to clean up memory used by perflib. This helps to
+    // avoid a noticeable pause when visiting the first interesting page in
+    // ahat.
+    System.gc();
+
+    return snapshot;
+  }
+
+  /**
+   * Constructs an AhatSnapshot for the given hprof binary data.
+   */
+  private AhatSnapshot(DataBuffer buffer, ProguardMap map) throws IOException {
+    Snapshot snapshot = Snapshot.createSnapshot(buffer, map);
+    snapshot.computeDominators();
+
+    // Properly label the class of class objects in the perflib snapshot, and
+    // count the total number of instances.
+    final ClassObj javaLangClass = snapshot.findClass("java.lang.Class");
+    if (javaLangClass != null) {
+      for (Heap heap : snapshot.getHeaps()) {
+        Collection<ClassObj> classes = heap.getClasses();
+        for (ClassObj clsObj : classes) {
+          if (clsObj.getClassObj() == null) {
+            clsObj.setClassId(javaLangClass.getId());
+          }
+        }
+      }
+    }
+
+    // Create mappings from id to ahat instance and heaps.
+    Collection<Heap> heaps = snapshot.getHeaps();
+    for (Heap heap : heaps) {
+      // Note: mHeaps will not be in index order if snapshot.getHeaps does not
+      // return heaps in index order. That's fine, because we don't rely on
+      // mHeaps being in index order.
+      mHeaps.add(new AhatHeap(heap.getName(), snapshot.getHeapIndex(heap)));
+      TObjectProcedure<Instance> doCreate = new TObjectProcedure<Instance>() {
+        @Override
+        public boolean execute(Instance inst) {
+          long id = inst.getId();
+          if (inst instanceof ClassInstance) {
+            mInstances.add(new AhatClassInstance(id));
+          } else if (inst instanceof ArrayInstance) {
+            mInstances.add(new AhatArrayInstance(id));
+          } else if (inst instanceof ClassObj) {
+            AhatClassObj classObj = new AhatClassObj(id);
+            mInstances.add(classObj);
+            mClasses.put(((ClassObj)inst).getClassName(), classObj);
+          }
+          return true;
+        }
+      };
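+      // Note: perflib stores class objects separately from ordinary
+      // instances, so visit both the heap's classes and its instances.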
+      for (Instance instance : heap.getClasses()) {
+        doCreate.execute(instance);
+      }
+      heap.forEachInstance(doCreate);
+    }
+
+    // Sort the instances by id so we can use binary search to lookup
+    // instances by id.
+    mInstances.sort(new Comparator<AhatInstance>() {
+      @Override
+      public int compare(AhatInstance a, AhatInstance b) {
+        return Long.compare(a.getId(), b.getId());
+      }
+    });
+
+    // Initialize ahat snapshot and instances based on the perflib snapshot
+    // and instances.
+    for (AhatInstance ahat : mInstances) {
+      Instance inst = snapshot.findInstance(ahat.getId());
+      ahat.initialize(this, inst);
+
+      if (inst.getImmediateDominator() == Snapshot.SENTINEL_ROOT) {
+        mRooted.add(ahat);
+      }
+
+      if (inst.isReachable()) {
+        ahat.getHeap().addToSize(ahat.getSize());
+      }
+
+      // Update sites.
+      StackFrame[] frames = null;
+      StackTrace stack = inst.getStack();
+      if (stack != null) {
+        frames = stack.getFrames();
+      }
+      Site site = mRootSite.add(frames, frames == null ? 0 : frames.length, ahat);
+      ahat.setSite(site);
+    }
+
+    // Record the roots and their types.
+    for (RootObj root : snapshot.getGCRoots()) {
+      Instance inst = root.getReferredInstance();
+      if (inst != null) {
+        findInstance(inst.getId()).addRootType(root.getRootType().toString());
+      }
+    }
+    snapshot.dispose();
+  }
+
+  /**
+   * Returns the instance with the given id in this snapshot.
+   * Returns null if no instance with the given id is found.
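+   * Lookup takes O(log n) time: mInstances is sorted by id and searched
+   * with a binary search.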
+   */
+  public AhatInstance findInstance(long id) {
+    // Binary search over the sorted instances.
+    int start = 0;
+    int end = mInstances.size();
+    while (start < end) {
+      int mid = start + ((end - start) / 2);
+      AhatInstance midInst = mInstances.get(mid);
+      long midId = midInst.getId();
+      if (id == midId) {
+        return midInst;
+      } else if (id < midId) {
+        end = mid;
+      } else {
+        start = mid + 1;
+      }
+    }
+    return null;
+  }
+
+  /**
+   * Returns the AhatClassObj with the given id in this snapshot.
+   * Returns null if no class object with the given id is found.
+   */
+  public AhatClassObj findClassObj(long id) {
+    AhatInstance inst = findInstance(id);
+    return inst == null ? null : inst.asClassObj();
+  }
+
+  /**
+   * Returns the class object for the class with the given name.
+   * Returns null if there is no class object for the given name.
+   * Note: This method is exposed for testing purposes.
+   */
+  public AhatClassObj findClass(String name) {
+    return mClasses.get(name);
+  }
+
+  /**
+   * Returns the heap with the given name, if any.
+   * Returns null if no heap with the given name could be found.
+   */
+  public AhatHeap getHeap(String name) {
+    // We expect a small number of heaps (maybe 3 or 4 total), so a linear
+    // search should be acceptable here performance-wise.
+    for (AhatHeap heap : getHeaps()) {
+      if (heap.getName().equals(name)) {
+        return heap;
+      }
+    }
+    return null;
+  }
+
+  /**
+   * Returns a list of heaps in the snapshot in canonical order.
+   * Modifications to the returned list are visible to this AhatSnapshot,
+   * which is used by diff to insert place holder heaps.
+   */
+  public List<AhatHeap> getHeaps() {
+    return mHeaps;
+  }
+
+  /**
+   * Returns a collection of instances whose immediate dominator is the
+   * SENTINEL_ROOT.
+   */
+  public List<AhatInstance> getRooted() {
+    return mRooted;
+  }
+
+  /**
+   * Returns the root site for this snapshot.
+   */
+  public Site getRootSite() {
+    return mRootSite;
+  }
+
+  /**
+   * Returns the site associated with the given object id and depth.
+   * Returns the root site if no such site is found.
+   */
+  public Site getSite(int id, int depth) {
+    AhatInstance obj = findInstance(id);
+    if (obj == null) {
+      return mRootSite;
+    }
+
+    Site site = obj.getSite();
+    for (int i = 0; i < depth && site.getParent() != null; i++) {
+      site = site.getParent();
+    }
+    return site;
+  }
+
+  // Return the Value for the given perflib value object.
+  Value getValue(Object value) {
+    if (value instanceof Instance) {
+      value = findInstance(((Instance)value).getId());
+    }
+    return value == null ? null : new Value(value);
+  }
+
+  public void setBaseline(AhatSnapshot baseline) {
+    mBaseline = baseline;
+  }
+
+  /**
+   * Returns true if this snapshot has been diffed against another, different
+   * snapshot.
+   */
+  public boolean isDiffed() {
+    return mBaseline != this;
+  }
+
+  @Override public AhatSnapshot getBaseline() {
+    return mBaseline;
+  }
+
+  @Override public boolean isPlaceHolder() {
+    return false;
+  }
+}
diff --git a/tools/ahat/src/heapdump/Diff.java b/tools/ahat/src/heapdump/Diff.java
new file mode 100644
index 0000000..943e6e6
--- /dev/null
+++ b/tools/ahat/src/heapdump/Diff.java
@@ -0,0 +1,383 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.ahat.heapdump;
+
+import java.util.ArrayDeque;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Deque;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+
+public class Diff {
+  /**
+   * Perform a diff between two heap lists.
+   *
+   * Heaps are diffed based on heap name. PlaceHolder heaps will be added to
+   * the given lists as necessary so that every heap in A has a corresponding
+   * heap in B and vice-versa.
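+   *
+   * For example (illustrative heap names), diffing A = [app, zygote] against
+   * B = [app, image] leaves A = [app, zygote, image'] and
+   * B = [app, image, zygote'], where the primed entries are placeholders.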
+   */
+  private static void heaps(List<AhatHeap> a, List<AhatHeap> b) {
+    int asize = a.size();
+    int bsize = b.size();
+    for (int i = 0; i < bsize; i++) {
+      // Set the B heap's baseline to null to mark that we have not yet
+      // matched it with an A heap.
+      b.get(i).setBaseline(null);
+    }
+
+    for (int i = 0; i < asize; i++) {
+      AhatHeap aheap = a.get(i);
+      aheap.setBaseline(null);
+      for (int j = 0; j < bsize; j++) {
+        AhatHeap bheap = b.get(j);
+        if (bheap.getBaseline() == null && aheap.getName().equals(bheap.getName())) {
+          // We found a match between aheap and bheap.
+          aheap.setBaseline(bheap);
+          bheap.setBaseline(aheap);
+          break;
+        }
+      }
+
+      if (aheap.getBaseline() == null) {
+        // We did not find any match for aheap in snapshot B.
+        // Create a placeholder heap in snapshot B to use as the baseline.
+        b.add(AhatHeap.newPlaceHolderHeap(aheap.getName(), aheap));
+      }
+    }
+
+    // Make placeholder heaps in snapshot A for any unmatched heaps in
+    // snapshot B.
+    for (int i = 0; i < bsize; i++) {
+      AhatHeap bheap = b.get(i);
+      if (bheap.getBaseline() == null) {
+        a.add(AhatHeap.newPlaceHolderHeap(bheap.getName(), bheap));
+      }
+    }
+  }
+
+  /**
+   * Key represents an equivalence class of AhatInstances that are allowed to
+   * be considered for correspondence between two different snapshots.
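+   *
+   * For example, two java.lang.String instances on heaps of the same name
+   * and with the same string value share a Key, so they may be matched
+   * across snapshots.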
+   */
+  private static class Key {
+    // Corresponding objects must belong to classes of the same name.
+    private final String mClass;
+
+    // Corresponding objects must belong to heaps of the same name.
+    private final String mHeapName;
+
+    // Corresponding string objects must have the same value.
+    // mStringValue is set to the empty string for non-string objects.
+    private final String mStringValue;
+
+    // Corresponding class objects must have the same class name.
+    // mClassName is set to the empty string for non-class objects.
+    private final String mClassName;
+
+    // Corresponding array objects must have the same length.
+    // mArrayLength is set to 0 for non-array objects.
+    private final int mArrayLength;
+
+
+    private Key(AhatInstance inst) {
+      mClass = inst.getClassName();
+      mHeapName = inst.getHeap().getName();
+      mClassName = inst.isClassObj() ? inst.asClassObj().getName() : "";
+      String string = inst.asString();
+      mStringValue = string == null ? "" : string;
+      AhatArrayInstance array = inst.asArrayInstance();
+      mArrayLength = array == null ? 0 : array.getLength();
+    }
+
+    /**
+     * Return the key for the given instance.
+     */
+    public static Key keyFor(AhatInstance inst) {
+      return new Key(inst);
+    }
+
+    @Override
+    public boolean equals(Object other) {
+      if (!(other instanceof Key)) {
+        return false;
+      }
+      Key o = (Key)other;
+      return mClass.equals(o.mClass)
+          && mHeapName.equals(o.mHeapName)
+          && mStringValue.equals(o.mStringValue)
+          && mClassName.equals(o.mClassName)
+          && mArrayLength == o.mArrayLength;
+    }
+
+    @Override
+    public int hashCode() {
+      return Objects.hash(mClass, mHeapName, mStringValue, mClassName, mArrayLength);
+    }
+  }
+
+  private static class InstanceListPair {
+    public final List<AhatInstance> a;
+    public final List<AhatInstance> b;
+
+    public InstanceListPair() {
+      this.a = new ArrayList<AhatInstance>();
+      this.b = new ArrayList<AhatInstance>();
+    }
+
+    public InstanceListPair(List<AhatInstance> a, List<AhatInstance> b) {
+      this.a = a;
+      this.b = b;
+    }
+  }
+
+  /**
+   * Recursively create place holder instances for the given instance and
+   * every instance dominated by that instance.
+   * Returns the place holder instance created for the given instance.
+   * Adds all allocated placeholders to the given placeholders list.
+   */
+  private static AhatInstance createPlaceHolders(AhatInstance inst,
+      List<AhatInstance> placeholders) {
+    // Don't actually use recursion, because we could easily smash the stack.
+    // Instead we iterate.
+    AhatInstance result = inst.newPlaceHolderInstance();
+    placeholders.add(result);
+    Deque<AhatInstance> deque = new ArrayDeque<AhatInstance>();
+    deque.push(inst);
+    while (!deque.isEmpty()) {
+      inst = deque.pop();
+
+      for (AhatInstance child : inst.getDominated()) {
+        placeholders.add(child.newPlaceHolderInstance());
+        deque.push(child);
+      }
+    }
+    return result;
+  }
+
+  /**
+   * Recursively diff two dominator trees of instances.
+   * PlaceHolder objects are appended to the lists as needed to ensure every
+   * object has a corresponding baseline in the other list. All PlaceHolder
+   * objects are also appended to the given placeholders list, so their Site
+   * info can be updated later on.
+   */
+  private static void instances(List<AhatInstance> a, List<AhatInstance> b,
+      List<AhatInstance> placeholders) {
+    // Don't actually use recursion, because we could easily smash the stack.
+    // Instead we iterate.
+    Deque<InstanceListPair> deque = new ArrayDeque<InstanceListPair>();
+    deque.push(new InstanceListPair(a, b));
+    while (!deque.isEmpty()) {
+      InstanceListPair p = deque.pop();
+
+      // Group instances of the same equivalence class together.
+      Map<Key, InstanceListPair> byKey = new HashMap<Key, InstanceListPair>();
+      for (AhatInstance inst : p.a) {
+        Key key = Key.keyFor(inst);
+        InstanceListPair pair = byKey.get(key);
+        if (pair == null) {
+          pair = new InstanceListPair();
+          byKey.put(key, pair);
+        }
+        pair.a.add(inst);
+      }
+      for (AhatInstance inst : p.b) {
+        Key key = Key.keyFor(inst);
+        InstanceListPair pair = byKey.get(key);
+        if (pair == null) {
+          pair = new InstanceListPair();
+          byKey.put(key, pair);
+        }
+        pair.b.add(inst);
+      }
+
+      // Diff objects belonging to the same equivalence class.
+      for (InstanceListPair pair : byKey.values()) {
+        // Sort by retained size and assume the elements at the top of the lists
+        // correspond to each other in that order. This could probably be
+        // improved if desired, but it gives good enough results for now.
+        Collections.sort(pair.a, Sort.INSTANCE_BY_TOTAL_RETAINED_SIZE);
+        Collections.sort(pair.b, Sort.INSTANCE_BY_TOTAL_RETAINED_SIZE);
+
+        int common = Math.min(pair.a.size(), pair.b.size());
+        for (int i = 0; i < common; i++) {
+          AhatInstance ainst = pair.a.get(i);
+          AhatInstance binst = pair.b.get(i);
+          ainst.setBaseline(binst);
+          binst.setBaseline(ainst);
+          deque.push(new InstanceListPair(ainst.getDominated(), binst.getDominated()));
+        }
+
+        // Add placeholder objects for anything leftover.
+        for (int i = common; i < pair.a.size(); i++) {
+          p.b.add(createPlaceHolders(pair.a.get(i), placeholders));
+        }
+
+        for (int i = common; i < pair.b.size(); i++) {
+          p.a.add(createPlaceHolders(pair.b.get(i), placeholders));
+        }
+      }
+    }
+  }
+
+  /**
+   * Sets the baseline for root and all its descendants to baseline.
+   */
+  private static void setSitesBaseline(Site root, Site baseline) {
+    root.setBaseline(baseline);
+    for (Site child : root.getChildren()) {
+      setSitesBaseline(child, baseline);
+    }
+  }
+
+  /**
+   * Recursively diff the two sites, setting them and their descendants as
+   * baselines for each other as appropriate.
+   *
+   * This requires that instances have already been diffed. In particular, we
+   * require all AhatClassObjs in one snapshot have corresponding (possibly
+   * place-holder) AhatClassObjs in the other snapshot.
+   */
+  private static void sites(Site a, Site b) {
+    // Set the sites as baselines of each other.
+    a.setBaseline(b);
+    b.setBaseline(a);
+
+    // Set the site's ObjectsInfos as baselines of each other. This implicitly
+    // adds new empty ObjectsInfo as needed.
+    for (Site.ObjectsInfo ainfo : a.getObjectsInfos()) {
+      AhatClassObj baseClassObj = null;
+      if (ainfo.classObj != null) {
+        baseClassObj = (AhatClassObj) ainfo.classObj.getBaseline();
+      }
+      ainfo.setBaseline(b.getObjectsInfo(ainfo.heap.getBaseline(), baseClassObj));
+    }
+    for (Site.ObjectsInfo binfo : b.getObjectsInfos()) {
+      AhatClassObj baseClassObj = null;
+      if (binfo.classObj != null) {
+        baseClassObj = (AhatClassObj) binfo.classObj.getBaseline();
+      }
+      binfo.setBaseline(a.getObjectsInfo(binfo.heap.getBaseline(), baseClassObj));
+    }
+
+    // Set B children's baselines to null to mark that we have not yet matched
+    // them with A children.
+    for (Site bchild : b.getChildren()) {
+      bchild.setBaseline(null);
+    }
+
+    for (Site achild : a.getChildren()) {
+      achild.setBaseline(null);
+      for (Site bchild : b.getChildren()) {
+        if (achild.getLineNumber() == bchild.getLineNumber()
+            && achild.getMethodName().equals(bchild.getMethodName())
+            && achild.getSignature().equals(bchild.getSignature())
+            && achild.getFilename().equals(bchild.getFilename())) {
+          // We found a match between achild and bchild.
+          sites(achild, bchild);
+          break;
+        }
+      }
+
+      if (achild.getBaseline() == null) {
+        // We did not find any match for achild in site B.
+        // Use B for the baseline of achild and its descendants.
+        setSitesBaseline(achild, b);
+      }
+    }
+
+    for (Site bchild : b.getChildren()) {
+      if (bchild.getBaseline() == null) {
+        setSitesBaseline(bchild, a);
+      }
+    }
+  }
+
+  /**
+   * Perform a diff of the two snapshots, setting each as the baseline for the
+   * other.
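+   *
+   * A minimal usage sketch, assuming two already-parsed snapshots and some
+   * instance inst taken from the first:
+   *   Diff.snapshots(current, baseline);
+   *   long delta = inst.getSize() - inst.getBaseline().getSize();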
+   */
+  public static void snapshots(AhatSnapshot a, AhatSnapshot b) {
+    a.setBaseline(b);
+    b.setBaseline(a);
+
+    // Diff the heaps of each snapshot.
+    heaps(a.getHeaps(), b.getHeaps());
+
+    // Diff the instances of each snapshot.
+    List<AhatInstance> placeholders = new ArrayList<AhatInstance>();
+    instances(a.getRooted(), b.getRooted(), placeholders);
+
+    // Diff the sites of each snapshot.
+    // This requires the instances have already been diffed.
+    sites(a.getRootSite(), b.getRootSite());
+
+    // Add placeholders to their corresponding sites.
+    // This requires the sites have already been diffed.
+    for (AhatInstance placeholder : placeholders) {
+      placeholder.getBaseline().getSite().getBaseline().addPlaceHolderInstance(placeholder);
+    }
+  }
+
+  /**
+   * Diff two lists of field values.
+   * PlaceHolder objects are added to the given lists as needed to ensure
+   * every FieldValue in A ends up with a corresponding FieldValue in B.
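+   *
+   * For example, if A has fields [x, y] and B has fields [x, z, y], a
+   * placeholder z' is inserted into A so the lists line up as [x, z', y]
+   * and [x, z, y].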
+   */
+  public static void fields(List<FieldValue> a, List<FieldValue> b) {
+    // Fields with the same name and type are considered matching fields.
+    // For simplicity, we assume the matching fields are in the same order in
+    // both A and B, though some fields may be added or removed in either
+    // list. If our assumption is wrong, in the worst case the quality of the
+    // field diff is poor.
+
+    for (int i = 0; i < a.size(); i++) {
+      FieldValue afield = a.get(i);
+      afield.setBaseline(null);
+
+      // Find the matching field in B, if any.
+      for (int j = i; j < b.size(); j++) {
+        FieldValue bfield = b.get(j);
+        if (afield.getName().equals(bfield.getName())
+            && afield.getType().equals(bfield.getType())) {
+          // We found the matching field in B.
+          // Assume fields i, ..., j-1 in B have no match in A.
+          for ( ; i < j; i++) {
+            a.add(i, FieldValue.newPlaceHolderFieldValue(b.get(i)));
+          }
+
+          afield.setBaseline(bfield);
+          bfield.setBaseline(afield);
+          break;
+        }
+      }
+
+      if (afield.getBaseline() == null) {
+        b.add(i, FieldValue.newPlaceHolderFieldValue(afield));
+      }
+    }
+
+    // All remaining fields in B are unmatched by any in A.
+    for (int i = a.size(); i < b.size(); i++) {
+      a.add(i, FieldValue.newPlaceHolderFieldValue(b.get(i)));
+    }
+  }
+}
diff --git a/tools/ahat/src/heapdump/Diffable.java b/tools/ahat/src/heapdump/Diffable.java
new file mode 100644
index 0000000..53442c8
--- /dev/null
+++ b/tools/ahat/src/heapdump/Diffable.java
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.ahat.heapdump;
+
+/**
+ * An interface for objects that have corresponding objects in a baseline heap
+ * dump.
+ */
+public interface Diffable<T> {
+  /**
+   * Return the baseline object that corresponds to this one.
+   */
+  T getBaseline();
+
+  /**
+   * Returns true if this is a placeholder object.
+   * A placeholder object is used to indicate there is some object in the
+   * baseline heap dump that is not in this heap dump. In that case, we create
+   * a dummy place holder object in this heap dump to stand in for the object
+   * that exists only in the baseline heap dump.
+   */
+  boolean isPlaceHolder();
+}
+
diff --git a/tools/ahat/src/heapdump/FieldValue.java b/tools/ahat/src/heapdump/FieldValue.java
new file mode 100644
index 0000000..3f65cd3
--- /dev/null
+++ b/tools/ahat/src/heapdump/FieldValue.java
@@ -0,0 +1,83 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.ahat.heapdump;
+
+public class FieldValue implements Diffable<FieldValue> {
+  private final String mName;
+  private final String mType;
+  private final Value mValue;
+  private FieldValue mBaseline;
+  private final boolean mIsPlaceHolder;
+
+  public FieldValue(String name, String type, Value value) {
+    mName = name;
+    mType = type;
+    mValue = value;
+    mBaseline = this;
+    mIsPlaceHolder = false;
+  }
+
+  /**
+   * Construct a place holder FieldValue
+   */
+  private FieldValue(FieldValue baseline) {
+    mName = baseline.mName;
+    mType = baseline.mType;
+    mValue = Value.getBaseline(baseline.mValue);
+    mBaseline = baseline;
+    mIsPlaceHolder = true;
+  }
+
+  static FieldValue newPlaceHolderFieldValue(FieldValue baseline) {
+    FieldValue field = new FieldValue(baseline);
+    baseline.setBaseline(field);
+    return field;
+  }
+
+  /**
+   * Returns the name of the field.
+   */
+  public String getName() {
+    return mName;
+  }
+
+  /**
+   * Returns a description of the type of the field.
+   */
+  public String getType() {
+    return mType;
+  }
+
+  /**
+   * Returns the value of this field.
+   */
+  public Value getValue() {
+    return mValue;
+  }
+
+  public void setBaseline(FieldValue baseline) {
+    mBaseline = baseline;
+  }
+
+  @Override public FieldValue getBaseline() {
+    return mBaseline;
+  }
+
+  @Override public boolean isPlaceHolder() {
+    return mIsPlaceHolder;
+  }
+}
diff --git a/tools/ahat/src/heapdump/PathElement.java b/tools/ahat/src/heapdump/PathElement.java
new file mode 100644
index 0000000..196a246
--- /dev/null
+++ b/tools/ahat/src/heapdump/PathElement.java
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.ahat.heapdump;
+
+public class PathElement implements Diffable<PathElement> {
+  public final AhatInstance instance;
+  public final String field;
+  public boolean isDominator;
+
+  public PathElement(AhatInstance instance, String field) {
+    this.instance = instance;
+    this.field = field;
+    this.isDominator = false;
+  }
+
+  @Override public PathElement getBaseline() {
+    return this;
+  }
+
+  @Override public boolean isPlaceHolder() {
+    return false;
+  }
+}
diff --git a/tools/ahat/src/heapdump/Site.java b/tools/ahat/src/heapdump/Site.java
new file mode 100644
index 0000000..738eaf0
--- /dev/null
+++ b/tools/ahat/src/heapdump/Site.java
@@ -0,0 +1,284 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.ahat.heapdump;
+
+import com.android.tools.perflib.heap.StackFrame;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+public class Site implements Diffable<Site> {
+  // The site that this site was directly called from.
+  // mParent is null for the root site.
+  private Site mParent;
+
+  private String mMethodName;
+  private String mSignature;
+  private String mFilename;
+  private int mLineNumber;
+
+  // To identify this site, we pick a stack trace that includes the site.
+  // mId is the id of an object allocated at that stack trace, and mDepth
+  // is the number of calls between this site and the innermost site of
+  // allocation of the object with mId.
+  // For the root site, mId is 0 and mDepth is 0.
+  private long mId;
+  private int mDepth;
+
+  // The total size of objects allocated in this site (including child sites),
+  // organized by heap index. Heap indices outside the range of mSizesByHeap
+  // implicitly have size 0.
+  private long[] mSizesByHeap;
+
+  // List of child sites.
+  private List<Site> mChildren;
+
+  // List of all objects allocated in this site (including child sites).
+  private List<AhatInstance> mObjects;
+  private List<ObjectsInfo> mObjectsInfos;
+  private Map<AhatHeap, Map<AhatClassObj, ObjectsInfo>> mObjectsInfoMap;
+
+  private Site mBaseline;
+
+  public static class ObjectsInfo implements Diffable<ObjectsInfo> {
+    public AhatHeap heap;
+    public AhatClassObj classObj;   // May be null.
+    public long numInstances;
+    public long numBytes;
+    private ObjectsInfo baseline;
+
+    public ObjectsInfo(AhatHeap heap, AhatClassObj classObj, long numInstances, long numBytes) {
+      this.heap = heap;
+      this.classObj = classObj;
+      this.numInstances = numInstances;
+      this.numBytes = numBytes;
+      this.baseline = this;
+    }
+
+    /**
+     * Returns the name of the class this ObjectsInfo is associated with.
+     */
+    public String getClassName() {
+      return classObj == null ? "???" : classObj.getName();
+    }
+
+    public void setBaseline(ObjectsInfo baseline) {
+      this.baseline = baseline;
+    }
+
+    @Override public ObjectsInfo getBaseline() {
+      return baseline;
+    }
+
+    @Override public boolean isPlaceHolder() {
+      return false;
+    }
+  }
+
+  /**
+   * Construct a root site.
+   */
+  public Site(String name) {
+    this(null, name, "", "", 0, 0, 0);
+  }
+
+  public Site(Site parent, String method, String signature, String file,
+      int line, long id, int depth) {
+    mParent = parent;
+    mMethodName = method;
+    mSignature = signature;
+    mFilename = file;
+    mLineNumber = line;
+    mId = id;
+    mDepth = depth;
+    mSizesByHeap = new long[1];
+    mChildren = new ArrayList<Site>();
+    mObjects = new ArrayList<AhatInstance>();
+    mObjectsInfos = new ArrayList<ObjectsInfo>();
+    mObjectsInfoMap = new HashMap<AhatHeap, Map<AhatClassObj, ObjectsInfo>>();
+    mBaseline = this;
+  }
+
+  /**
+   * Add an instance to this site.
+   * Returns the site at which the instance was allocated.
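+   *
+   * For example, with frames = [f0 (inner-most), f1, f2] and depth = 3, the
+   * instance is recorded at this site and at the child sites for f2, f1 and
+   * f0 in turn, and the site for f0 is returned.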
+   * @param frames - The list of frames in the stack trace, starting with the inner-most frame.
+   * @param depth - The number of frames remaining before the inner-most frame is reached.
+   */
+  Site add(StackFrame[] frames, int depth, AhatInstance inst) {
+    return add(this, frames, depth, inst);
+  }
+
+  private static Site add(Site site, StackFrame[] frames, int depth, AhatInstance inst) {
+    while (true) {
+      site.mObjects.add(inst);
+
+      ObjectsInfo info = site.getObjectsInfo(inst.getHeap(), inst.getClassObj());
+      if (inst.isReachable()) {
+        AhatHeap heap = inst.getHeap();
+        if (heap.getIndex() >= site.mSizesByHeap.length) {
+          long[] newSizes = new long[heap.getIndex() + 1];
+          for (int i = 0; i < site.mSizesByHeap.length; i++) {
+            newSizes[i] = site.mSizesByHeap[i];
+          }
+          site.mSizesByHeap = newSizes;
+        }
+        site.mSizesByHeap[heap.getIndex()] += inst.getSize();
+
+        info.numInstances++;
+        info.numBytes += inst.getSize();
+      }
+
+      if (depth > 0) {
+        StackFrame next = frames[depth - 1];
+        Site child = null;
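+        // Look for an existing child site matching the next frame.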
+        for (int i = 0; i < site.mChildren.size(); i++) {
+          Site curr = site.mChildren.get(i);
+          if (curr.mLineNumber == next.getLineNumber()
+              && curr.mMethodName.equals(next.getMethodName())
+              && curr.mSignature.equals(next.getSignature())
+              && curr.mFilename.equals(next.getFilename())) {
+            child = curr;
+            break;
+          }
+        }
+        if (child == null) {
+          child = new Site(site, next.getMethodName(), next.getSignature(),
+              next.getFilename(), next.getLineNumber(), inst.getId(), depth - 1);
+          site.mChildren.add(child);
+        }
+        depth = depth - 1;
+        site = child;
+      } else {
+        return site;
+      }
+    }
+  }
+
+  // Get the size of a site for a specific heap.
+  public long getSize(AhatHeap heap) {
+    int index = heap.getIndex();
+    return index >= 0 && index < mSizesByHeap.length ? mSizesByHeap[index] : 0;
+  }
+
+  /**
+   * Get the list of objects allocated under this site. Includes objects
+   * allocated in child sites.
+   */
+  public Collection<AhatInstance> getObjects() {
+    return mObjects;
+  }
+
+  /**
+   * Returns the ObjectsInfo at this site for the given heap and class
+   * object. Creates a new empty ObjectsInfo if none existed before.
+   */
+  ObjectsInfo getObjectsInfo(AhatHeap heap, AhatClassObj classObj) {
+    Map<AhatClassObj, ObjectsInfo> classToObjectsInfo = mObjectsInfoMap.get(heap);
+    if (classToObjectsInfo == null) {
+      classToObjectsInfo = new HashMap<AhatClassObj, ObjectsInfo>();
+      mObjectsInfoMap.put(heap, classToObjectsInfo);
+    }
+
+    ObjectsInfo info = classToObjectsInfo.get(classObj);
+    if (info == null) {
+      info = new ObjectsInfo(heap, classObj, 0, 0);
+      mObjectsInfos.add(info);
+      classToObjectsInfo.put(classObj, info);
+    }
+    return info;
+  }
+
+  public List<ObjectsInfo> getObjectsInfos() {
+    return mObjectsInfos;
+  }
+
+  // Get the combined size of the site for all heaps.
+  public long getTotalSize() {
+    long total = 0;
+    for (int i = 0; i < mSizesByHeap.length; i++) {
+      total += mSizesByHeap[i];
+    }
+    return total;
+  }
+
+  /**
+   * Return the site this site was called from.
+   * Returns null for the root site.
+   */
+  public Site getParent() {
+    return mParent;
+  }
+
+  public String getMethodName() {
+    return mMethodName;
+  }
+
+  public String getSignature() {
+    return mSignature;
+  }
+
+  public String getFilename() {
+    return mFilename;
+  }
+
+  public int getLineNumber() {
+    return mLineNumber;
+  }
+
+  /**
+   * Returns the id of some object allocated in this site.
+   */
+  public long getId() {
+    return mId;
+  }
+
+  /**
+   * Returns the number of frames between this site and the site where the
+   * object with id getId() was allocated.
+   */
+  public int getDepth() {
+    return mDepth;
+  }
+
+  public List<Site> getChildren() {
+    return mChildren;
+  }
+
+  void setBaseline(Site baseline) {
+    mBaseline = baseline;
+  }
+
+  @Override public Site getBaseline() {
+    return mBaseline;
+  }
+
+  @Override public boolean isPlaceHolder() {
+    return false;
+  }
+
+  /**
+   * Adds a place holder instance to this site and all parent sites.
+   */
+  void addPlaceHolderInstance(AhatInstance placeholder) {
+    for (Site site = this; site != null; site = site.mParent) {
+      site.mObjects.add(placeholder);
+    }
+  }
+}
diff --git a/tools/ahat/src/heapdump/Sort.java b/tools/ahat/src/heapdump/Sort.java
new file mode 100644
index 0000000..93d147a
--- /dev/null
+++ b/tools/ahat/src/heapdump/Sort.java
@@ -0,0 +1,193 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.ahat.heapdump;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Comparator;
+import java.util.Iterator;
+import java.util.List;
+
+/**
+ * Provides Comparators and helper functions for sorting Instances, Sites, and
+ * other things.
+ *
+ * Note: The Comparators defined here impose orderings that are inconsistent
+ * with equals. They should not be used for element lookup or search. They
+ * should only be used for showing elements to the user in different orders.
+ */
+public class Sort {
+  /**
+   * Compare instances by their total retained size.
+   * Different instances with the same total retained size are considered
+   * equal for the purposes of comparison.
+   * This sorts instances from larger retained size to smaller retained size.
+   */
+  public static final Comparator<AhatInstance> INSTANCE_BY_TOTAL_RETAINED_SIZE
+    = new Comparator<AhatInstance>() {
+    @Override
+    public int compare(AhatInstance a, AhatInstance b) {
+      return Long.compare(b.getTotalRetainedSize(), a.getTotalRetainedSize());
+    }
+  };
+
+  /**
+   * Compare instances by their retained size on a given heap.
+   * Different instances with the same retained size on that heap are
+   * considered equal for the purposes of comparison.
+   * This sorts instances from larger retained size to smaller retained size.
+   */
+  public static class InstanceByHeapRetainedSize implements Comparator<AhatInstance> {
+    private AhatHeap mHeap;
+
+    public InstanceByHeapRetainedSize(AhatHeap heap) {
+      mHeap = heap;
+    }
+
+    @Override
+    public int compare(AhatInstance a, AhatInstance b) {
+      return Long.compare(b.getRetainedSize(mHeap), a.getRetainedSize(mHeap));
+    }
+  }
+
+  /**
+   * Compare objects based on a list of comparators, giving priority to the
+   * earlier comparators in the list.
+   */
+  public static class WithPriority<T> implements Comparator<T> {
+    private List<Comparator<T>> mComparators;
+
+    public WithPriority(Comparator<T>... comparators) {
+      mComparators = Arrays.asList(comparators);
+    }
+
+    public WithPriority(List<Comparator<T>> comparators) {
+      mComparators = comparators;
+    }
+
+    @Override
+    public int compare(T a, T b) {
+      int res = 0;
+      Iterator<Comparator<T>> iter = mComparators.iterator();
+      while (res == 0 && iter.hasNext()) {
+        res = iter.next().compare(a, b);
+      }
+      return res;
+    }
+  }
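+
+  // Example (hypothetical usage): sort sites by their size on the app heap,
+  // breaking ties by total size across all heaps:
+  //   Collections.sort(sites, new WithPriority<Site>(
+  //       new SiteByHeapSize(appHeap), SITE_BY_TOTAL_SIZE));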
+
+  public static Comparator<AhatInstance> defaultInstanceCompare(AhatSnapshot snapshot) {
+    List<Comparator<AhatInstance>> comparators = new ArrayList<Comparator<AhatInstance>>();
+
+    // Priority goes to the app heap, if we can find one.
+    AhatHeap appHeap = snapshot.getHeap("app");
+    if (appHeap != null) {
+      comparators.add(new InstanceByHeapRetainedSize(appHeap));
+    }
+
+    // Next is by total retained size.
+    comparators.add(INSTANCE_BY_TOTAL_RETAINED_SIZE);
+    return new WithPriority<AhatInstance>(comparators);
+  }
+
+  /**
+   * Compare Sites by the size of objects allocated on a given heap.
+   * Different sites with the same size on the given heap are
+   * considered equal for the purposes of comparison.
+   * This sorts sites from larger size to smaller size.
+   */
+  public static class SiteByHeapSize implements Comparator<Site> {
+    private AhatHeap mHeap;
+
+    public SiteByHeapSize(AhatHeap heap) {
+      mHeap = heap;
+    }
+
+    @Override
+    public int compare(Site a, Site b) {
+      return Long.compare(b.getSize(mHeap), a.getSize(mHeap));
+    }
+  }
+
+  /**
+   * Compare Sites by the total size of objects allocated.
+   * This sorts sites from larger size to smaller size.
+   */
+  public static final Comparator<Site> SITE_BY_TOTAL_SIZE = new Comparator<Site>() {
+    @Override
+    public int compare(Site a, Site b) {
+      return Long.compare(b.getTotalSize(), a.getTotalSize());
+    }
+  };
+
+  public static Comparator<Site> defaultSiteCompare(AhatSnapshot snapshot) {
+    List<Comparator<Site>> comparators = new ArrayList<Comparator<Site>>();
+
+    // Priority goes to the app heap, if we can find one.
+    AhatHeap appHeap = snapshot.getHeap("app");
+    if (appHeap != null) {
+      comparators.add(new SiteByHeapSize(appHeap));
+    }
+
+    // Next is by total size.
+    comparators.add(SITE_BY_TOTAL_SIZE);
+    return new WithPriority<Site>(comparators);
+  }
+
+  /**
+   * Compare Site.ObjectsInfo by their size.
+   * Different object infos with the same size are considered
+   * equal for the purposes of comparison.
+   * This sorts object infos from larger size to smaller size.
+   */
+  public static final Comparator<Site.ObjectsInfo> OBJECTS_INFO_BY_SIZE
+    = new Comparator<Site.ObjectsInfo>() {
+    @Override
+    public int compare(Site.ObjectsInfo a, Site.ObjectsInfo b) {
+      return Long.compare(b.numBytes, a.numBytes);
+    }
+  };
+
+  /**
+   * Compare Site.ObjectsInfo by heap name.
+   * Different object infos with the same heap name are considered equal for
+   * the purposes of comparison.
+   */
+  public static final Comparator<Site.ObjectsInfo> OBJECTS_INFO_BY_HEAP_NAME
+    = new Comparator<Site.ObjectsInfo>() {
+    @Override
+    public int compare(Site.ObjectsInfo a, Site.ObjectsInfo b) {
+      return a.heap.getName().compareTo(b.heap.getName());
+    }
+  };
+
+  /**
+   * Compare Site.ObjectsInfo by class name.
+   * Different object infos with the same class name are considered equal for
+   * the purposes of comparison.
+   */
+  public static final Comparator<Site.ObjectsInfo> OBJECTS_INFO_BY_CLASS_NAME
+    = new Comparator<Site.ObjectsInfo>() {
+    @Override
+    public int compare(Site.ObjectsInfo a, Site.ObjectsInfo b) {
+      String aName = a.getClassName();
+      String bName = b.getClassName();
+      return aName.compareTo(bName);
+    }
+  };
+}
+
diff --git a/tools/ahat/src/heapdump/Value.java b/tools/ahat/src/heapdump/Value.java
new file mode 100644
index 0000000..6b2d38f
--- /dev/null
+++ b/tools/ahat/src/heapdump/Value.java
@@ -0,0 +1,133 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.ahat.heapdump;
+
+/**
+ * Value represents a field value in a heap dump. The field value is either a
+ * subclass of AhatInstance or a primitive Java type.
+ */
+public class Value {
+  private Object mObject;
+
+  /**
+   * Constructs a value from a generic Java Object.
+   * The Object must either be a boxed Java primitive type or a subclass of
+   * AhatInstance. The object must not be null.
+   */
+  Value(Object object) {
+    // TODO: Check that the Object is either an AhatInstance or a boxed
+    // Java primitive type?
+    assert object != null;
+    mObject = object;
+  }
+
+  /**
+   * Returns true if the Value is an AhatInstance, as opposed to a Java
+   * primitive value.
+   */
+  public boolean isAhatInstance() {
+    return mObject instanceof AhatInstance;
+  }
+
+  /**
+   * Return the Value as an AhatInstance if it is one.
+   * Returns null if the Value represents a Java primitive value.
+   */
+  public AhatInstance asAhatInstance() {
+    if (isAhatInstance()) {
+      return (AhatInstance)mObject;
+    }
+    return null;
+  }
+
+  /**
+   * Returns true if the Value is an Integer.
+   */
+  public boolean isInteger() {
+    return mObject instanceof Integer;
+  }
+
+  /**
+   * Return the Value as an Integer if it is one.
+   * Returns null if the Value does not represent an Integer.
+   */
+  public Integer asInteger() {
+    if (isInteger()) {
+      return (Integer)mObject;
+    }
+    return null;
+  }
+
+  /**
+   * Returns true if the Value is a Long.
+   */
+  public boolean isLong() {
+    return mObject instanceof Long;
+  }
+
+  /**
+   * Return the Value as a Long if it is one.
+   * Returns null if the Value does not represent a Long.
+   */
+  public Long asLong() {
+    if (isLong()) {
+      return (Long)mObject;
+    }
+    return null;
+  }
+
+  /**
+   * Return the Value as a Byte if it is one.
+   * Returns null if the Value does not represent a Byte.
+   */
+  public Byte asByte() {
+    if (mObject instanceof Byte) {
+      return (Byte)mObject;
+    }
+    return null;
+  }
+
+  /**
+   * Return the Value as a Char if it is one.
+   * Returns null if the Value does not represent a Char.
+   */
+  public Character asChar() {
+    if (mObject instanceof Character) {
+      return (Character)mObject;
+    }
+    return null;
+  }
+
+  @Override public String toString() {
+    return mObject.toString();
+  }
+
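+  // Static rather than an instance method so callers may pass a null Value,
+  // in which case null is returned unchanged.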
+  public static Value getBaseline(Value value) {
+    if (value == null || !value.isAhatInstance()) {
+      return value;
+    }
+    return new Value(value.asAhatInstance().getBaseline());
+  }
+
+  @Override public boolean equals(Object other) {
+    if (other instanceof Value) {
+      Value value = (Value)other;
+      return mObject.equals(value.mObject);
+    }
+    return false;
+  }
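+
+  // equals() is overridden above, so hashCode() is overridden as well to
+  // preserve the java.lang.Object equals/hashCode contract.
+  @Override public int hashCode() {
+    return mObject.hashCode();
+  }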
+}
diff --git a/tools/ahat/src/help.html b/tools/ahat/src/help.html
deleted file mode 100644
index ff04ad2..0000000
--- a/tools/ahat/src/help.html
+++ /dev/null
@@ -1,80 +0,0 @@
-<!--
-Copyright (C) 2015 The Android Open Source Project
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-     http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
--->
-
-<h1>Help</h1>
-<h2>Information shown by ahat:</h2>
-<ul>
-  <li><a href="/">The total bytes retained by heap.</a></li>
-  <li><a href="/rooted">A list of rooted objects and their retained sizes for each heap.</a></li>
-  <li>Information about each allocated object:
-    <ul>
-      <li>The allocation site (stack trace) of the object (if available).</li>
-      <li>The dominator path from a root to the object.</li>
-      <li>The class, (shallow) size, retained size, and heap of the object.</li>
-      <li>The bitmap image for the object if the object represents a bitmap.</li>
-      <li>The instance fields or array elements of the object.</li>
-      <li>The super class, class loader, and static fields of class objects.</li>
-      <li>Other objects with references to the object.</li>
-      <li>Other objects immediately dominated by the object.</li>
-    </ul>
-  </li>
-  <li>A list of objects, optionally filtered by class, allocation site, and/or
-    heap.</li>
-  <li><a href="site">Information about each allocation site:</a>
-    <ul>
-      <li>The stack trace for the allocation site.</li>
-      <li>The number of bytes allocated at the allocation site.</li>
-      <li>Child sites called from the allocation site.</li>
-      <li>The size and count of objects allocated at the site, organized by
-        heap and object type.</li>
-    </ul>
-  </li>
-</ul>
-
-<h2>Tips:</h2>
-<h3>Heaps</h3>
-<p>
-Android heap dumps contain information for multiple heaps. The <b>app</b> heap
-is the memory used by your application. The <b>zygote</b> and <b>image</b>
-heaps are used by the system. You should ignore everything in the zygote and
-image heap and look only at the app heap. This is because changes in your
-application will not effect the zygote or image heaps, and because the zygote
-and image heaps are shared, they don't contribute significantly to your
-applications PSS.
-</p>
-
-<h3>Bitmaps</h3>
-<p>
-Bitmaps store their data using byte[] arrays. Whenever you see a large
-byte[], check if it is a bitmap by looking to see if there is a single
-android.graphics.Bitmap object referring to it. The byte[] will be marked as a
-root, but it is really being retained by the android.graphics.Bitmap object.
-</p>
-
-<h3>DexCaches</h3>
-<p>
-For each DexFile you load, there will be a corresponding DexCache whose size
-is proportional to the number of strings, fields, methods, and classes in your
-dex file. The DexCache entries may or may not be visible depending on the
-version of the Android platform the heap dump is from.
-</p>
-
-<h3>FinalizerReferences</h3>
-<p>
-A FinalizerReference is allocated for every object on the heap that has a
-non-trivial finalizer. These are stored in a linked list reachable from the
-FinalizerReference class object.
-</p>
diff --git a/tools/ahat/src/manifest.txt b/tools/ahat/src/manifest.txt
index 1993910..20245f3 100644
--- a/tools/ahat/src/manifest.txt
+++ b/tools/ahat/src/manifest.txt
@@ -1,4 +1,4 @@
 Name: ahat/
 Implementation-Title: ahat
-Implementation-Version: 0.8
+Implementation-Version: 1.1
 Main-Class: com.android.ahat.Main
diff --git a/tools/ahat/src/style.css b/tools/ahat/src/style.css
index ca074a5..47fae1d 100644
--- a/tools/ahat/src/style.css
+++ b/tools/ahat/src/style.css
@@ -18,6 +18,14 @@
   background-color: #eeffff;
 }
 
+span.added {
+  color: #770000;
+}
+
+span.removed {
+  color: #007700;
+}
+
 /*
  * Most of the columns show numbers of bytes. Numbers should be right aligned.
  */
diff --git a/tools/ahat/test-dump/Main.java b/tools/ahat/test-dump/Main.java
index 587d9de..7a05b1c 100644
--- a/tools/ahat/test-dump/Main.java
+++ b/tools/ahat/test-dump/Main.java
@@ -18,8 +18,9 @@
 import java.io.IOException;
 import java.lang.ref.PhantomReference;
 import java.lang.ref.ReferenceQueue;
+import java.lang.ref.SoftReference;
 import java.lang.ref.WeakReference;
-import libcore.util.NativeAllocationRegistry;
+import org.apache.harmony.dalvik.ddmc.DdmVmInternal;
 
 /**
  * Program used to create a heap dump for test purposes.
@@ -39,13 +40,32 @@
     }
   }
 
+  public static class AddedObject {
+  }
+
+  public static class RemovedObject {
+  }
+
+  public static class UnchangedObject {
+  }
+
+  public static class ModifiedObject {
+    public int value;
+    public String modifiedRefField;
+    public String unmodifiedRefField;
+  }
+
+  public static class StackSmasher {
+    public StackSmasher child;
+  }
+
   // We will take a heap dump that includes a single instance of this
   // DumpedStuff class. Objects stored as fields in this class can be easily
   // found in the hprof dump by searching for the instance of the DumpedStuff
   // class and reading the desired field.
   public static class DumpedStuff {
     public String basicString = "hello, world";
-    public String nonAscii = "Sigma (\u01a9) is not ASCII";
+    public String nonAscii = "Sigma (Æ©) is not ASCII";
     public String embeddedZero = "embedded\0...";  // Non-ASCII for string compression purposes.
     public char[] charArray = "char thing".toCharArray();
     public String nullString = null;
@@ -53,23 +73,53 @@
     public ReferenceQueue<Object> referenceQueue = new ReferenceQueue<Object>();
     public PhantomReference aPhantomReference = new PhantomReference(anObject, referenceQueue);
     public WeakReference aWeakReference = new WeakReference(anObject, referenceQueue);
+    public WeakReference aNullReferentReference = new WeakReference(null, referenceQueue);
+    public SoftReference aSoftReference = new SoftReference(new Object());
     public byte[] bigArray;
     public ObjectTree[] gcPathArray = new ObjectTree[]{null, null,
       new ObjectTree(
           new ObjectTree(null, new ObjectTree(null, null)),
           new ObjectTree(null, null)),
       null};
+    public Object[] basicStringRef;
+    public AddedObject addedObject;
+    public UnchangedObject unchangedObject = new UnchangedObject();
+    public RemovedObject removedObject;
+    public ModifiedObject modifiedObject;
+    public StackSmasher stackSmasher;
+    public StackSmasher stackSmasherAdded;
+    public static String modifiedStaticField;
+    public int[] modifiedArray;
 
-    DumpedStuff() {
-      int N = 1000000;
+    DumpedStuff(boolean baseline) {
+      int N = baseline ? 400000 : 1000000;
       bigArray = new byte[N];
       for (int i = 0; i < N; i++) {
         bigArray[i] = (byte)((i*i) & 0xFF);
       }
 
-      NativeAllocationRegistry registry = new NativeAllocationRegistry(
-          Main.class.getClassLoader(), 0x12345, 42);
-      registry.registerNativeAllocation(anObject, 0xABCDABCD);
+      addedObject = baseline ? null : new AddedObject();
+      removedObject = baseline ? new RemovedObject() : null;
+      modifiedObject = new ModifiedObject();
+      modifiedObject.value = baseline ? 5 : 8;
+      modifiedObject.modifiedRefField = baseline ? "A1" : "A2";
+      modifiedObject.unmodifiedRefField = "B";
+      modifiedStaticField = baseline ? "C1" : "C2";
+      modifiedArray = baseline ? new int[]{0,1,2,3} : new int[]{3,1,2,0};
+
+      // Deep matching dominator trees shouldn't smash the stack when we try
+      // to diff them. Make some deep dominator trees to help test it.
+      for (int i = 0; i < 10000; i++) {
+        StackSmasher smasher = new StackSmasher();
+        smasher.child = stackSmasher;
+        stackSmasher = smasher;
+
+        if (!baseline) {
+          smasher = new StackSmasher();
+          smasher.child = stackSmasherAdded;
+          stackSmasherAdded = smasher;
+        }
+      }
 
       gcPathArray[2].right.left = gcPathArray[2].left.right;
     }
@@ -82,8 +132,21 @@
     }
     String file = args[0];
 
+    // If a --base argument is provided, it means we should generate a
+    // baseline hprof file suitable for use in testing diff.
+    boolean baseline = args.length > 1 && args[1].equals("--base");
+
+    // Enable allocation tracking so we get stack traces in the heap dump.
+    DdmVmInternal.enableRecentAllocations(true);
+
     // Allocate the instance of DumpedStuff.
-    stuff = new DumpedStuff();
+    stuff = new DumpedStuff(baseline);
+
+    // Create a bunch of unreachable objects pointing to basicString for the
+    // reverseReferencesAreNotUnreachable test.
+    for (int i = 0; i < 100; i++) {
+      stuff.basicStringRef = new Object[]{stuff.basicString};
+    }
 
     // Take a heap dump that will include that instance of DumpedStuff.
     System.err.println("Dumping hprof data to " + file);
diff --git a/tools/ahat/test/DiffTest.java b/tools/ahat/test/DiffTest.java
new file mode 100644
index 0000000..52b6b7b
--- /dev/null
+++ b/tools/ahat/test/DiffTest.java
@@ -0,0 +1,163 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.ahat;
+
+import com.android.ahat.heapdump.AhatHeap;
+import com.android.ahat.heapdump.AhatInstance;
+import com.android.ahat.heapdump.AhatSnapshot;
+import com.android.ahat.heapdump.Diff;
+import com.android.ahat.heapdump.FieldValue;
+import com.android.tools.perflib.heap.hprof.HprofClassDump;
+import com.android.tools.perflib.heap.hprof.HprofConstant;
+import com.android.tools.perflib.heap.hprof.HprofDumpRecord;
+import com.android.tools.perflib.heap.hprof.HprofHeapDump;
+import com.android.tools.perflib.heap.hprof.HprofInstanceDump;
+import com.android.tools.perflib.heap.hprof.HprofInstanceField;
+import com.android.tools.perflib.heap.hprof.HprofLoadClass;
+import com.android.tools.perflib.heap.hprof.HprofPrimitiveArrayDump;
+import com.android.tools.perflib.heap.hprof.HprofRecord;
+import com.android.tools.perflib.heap.hprof.HprofRootDebugger;
+import com.android.tools.perflib.heap.hprof.HprofStaticField;
+import com.android.tools.perflib.heap.hprof.HprofStringBuilder;
+import com.android.tools.perflib.heap.hprof.HprofType;
+import com.google.common.io.ByteArrayDataOutput;
+import com.google.common.io.ByteStreams;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+
+public class DiffTest {
+  @Test
+  public void diffMatchedHeap() throws IOException {
+    TestDump dump = TestDump.getTestDump();
+    AhatHeap a = dump.getAhatSnapshot().getHeap("app");
+    assertNotNull(a);
+    AhatHeap b = dump.getBaselineAhatSnapshot().getHeap("app");
+    assertNotNull(b);
+    assertEquals(a.getBaseline(), b);
+    assertEquals(b.getBaseline(), a);
+  }
+
+  @Test
+  public void diffUnchanged() throws IOException {
+    TestDump dump = TestDump.getTestDump();
+
+    AhatInstance a = dump.getDumpedAhatInstance("unchangedObject");
+    assertNotNull(a);
+
+    AhatInstance b = dump.getBaselineDumpedAhatInstance("unchangedObject");
+    assertNotNull(b);
+    assertEquals(a, b.getBaseline());
+    assertEquals(b, a.getBaseline());
+    assertEquals(a.getSite(), b.getSite().getBaseline());
+    assertEquals(b.getSite(), a.getSite().getBaseline());
+  }
+
+  @Test
+  public void diffAdded() throws IOException {
+    TestDump dump = TestDump.getTestDump();
+
+    AhatInstance a = dump.getDumpedAhatInstance("addedObject");
+    assertNotNull(a);
+    assertNull(dump.getBaselineDumpedAhatInstance("addedObject"));
+    assertTrue(a.getBaseline().isPlaceHolder());
+  }
+
+  @Test
+  public void diffRemoved() throws IOException {
+    TestDump dump = TestDump.getTestDump();
+
+    assertNull(dump.getDumpedAhatInstance("removedObject"));
+    AhatInstance b = dump.getBaselineDumpedAhatInstance("removedObject");
+    assertNotNull(b);
+    assertTrue(b.getBaseline().isPlaceHolder());
+  }
+
+  @Test
+  public void nullClassObj() throws IOException {
+    // Set up a heap dump that has a null classObj.
+    // The heap dump is derived from the InstanceTest.asStringEmbedded test.
+    HprofStringBuilder strings = new HprofStringBuilder(0);
+    List<HprofRecord> records = new ArrayList<HprofRecord>();
+    List<HprofDumpRecord> dump = new ArrayList<HprofDumpRecord>();
+
+    final int stringClassObjectId = 1;
+    records.add(new HprofLoadClass(0, 0, stringClassObjectId, 0, strings.get("java.lang.String")));
+    dump.add(new HprofClassDump(stringClassObjectId, 0, 0, 0, 0, 0, 0, 0, 0,
+          new HprofConstant[0], new HprofStaticField[0],
+          new HprofInstanceField[]{
+            new HprofInstanceField(strings.get("count"), HprofType.TYPE_INT),
+            new HprofInstanceField(strings.get("hashCode"), HprofType.TYPE_INT),
+            new HprofInstanceField(strings.get("offset"), HprofType.TYPE_INT),
+            new HprofInstanceField(strings.get("value"), HprofType.TYPE_OBJECT)}));
+
+    dump.add(new HprofPrimitiveArrayDump(0x41, 0, HprofType.TYPE_CHAR,
+          new long[]{'n', 'o', 't', ' ', 'h', 'e', 'l', 'l', 'o', 'o', 'p'}));
+
+    ByteArrayDataOutput values = ByteStreams.newDataOutput();
+    values.writeInt(5);     // count
+    values.writeInt(0);     // hashCode
+    values.writeInt(4);     // offset
+    values.writeInt(0x41);  // value
+    dump.add(new HprofInstanceDump(0x42, 0, stringClassObjectId, values.toByteArray()));
+    dump.add(new HprofRootDebugger(stringClassObjectId));
+    dump.add(new HprofRootDebugger(0x42));
+
+    records.add(new HprofHeapDump(0, dump.toArray(new HprofDumpRecord[0])));
+    AhatSnapshot snapshot = SnapshotBuilder.makeSnapshot(strings, records);
+
+    // Diffing should not crash.
+    Diff.snapshots(snapshot, snapshot);
+  }
+
+  @Test
+  public void diffFields() {
+    List<FieldValue> a = new ArrayList<FieldValue>();
+    a.add(new FieldValue("n0", "t0", null));
+    a.add(new FieldValue("n2", "t2", null));
+    a.add(new FieldValue("n3", "t3", null));
+    a.add(new FieldValue("n4", "t4", null));
+    a.add(new FieldValue("n5", "t5", null));
+    a.add(new FieldValue("n6", "t6", null));
+
+    List<FieldValue> b = new ArrayList<FieldValue>();
+    b.add(new FieldValue("n0", "t0", null));
+    b.add(new FieldValue("n1", "t1", null));
+    b.add(new FieldValue("n2", "t2", null));
+    b.add(new FieldValue("n3", "t3", null));
+    b.add(new FieldValue("n5", "t5", null));
+    b.add(new FieldValue("n6", "t6", null));
+    b.add(new FieldValue("n7", "t7", null));
+
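+    // Diff.fields should pad both lists to a common length of 8, inserting
+    // placeholders where a field is missing on one side:
+    //   a: n0 [n1] n2 n3 n4 n5 n6 [n7]
+    //   b: n0 n1 n2 n3 [n4] n5 n6 n7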
+    Diff.fields(a, b);
+    assertEquals(8, a.size());
+    assertEquals(8, b.size());
+    for (int i = 0; i < 8; i++) {
+      assertEquals(a.get(i), b.get(i).getBaseline());
+      assertEquals(b.get(i), a.get(i).getBaseline());
+    }
+    assertTrue(a.get(1).isPlaceHolder());
+    assertTrue(a.get(7).isPlaceHolder());
+    assertTrue(b.get(4).isPlaceHolder());
+  }
+}
diff --git a/tools/ahat/test/InstanceTest.java b/tools/ahat/test/InstanceTest.java
new file mode 100644
index 0000000..3a50150
--- /dev/null
+++ b/tools/ahat/test/InstanceTest.java
@@ -0,0 +1,413 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.ahat;
+
+import com.android.ahat.heapdump.AhatClassObj;
+import com.android.ahat.heapdump.AhatHeap;
+import com.android.ahat.heapdump.AhatInstance;
+import com.android.ahat.heapdump.AhatSnapshot;
+import com.android.ahat.heapdump.PathElement;
+import com.android.ahat.heapdump.Value;
+import com.android.tools.perflib.heap.hprof.HprofClassDump;
+import com.android.tools.perflib.heap.hprof.HprofConstant;
+import com.android.tools.perflib.heap.hprof.HprofDumpRecord;
+import com.android.tools.perflib.heap.hprof.HprofHeapDump;
+import com.android.tools.perflib.heap.hprof.HprofInstanceDump;
+import com.android.tools.perflib.heap.hprof.HprofInstanceField;
+import com.android.tools.perflib.heap.hprof.HprofLoadClass;
+import com.android.tools.perflib.heap.hprof.HprofPrimitiveArrayDump;
+import com.android.tools.perflib.heap.hprof.HprofRecord;
+import com.android.tools.perflib.heap.hprof.HprofRootDebugger;
+import com.android.tools.perflib.heap.hprof.HprofStaticField;
+import com.android.tools.perflib.heap.hprof.HprofStringBuilder;
+import com.android.tools.perflib.heap.hprof.HprofType;
+import com.google.common.io.ByteArrayDataOutput;
+import com.google.common.io.ByteStreams;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+
+public class InstanceTest {
+  @Test
+  public void asStringBasic() throws IOException {
+    TestDump dump = TestDump.getTestDump();
+    AhatInstance str = dump.getDumpedAhatInstance("basicString");
+    assertEquals("hello, world", str.asString());
+  }
+
+  @Test
+  public void asStringNonAscii() throws IOException {
+    TestDump dump = TestDump.getTestDump();
+    AhatInstance str = dump.getDumpedAhatInstance("nonAscii");
+    assertEquals("Sigma (Æ©) is not ASCII", str.asString());
+  }
+
+  @Test
+  public void asStringEmbeddedZero() throws IOException {
+    TestDump dump = TestDump.getTestDump();
+    AhatInstance str = dump.getDumpedAhatInstance("embeddedZero");
+    assertEquals("embedded\0...", str.asString());
+  }
+
+  @Test
+  public void asStringCharArray() throws IOException {
+    TestDump dump = TestDump.getTestDump();
+    AhatInstance str = dump.getDumpedAhatInstance("charArray");
+    assertEquals("char thing", str.asString());
+  }
+
+  @Test
+  public void asStringTruncated() throws IOException {
+    TestDump dump = TestDump.getTestDump();
+    AhatInstance str = dump.getDumpedAhatInstance("basicString");
+    assertEquals("hello", str.asString(5));
+  }
+
+  @Test
+  public void asStringTruncatedNonAscii() throws IOException {
+    TestDump dump = TestDump.getTestDump();
+    AhatInstance str = dump.getDumpedAhatInstance("nonAscii");
+    assertEquals("Sigma (Æ©)", str.asString(9));
+  }
+
+  @Test
+  public void asStringTruncatedEmbeddedZero() throws IOException {
+    TestDump dump = TestDump.getTestDump();
+    AhatInstance str = dump.getDumpedAhatInstance("embeddedZero");
+    assertEquals("embed", str.asString(5));
+  }
+
+  @Test
+  public void asStringCharArrayTruncated() throws IOException {
+    TestDump dump = TestDump.getTestDump();
+    AhatInstance str = dump.getDumpedAhatInstance("charArray");
+    assertEquals("char ", str.asString(5));
+  }
+
+  @Test
+  public void asStringExactMax() throws IOException {
+    TestDump dump = TestDump.getTestDump();
+    AhatInstance str = dump.getDumpedAhatInstance("basicString");
+    assertEquals("hello, world", str.asString(12));
+  }
+
+  @Test
+  public void asStringExactMaxNonAscii() throws IOException {
+    TestDump dump = TestDump.getTestDump();
+    AhatInstance str = dump.getDumpedAhatInstance("nonAscii");
+    assertEquals("Sigma (Æ©) is not ASCII", str.asString(22));
+  }
+
+  @Test
+  public void asStringExactMaxEmbeddedZero() throws IOException {
+    TestDump dump = TestDump.getTestDump();
+    AhatInstance str = dump.getDumpedAhatInstance("embeddedZero");
+    assertEquals("embedded\0...", str.asString(12));
+  }
+
+  @Test
+  public void asStringCharArrayExactMax() throws IOException {
+    TestDump dump = TestDump.getTestDump();
+    AhatInstance str = dump.getDumpedAhatInstance("charArray");
+    assertEquals("char thing", str.asString(10));
+  }
+
+  @Test
+  public void asStringNotTruncated() throws IOException {
+    TestDump dump = TestDump.getTestDump();
+    AhatInstance str = dump.getDumpedAhatInstance("basicString");
+    assertEquals("hello, world", str.asString(50));
+  }
+
+  @Test
+  public void asStringNotTruncatedNonAscii() throws IOException {
+    TestDump dump = TestDump.getTestDump();
+    AhatInstance str = dump.getDumpedAhatInstance("nonAscii");
+    assertEquals("Sigma (Æ©) is not ASCII", str.asString(50));
+  }
+
+  @Test
+  public void asStringNotTruncatedEmbeddedZero() throws IOException {
+    TestDump dump = TestDump.getTestDump();
+    AhatInstance str = dump.getDumpedAhatInstance("embeddedZero");
+    assertEquals("embedded\0...", str.asString(50));
+  }
+
+  @Test
+  public void asStringCharArrayNotTruncated() throws IOException {
+    TestDump dump = TestDump.getTestDump();
+    AhatInstance str = dump.getDumpedAhatInstance("charArray");
+    assertEquals("char thing", str.asString(50));
+  }
+
+  @Test
+  public void asStringNegativeMax() throws IOException {
+    TestDump dump = TestDump.getTestDump();
+    AhatInstance str = dump.getDumpedAhatInstance("basicString");
+    assertEquals("hello, world", str.asString(-3));
+  }
+
+  @Test
+  public void asStringNegativeMaxNonAscii() throws IOException {
+    TestDump dump = TestDump.getTestDump();
+    AhatInstance str = dump.getDumpedAhatInstance("nonAscii");
+    assertEquals("Sigma (Æ©) is not ASCII", str.asString(-3));
+  }
+
+  @Test
+  public void asStringNegativeMaxEmbeddedZero() throws IOException {
+    TestDump dump = TestDump.getTestDump();
+    AhatInstance str = dump.getDumpedAhatInstance("embeddedZero");
+    assertEquals("embedded\0...", str.asString(-3));
+  }
+
+  @Test
+  public void asStringCharArrayNegativeMax() throws IOException {
+    TestDump dump = TestDump.getTestDump();
+    AhatInstance str = dump.getDumpedAhatInstance("charArray");
+    assertEquals("char thing", str.asString(-3));
+  }
+
+  @Test
+  public void asStringNull() throws IOException {
+    TestDump dump = TestDump.getTestDump();
+    AhatInstance obj = dump.getDumpedAhatInstance("nullString");
+    assertNull(obj);
+  }
+
+  @Test
+  public void asStringNotString() throws IOException {
+    TestDump dump = TestDump.getTestDump();
+    AhatInstance obj = dump.getDumpedAhatInstance("anObject");
+    assertNotNull(obj);
+    assertNull(obj.asString());
+  }
+
+  @Test
+  public void basicReference() throws IOException {
+    TestDump dump = TestDump.getTestDump();
+
+    AhatInstance pref = dump.getDumpedAhatInstance("aPhantomReference");
+    AhatInstance wref = dump.getDumpedAhatInstance("aWeakReference");
+    AhatInstance nref = dump.getDumpedAhatInstance("aNullReferentReference");
+    AhatInstance referent = dump.getDumpedAhatInstance("anObject");
+    assertNotNull(pref);
+    assertNotNull(wref);
+    assertNotNull(nref);
+    assertNotNull(referent);
+    assertEquals(referent, pref.getReferent());
+    assertEquals(referent, wref.getReferent());
+    assertNull(nref.getReferent());
+    assertNull(referent.getReferent());
+  }
+
+  @Test
+  public void unreachableReferent() throws IOException {
+    // The test dump program should never be under enough GC pressure for the
+    // soft reference to be cleared. Ensure that ahat will show the soft
+    // reference as having a non-null referent.
+    TestDump dump = TestDump.getTestDump();
+    AhatInstance ref = dump.getDumpedAhatInstance("aSoftReference");
+    assertNotNull(ref.getReferent());
+  }
+
+  @Test
+  public void gcRootPath() throws IOException {
+    TestDump dump = TestDump.getTestDump();
+
+    AhatClassObj main = dump.getAhatSnapshot().findClass("Main");
+    AhatInstance gcPathArray = dump.getDumpedAhatInstance("gcPathArray");
+    Value value = gcPathArray.asArrayInstance().getValue(2);
+    AhatInstance base = value.asAhatInstance();
+    AhatInstance left = base.getRefField("left");
+    AhatInstance right = base.getRefField("right");
+    AhatInstance target = left.getRefField("right");
+
+    List<PathElement> path = target.getPathFromGcRoot();
+    assertEquals(6, path.size());
+
+    assertEquals(main, path.get(0).instance);
+    assertEquals(".stuff", path.get(0).field);
+    assertTrue(path.get(0).isDominator);
+
+    assertEquals(".gcPathArray", path.get(1).field);
+    assertTrue(path.get(1).isDominator);
+
+    assertEquals(gcPathArray, path.get(2).instance);
+    assertEquals("[2]", path.get(2).field);
+    assertTrue(path.get(2).isDominator);
+
+    assertEquals(base, path.get(3).instance);
+    assertTrue(path.get(3).isDominator);
+
+    // There are two possible paths. Either it can go through the 'left' node,
+    // or the 'right' node.
+    if (path.get(3).field.equals(".left")) {
+      assertEquals(".left", path.get(3).field);
+
+      assertEquals(left, path.get(4).instance);
+      assertEquals(".right", path.get(4).field);
+      assertFalse(path.get(4).isDominator);
+
+    } else {
+      assertEquals(".right", path.get(3).field);
+
+      assertEquals(right, path.get(4).instance);
+      assertEquals(".left", path.get(4).field);
+      assertFalse(path.get(4).isDominator);
+    }
+
+    assertEquals(target, path.get(5).instance);
+    assertEquals("", path.get(5).field);
+    assertTrue(path.get(5).isDominator);
+  }
+
+  @Test
+  public void retainedSize() throws IOException {
+    TestDump dump = TestDump.getTestDump();
+
+    // anObject should not be an immediate dominator of any other object. This
+    // means its retained size should be equal to its size for the heap it was
+    // allocated on, and should be 0 for all other heaps.
+    AhatInstance anObject = dump.getDumpedAhatInstance("anObject");
+    AhatSnapshot snapshot = dump.getAhatSnapshot();
+    long size = anObject.getSize();
+    assertEquals(size, anObject.getTotalRetainedSize());
+    assertEquals(size, anObject.getRetainedSize(anObject.getHeap()));
+    for (AhatHeap heap : snapshot.getHeaps()) {
+      if (!heap.equals(anObject.getHeap())) {
+        assertEquals(String.format("For heap '%s'", heap.getName()),
+            0, anObject.getRetainedSize(heap));
+      }
+    }
+  }
+
+  @Test
+  public void objectNotABitmap() throws IOException {
+    TestDump dump = TestDump.getTestDump();
+    AhatInstance obj = dump.getDumpedAhatInstance("anObject");
+    assertNull(obj.asBitmap());
+  }
+
+  @Test
+  public void arrayNotABitmap() throws IOException {
+    TestDump dump = TestDump.getTestDump();
+    AhatInstance obj = dump.getDumpedAhatInstance("gcPathArray");
+    assertNull(obj.asBitmap());
+  }
+
+  @Test
+  public void classObjNotABitmap() throws IOException {
+    TestDump dump = TestDump.getTestDump();
+    AhatInstance obj = dump.getAhatSnapshot().findClass("Main");
+    assertNull(obj.asBitmap());
+  }
+
+  @Test
+  public void classInstanceToString() throws IOException {
+    TestDump dump = TestDump.getTestDump();
+    AhatInstance obj = dump.getDumpedAhatInstance("aPhantomReference");
+    long id = obj.getId();
+    assertEquals(String.format("java.lang.ref.PhantomReference@%08x", id), obj.toString());
+  }
+
+  @Test
+  public void classObjToString() throws IOException {
+    TestDump dump = TestDump.getTestDump();
+    AhatInstance obj = dump.getAhatSnapshot().findClass("Main");
+    assertEquals("Main", obj.toString());
+  }
+
+  @Test
+  public void arrayInstanceToString() throws IOException {
+    TestDump dump = TestDump.getTestDump();
+    AhatInstance obj = dump.getDumpedAhatInstance("gcPathArray");
+    long id = obj.getId();
+
+    // There's a bug in perflib's proguard deobfuscation for arrays.
+    // To work around that bug for the time being, only test the suffix of
+    // the toString result. Ideally we test for string equality against
+    // "Main$ObjectTree[4]@%08x", id.
+    assertTrue(obj.toString().endsWith(String.format("[4]@%08x", id)));
+  }
+
+  @Test
+  public void primArrayInstanceToString() throws IOException {
+    TestDump dump = TestDump.getTestDump();
+    AhatInstance obj = dump.getDumpedAhatInstance("bigArray");
+    long id = obj.getId();
+    assertEquals(String.format("byte[1000000]@%08x", id), obj.toString());
+  }
+
+  @Test
+  public void isNotRoot() throws IOException {
+    TestDump dump = TestDump.getTestDump();
+    AhatInstance obj = dump.getDumpedAhatInstance("anObject");
+    assertFalse(obj.isRoot());
+    assertNull(obj.getRootTypes());
+  }
+
+  @Test
+  public void asStringEmbedded() throws IOException {
+    // Set up a heap dump with an instance of java.lang.String of
+    // "hello" with instance id 0x42 that is backed by a char array that is
+    // bigger. This is how ART used to represent strings, and we should still
+    // support it in case the heap dump is from a previous platform version.
+    HprofStringBuilder strings = new HprofStringBuilder(0);
+    List<HprofRecord> records = new ArrayList<HprofRecord>();
+    List<HprofDumpRecord> dump = new ArrayList<HprofDumpRecord>();
+
+    final int stringClassObjectId = 1;
+    records.add(new HprofLoadClass(0, 0, stringClassObjectId, 0, strings.get("java.lang.String")));
+    dump.add(new HprofClassDump(stringClassObjectId, 0, 0, 0, 0, 0, 0, 0, 0,
+          new HprofConstant[0], new HprofStaticField[0],
+          new HprofInstanceField[]{
+            new HprofInstanceField(strings.get("count"), HprofType.TYPE_INT),
+            new HprofInstanceField(strings.get("hashCode"), HprofType.TYPE_INT),
+            new HprofInstanceField(strings.get("offset"), HprofType.TYPE_INT),
+            new HprofInstanceField(strings.get("value"), HprofType.TYPE_OBJECT)}));
+
+    dump.add(new HprofPrimitiveArrayDump(0x41, 0, HprofType.TYPE_CHAR,
+          new long[]{'n', 'o', 't', ' ', 'h', 'e', 'l', 'l', 'o', 'o', 'p'}));
+
+    ByteArrayDataOutput values = ByteStreams.newDataOutput();
+    values.writeInt(5);     // count
+    values.writeInt(0);     // hashCode
+    values.writeInt(4);     // offset
+    values.writeInt(0x41);  // value
+    dump.add(new HprofInstanceDump(0x42, 0, stringClassObjectId, values.toByteArray()));
+    dump.add(new HprofRootDebugger(stringClassObjectId));
+    dump.add(new HprofRootDebugger(0x42));
+
+    records.add(new HprofHeapDump(0, dump.toArray(new HprofDumpRecord[0])));
+    AhatSnapshot snapshot = SnapshotBuilder.makeSnapshot(strings, records);
+    AhatInstance chars = snapshot.findInstance(0x41);
+    assertNotNull(chars);
+    assertEquals("not helloop", chars.asString());
+
+    AhatInstance stringInstance = snapshot.findInstance(0x42);
+    assertNotNull(stringInstance);
+    assertEquals("hello", stringInstance.asString());
+  }
+}
diff --git a/tools/ahat/test/InstanceUtilsTest.java b/tools/ahat/test/InstanceUtilsTest.java
deleted file mode 100644
index fe2706d..0000000
--- a/tools/ahat/test/InstanceUtilsTest.java
+++ /dev/null
@@ -1,252 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.android.ahat;
-
-import com.android.tools.perflib.heap.ArrayInstance;
-import com.android.tools.perflib.heap.ClassObj;
-import com.android.tools.perflib.heap.Instance;
-import java.io.IOException;
-import java.util.List;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertNotNull;
-import static org.junit.Assert.assertNull;
-import static org.junit.Assert.assertTrue;
-import org.junit.Test;
-
-public class InstanceUtilsTest {
-  @Test
-  public void asStringBasic() throws IOException {
-    TestDump dump = TestDump.getTestDump();
-    Instance str = (Instance)dump.getDumpedThing("basicString");
-    assertEquals("hello, world", InstanceUtils.asString(str));
-  }
-
-  @Test
-  public void asStringNonAscii() throws IOException {
-    TestDump dump = TestDump.getTestDump();
-    Instance str = (Instance)dump.getDumpedThing("nonAscii");
-    assertEquals("Sigma (\u01a9) is not ASCII", InstanceUtils.asString(str));
-  }
-
-  @Test
-  public void asStringEmbeddedZero() throws IOException {
-    TestDump dump = TestDump.getTestDump();
-    Instance str = (Instance)dump.getDumpedThing("embeddedZero");
-    assertEquals("embedded\0...", InstanceUtils.asString(str));
-  }
-
-  @Test
-  public void asStringCharArray() throws IOException {
-    TestDump dump = TestDump.getTestDump();
-    Instance str = (Instance)dump.getDumpedThing("charArray");
-    assertEquals("char thing", InstanceUtils.asString(str));
-  }
-
-  @Test
-  public void asStringTruncated() throws IOException {
-    TestDump dump = TestDump.getTestDump();
-    Instance str = (Instance)dump.getDumpedThing("basicString");
-    assertEquals("hello", InstanceUtils.asString(str, 5));
-  }
-
-  @Test
-  public void asStringTruncatedNonAscii() throws IOException {
-    TestDump dump = TestDump.getTestDump();
-    Instance str = (Instance)dump.getDumpedThing("nonAscii");
-    assertEquals("Sigma (\u01a9)", InstanceUtils.asString(str, 9));
-  }
-
-  @Test
-  public void asStringTruncatedEmbeddedZero() throws IOException {
-    TestDump dump = TestDump.getTestDump();
-    Instance str = (Instance)dump.getDumpedThing("embeddedZero");
-    assertEquals("embed", InstanceUtils.asString(str, 5));
-  }
-
-  @Test
-  public void asStringCharArrayTruncated() throws IOException {
-    TestDump dump = TestDump.getTestDump();
-    Instance str = (Instance)dump.getDumpedThing("charArray");
-    assertEquals("char ", InstanceUtils.asString(str, 5));
-  }
-
-  @Test
-  public void asStringExactMax() throws IOException {
-    TestDump dump = TestDump.getTestDump();
-    Instance str = (Instance)dump.getDumpedThing("basicString");
-    assertEquals("hello, world", InstanceUtils.asString(str, 12));
-  }
-
-  @Test
-  public void asStringExactMaxNonAscii() throws IOException {
-    TestDump dump = TestDump.getTestDump();
-    Instance str = (Instance)dump.getDumpedThing("nonAscii");
-    assertEquals("Sigma (\u01a9) is not ASCII", InstanceUtils.asString(str, 22));
-  }
-
-  @Test
-  public void asStringExactMaxEmbeddedZero() throws IOException {
-    TestDump dump = TestDump.getTestDump();
-    Instance str = (Instance)dump.getDumpedThing("embeddedZero");
-    assertEquals("embedded\0...", InstanceUtils.asString(str, 12));
-  }
-
-  @Test
-  public void asStringCharArrayExactMax() throws IOException {
-    TestDump dump = TestDump.getTestDump();
-    Instance str = (Instance)dump.getDumpedThing("charArray");
-    assertEquals("char thing", InstanceUtils.asString(str, 10));
-  }
-
-  @Test
-  public void asStringNotTruncated() throws IOException {
-    TestDump dump = TestDump.getTestDump();
-    Instance str = (Instance)dump.getDumpedThing("basicString");
-    assertEquals("hello, world", InstanceUtils.asString(str, 50));
-  }
-
-  @Test
-  public void asStringNotTruncatedNonAscii() throws IOException {
-    TestDump dump = TestDump.getTestDump();
-    Instance str = (Instance)dump.getDumpedThing("nonAscii");
-    assertEquals("Sigma (\u01a9) is not ASCII", InstanceUtils.asString(str, 50));
-  }
-
-  @Test
-  public void asStringNotTruncatedEmbeddedZero() throws IOException {
-    TestDump dump = TestDump.getTestDump();
-    Instance str = (Instance)dump.getDumpedThing("embeddedZero");
-    assertEquals("embedded\0...", InstanceUtils.asString(str, 50));
-  }
-
-  @Test
-  public void asStringCharArrayNotTruncated() throws IOException {
-    TestDump dump = TestDump.getTestDump();
-    Instance str = (Instance)dump.getDumpedThing("charArray");
-    assertEquals("char thing", InstanceUtils.asString(str, 50));
-  }
-
-  @Test
-  public void asStringNegativeMax() throws IOException {
-    TestDump dump = TestDump.getTestDump();
-    Instance str = (Instance)dump.getDumpedThing("basicString");
-    assertEquals("hello, world", InstanceUtils.asString(str, -3));
-  }
-
-  @Test
-  public void asStringNegativeMaxNonAscii() throws IOException {
-    TestDump dump = TestDump.getTestDump();
-    Instance str = (Instance)dump.getDumpedThing("nonAscii");
-    assertEquals("Sigma (\u01a9) is not ASCII", InstanceUtils.asString(str, -3));
-  }
-
-  @Test
-  public void asStringNegativeMaxEmbeddedZero() throws IOException {
-    TestDump dump = TestDump.getTestDump();
-    Instance str = (Instance)dump.getDumpedThing("embeddedZero");
-    assertEquals("embedded\0...", InstanceUtils.asString(str, -3));
-  }
-
-  @Test
-  public void asStringCharArrayNegativeMax() throws IOException {
-    TestDump dump = TestDump.getTestDump();
-    Instance str = (Instance)dump.getDumpedThing("charArray");
-    assertEquals("char thing", InstanceUtils.asString(str, -3));
-  }
-
-  @Test
-  public void asStringNull() throws IOException {
-    TestDump dump = TestDump.getTestDump();
-    Instance obj = (Instance)dump.getDumpedThing("nullString");
-    assertNull(InstanceUtils.asString(obj));
-  }
-
-  @Test
-  public void asStringNotString() throws IOException {
-    TestDump dump = TestDump.getTestDump();
-    Instance obj = (Instance)dump.getDumpedThing("anObject");
-    assertNotNull(obj);
-    assertNull(InstanceUtils.asString(obj));
-  }
-
-  @Test
-  public void basicReference() throws IOException {
-    TestDump dump = TestDump.getTestDump();
-
-    Instance pref = (Instance)dump.getDumpedThing("aPhantomReference");
-    Instance wref = (Instance)dump.getDumpedThing("aWeakReference");
-    Instance referent = (Instance)dump.getDumpedThing("anObject");
-    assertNotNull(pref);
-    assertNotNull(wref);
-    assertNotNull(referent);
-    assertEquals(referent, InstanceUtils.getReferent(pref));
-    assertEquals(referent, InstanceUtils.getReferent(wref));
-    assertNull(InstanceUtils.getReferent(referent));
-  }
-
-  @Test
-  public void gcRootPath() throws IOException {
-    TestDump dump = TestDump.getTestDump();
-
-    ClassObj main = dump.getAhatSnapshot().findClass("Main");
-    ArrayInstance gcPathArray = (ArrayInstance)dump.getDumpedThing("gcPathArray");
-    Object[] values = gcPathArray.getValues();
-    Instance base = (Instance)values[2];
-    Instance left = InstanceUtils.getRefField(base, "left");
-    Instance right = InstanceUtils.getRefField(base, "right");
-    Instance target = InstanceUtils.getRefField(left, "right");
-
-    List<InstanceUtils.PathElement> path = InstanceUtils.getPathFromGcRoot(target);
-    assertEquals(6, path.size());
-
-    assertEquals(main, path.get(0).instance);
-    assertEquals(".stuff", path.get(0).field);
-    assertTrue(path.get(0).isDominator);
-
-    assertEquals(".gcPathArray", path.get(1).field);
-    assertTrue(path.get(1).isDominator);
-
-    assertEquals(gcPathArray, path.get(2).instance);
-    assertEquals("[2]", path.get(2).field);
-    assertTrue(path.get(2).isDominator);
-
-    assertEquals(base, path.get(3).instance);
-    assertTrue(path.get(3).isDominator);
-
-    // There are two possible paths. Either it can go through the 'left' node,
-    // or the 'right' node.
-    if (path.get(3).field.equals(".left")) {
-      assertEquals(".left", path.get(3).field);
-
-      assertEquals(left, path.get(4).instance);
-      assertEquals(".right", path.get(4).field);
-      assertFalse(path.get(4).isDominator);
-
-    } else {
-      assertEquals(".right", path.get(3).field);
-
-      assertEquals(right, path.get(4).instance);
-      assertEquals(".left", path.get(4).field);
-      assertFalse(path.get(4).isDominator);
-    }
-
-    assertEquals(target, path.get(5).instance);
-    assertEquals("", path.get(5).field);
-    assertTrue(path.get(5).isDominator);
-  }
-}
diff --git a/tools/ahat/test/NativeAllocationTest.java b/tools/ahat/test/NativeAllocationTest.java
deleted file mode 100644
index 7ad4c1d..0000000
--- a/tools/ahat/test/NativeAllocationTest.java
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Copyright (C) 2016 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.android.ahat;
-
-import com.android.tools.perflib.heap.Instance;
-import java.io.IOException;
-import static org.junit.Assert.fail;
-import static org.junit.Assert.assertEquals;
-import org.junit.Test;
-
-public class NativeAllocationTest {
-
-  @Test
-  public void nativeAllocation() throws IOException {
-    TestDump dump = TestDump.getTestDump();
-
-    AhatSnapshot snapshot = dump.getAhatSnapshot();
-    Instance referent = (Instance)dump.getDumpedThing("anObject");
-    for (InstanceUtils.NativeAllocation alloc : snapshot.getNativeAllocations()) {
-      if (alloc.referent == referent) {
-        assertEquals(42 , alloc.size);
-        assertEquals(referent.getHeap(), alloc.heap);
-        assertEquals(0xABCDABCD , alloc.pointer);
-        return;
-      }
-    }
-    fail("No native allocation found with anObject as the referent");
-  }
-}
-
diff --git a/tools/ahat/test/ObjectHandlerTest.java b/tools/ahat/test/ObjectHandlerTest.java
new file mode 100644
index 0000000..cd0ba23
--- /dev/null
+++ b/tools/ahat/test/ObjectHandlerTest.java
@@ -0,0 +1,74 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.ahat;
+
+import com.android.ahat.heapdump.AhatInstance;
+import com.android.ahat.heapdump.AhatSnapshot;
+import java.io.IOException;
+import org.junit.Test;
+
+import static org.junit.Assert.assertNotNull;
+
+public class ObjectHandlerTest {
+  @Test
+  public void noCrashClassInstance() throws IOException {
+    TestDump dump = TestDump.getTestDump();
+
+    AhatInstance object = dump.getDumpedAhatInstance("aPhantomReference");
+    assertNotNull(object);
+
+    AhatHandler handler = new ObjectHandler(dump.getAhatSnapshot());
+    TestHandler.testNoCrash(handler, "http://localhost:7100/object?id=" + object.getId());
+  }
+
+  @Test
+  public void noCrashClassObj() throws IOException {
+    TestDump dump = TestDump.getTestDump();
+
+    AhatSnapshot snapshot = dump.getAhatSnapshot();
+    AhatHandler handler = new ObjectHandler(snapshot);
+
+    AhatInstance object = snapshot.findClass("Main");
+    assertNotNull(object);
+
+    TestHandler.testNoCrash(handler, "http://localhost:7100/object?id=" + object.getId());
+  }
+
+  @Test
+  public void noCrashSystemClassObj() throws IOException {
+    TestDump dump = TestDump.getTestDump();
+
+    AhatSnapshot snapshot = dump.getAhatSnapshot();
+    AhatHandler handler = new ObjectHandler(snapshot);
+
+    AhatInstance object = snapshot.findClass("java.lang.String");
+    assertNotNull(object);
+
+    TestHandler.testNoCrash(handler, "http://localhost:7100/object?id=" + object.getId());
+  }
+
+  @Test
+  public void noCrashArrayInstance() throws IOException {
+    TestDump dump = TestDump.getTestDump();
+
+    AhatInstance object = dump.getDumpedAhatInstance("gcPathArray");
+    assertNotNull(object);
+
+    AhatHandler handler = new ObjectHandler(dump.getAhatSnapshot());
+    TestHandler.testNoCrash(handler, "http://localhost:7100/object?id=" + object.getId());
+  }
+}
diff --git a/tools/ahat/test/OverviewHandlerTest.java b/tools/ahat/test/OverviewHandlerTest.java
new file mode 100644
index 0000000..c2f773b
--- /dev/null
+++ b/tools/ahat/test/OverviewHandlerTest.java
@@ -0,0 +1,34 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.ahat;
+
+import com.android.ahat.heapdump.AhatSnapshot;
+import java.io.File;
+import java.io.IOException;
+import org.junit.Test;
+
+public class OverviewHandlerTest {
+
+  @Test
+  public void noCrash() throws IOException {
+    AhatSnapshot snapshot = TestDump.getTestDump().getAhatSnapshot();
+    AhatHandler handler = new OverviewHandler(snapshot,
+        new File("my.hprof.file"),
+        new File("my.base.hprof.file"));
+    TestHandler.testNoCrash(handler, "http://localhost:7100");
+  }
+}
diff --git a/tools/ahat/test/PerformanceTest.java b/tools/ahat/test/PerformanceTest.java
index 6e46800..e13974b 100644
--- a/tools/ahat/test/PerformanceTest.java
+++ b/tools/ahat/test/PerformanceTest.java
@@ -16,13 +16,15 @@
 
 package com.android.ahat;
 
-import com.android.tools.perflib.heap.Instance;
+import com.android.ahat.heapdump.AhatInstance;
+import com.android.ahat.heapdump.AhatSnapshot;
 import java.io.IOException;
 import java.io.OutputStream;
 import java.io.PrintStream;
+import org.junit.Test;
+
 import static org.junit.Assert.assertNotNull;
 import static org.junit.Assert.assertTrue;
-import org.junit.Test;
 
 public class PerformanceTest {
   private static class NullOutputStream extends OutputStream {
@@ -36,7 +38,7 @@
     // for any object, including big arrays.
     TestDump dump = TestDump.getTestDump();
 
-    Instance bigArray = (Instance)dump.getDumpedThing("bigArray");
+    AhatInstance bigArray = dump.getDumpedAhatInstance("bigArray");
     assertNotNull(bigArray);
 
     AhatSnapshot snapshot = dump.getAhatSnapshot();
diff --git a/tools/ahat/test/QueryTest.java b/tools/ahat/test/QueryTest.java
index 40e3322..5bcf8ea 100644
--- a/tools/ahat/test/QueryTest.java
+++ b/tools/ahat/test/QueryTest.java
@@ -18,9 +18,10 @@
 
 import java.net.URI;
 import java.net.URISyntaxException;
-import static org.junit.Assert.assertEquals;
 import org.junit.Test;
 
+import static org.junit.Assert.assertEquals;
+
 public class QueryTest {
   @Test
   public void simple() throws URISyntaxException {
diff --git a/tools/ahat/test/RootedHandlerTest.java b/tools/ahat/test/RootedHandlerTest.java
new file mode 100644
index 0000000..f325b8e
--- /dev/null
+++ b/tools/ahat/test/RootedHandlerTest.java
@@ -0,0 +1,30 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.ahat;
+
+import com.android.ahat.heapdump.AhatSnapshot;
+import java.io.IOException;
+import org.junit.Test;
+
+public class RootedHandlerTest {
+  @Test
+  public void noCrash() throws IOException {
+    AhatSnapshot snapshot = TestDump.getTestDump().getAhatSnapshot();
+    AhatHandler handler = new RootedHandler(snapshot);
+    TestHandler.testNoCrash(handler, "http://localhost:7100/rooted");
+  }
+}
diff --git a/tools/ahat/test/SiteHandlerTest.java b/tools/ahat/test/SiteHandlerTest.java
new file mode 100644
index 0000000..37596be
--- /dev/null
+++ b/tools/ahat/test/SiteHandlerTest.java
@@ -0,0 +1,30 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.ahat;
+
+import com.android.ahat.heapdump.AhatSnapshot;
+import java.io.IOException;
+import org.junit.Test;
+
+public class SiteHandlerTest {
+  @Test
+  public void noCrash() throws IOException {
+    AhatSnapshot snapshot = TestDump.getTestDump().getAhatSnapshot();
+    AhatHandler handler = new SiteHandler(snapshot);
+    TestHandler.testNoCrash(handler, "http://localhost:7100/sites");
+  }
+}
diff --git a/tools/ahat/test/SnapshotBuilder.java b/tools/ahat/test/SnapshotBuilder.java
new file mode 100644
index 0000000..0eea635
--- /dev/null
+++ b/tools/ahat/test/SnapshotBuilder.java
@@ -0,0 +1,53 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.ahat;
+
+import com.android.ahat.heapdump.AhatSnapshot;
+import com.android.tools.perflib.heap.ProguardMap;
+import com.android.tools.perflib.heap.hprof.Hprof;
+import com.android.tools.perflib.heap.hprof.HprofRecord;
+import com.android.tools.perflib.heap.hprof.HprofStringBuilder;
+import com.android.tools.perflib.heap.io.InMemoryBuffer;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Date;
+import java.util.List;
+
+/**
+ * Class with utilities to help constructing snapshots for tests.
+ */
+public class SnapshotBuilder {
+
+  // Helper function to make a snapshot with an id size of 4, given an
+  // HprofStringBuilder and a list of HprofRecords.
+  public static AhatSnapshot makeSnapshot(HprofStringBuilder strings, List<HprofRecord> records)
+    throws IOException {
+    // TODO: When perflib can handle the case where strings are referred to
+    // before they are defined, just add the string records to the records
+    // list.
+    List<HprofRecord> actualRecords = new ArrayList<HprofRecord>();
+    actualRecords.addAll(strings.getStringRecords());
+    actualRecords.addAll(records);
+
+    Hprof hprof = new Hprof("JAVA PROFILE 1.0.3", 4, new Date(), actualRecords);
+    ByteArrayOutputStream os = new ByteArrayOutputStream();
+    hprof.write(os);
+    InMemoryBuffer buffer = new InMemoryBuffer(os.toByteArray());
+    return AhatSnapshot.fromDataBuffer(buffer, new ProguardMap());
+  }
+}
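A test could drive this helper roughly as follows. This is a sketch only: the HprofStringBuilder constructor taking a timestamp and the empty record list are assumptions for illustration, mirroring perflib's hprof utilities rather than anything added by this change.

    // Sketch, assuming perflib's HprofStringBuilder(int time) constructor.
    // A real test would append heap dump records that reference string ids
    // obtained from the builder before calling makeSnapshot.
    HprofStringBuilder strings = new HprofStringBuilder(0);
    List<HprofRecord> records = new ArrayList<HprofRecord>();
    AhatSnapshot snapshot = SnapshotBuilder.makeSnapshot(strings, records);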
diff --git a/tools/ahat/test/SortTest.java b/tools/ahat/test/SortTest.java
deleted file mode 100644
index 02ff7db..0000000
--- a/tools/ahat/test/SortTest.java
+++ /dev/null
@@ -1,81 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.android.ahat;
-
-import com.android.tools.perflib.heap.ClassObj;
-import com.android.tools.perflib.heap.Heap;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.List;
-import static org.junit.Assert.assertEquals;
-import org.junit.Test;
-
-public class SortTest {
-  @Test
-  public void objectsInfo() {
-    Heap heapA = new Heap(0xA, "A");
-    Heap heapB = new Heap(0xB, "B");
-    ClassObj classA = new ClassObj(0x1A, null, "classA", 0);
-    ClassObj classB = new ClassObj(0x1B, null, "classB", 0);
-    ClassObj classC = new ClassObj(0x1C, null, "classC", 0);
-    Site.ObjectsInfo infoA = new Site.ObjectsInfo(heapA, classA, 4, 14);
-    Site.ObjectsInfo infoB = new Site.ObjectsInfo(heapB, classB, 2, 15);
-    Site.ObjectsInfo infoC = new Site.ObjectsInfo(heapA, classC, 3, 13);
-    Site.ObjectsInfo infoD = new Site.ObjectsInfo(heapB, classA, 5, 12);
-    Site.ObjectsInfo infoE = new Site.ObjectsInfo(heapA, classB, 1, 11);
-    List<Site.ObjectsInfo> list = new ArrayList<Site.ObjectsInfo>();
-    list.add(infoA);
-    list.add(infoB);
-    list.add(infoC);
-    list.add(infoD);
-    list.add(infoE);
-
-    // Sort by size.
-    Collections.sort(list, new Sort.ObjectsInfoBySize());
-    assertEquals(infoB, list.get(0));
-    assertEquals(infoA, list.get(1));
-    assertEquals(infoC, list.get(2));
-    assertEquals(infoD, list.get(3));
-    assertEquals(infoE, list.get(4));
-
-    // Sort by class name.
-    Collections.sort(list, new Sort.ObjectsInfoByClassName());
-    assertEquals(classA, list.get(0).classObj);
-    assertEquals(classA, list.get(1).classObj);
-    assertEquals(classB, list.get(2).classObj);
-    assertEquals(classB, list.get(3).classObj);
-    assertEquals(classC, list.get(4).classObj);
-
-    // Sort by heap name.
-    Collections.sort(list, new Sort.ObjectsInfoByHeapName());
-    assertEquals(heapA, list.get(0).heap);
-    assertEquals(heapA, list.get(1).heap);
-    assertEquals(heapA, list.get(2).heap);
-    assertEquals(heapB, list.get(3).heap);
-    assertEquals(heapB, list.get(4).heap);
-
-    // Sort first by class name, then by size.
-    Collections.sort(list, new Sort.WithPriority<Site.ObjectsInfo>(
-          new Sort.ObjectsInfoByClassName(),
-          new Sort.ObjectsInfoBySize()));
-    assertEquals(infoA, list.get(0));
-    assertEquals(infoD, list.get(1));
-    assertEquals(infoB, list.get(2));
-    assertEquals(infoE, list.get(3));
-    assertEquals(infoC, list.get(4));
-  }
-}
diff --git a/tools/ahat/test/TestDump.java b/tools/ahat/test/TestDump.java
index ebce61c..ceb7346 100644
--- a/tools/ahat/test/TestDump.java
+++ b/tools/ahat/test/TestDump.java
@@ -16,14 +16,16 @@
 
 package com.android.ahat;
 
-import com.android.tools.perflib.heap.ClassObj;
-import com.android.tools.perflib.heap.Field;
-import com.android.tools.perflib.heap.Instance;
+import com.android.ahat.heapdump.AhatClassObj;
+import com.android.ahat.heapdump.AhatInstance;
+import com.android.ahat.heapdump.AhatSnapshot;
+import com.android.ahat.heapdump.Diff;
+import com.android.ahat.heapdump.FieldValue;
+import com.android.ahat.heapdump.Value;
 import com.android.tools.perflib.heap.ProguardMap;
 import java.io.File;
 import java.io.IOException;
 import java.text.ParseException;
-import java.util.Map;
 
 /**
  * The TestDump class is used to get an AhatSnapshot for the test-dump
@@ -37,30 +39,46 @@
   // is visible to other test cases.
   private static TestDump mCachedTestDump = null;
 
+  // If the test dump fails to load the first time, it will likely fail for
+  // every other test that needs it. Rather than waiting a potentially very
+  // long time for the test dump to fail loading over and over again, record
+  // when it fails and don't try to load it again.
+  private static boolean mTestDumpFailed = false;
+
   private AhatSnapshot mSnapshot = null;
+  private AhatSnapshot mBaseline = null;
 
   /**
-   * Load the test-dump.hprof file.
-   * The location of the file is read from the system property
-   * "ahat.test.dump.hprof", which is expected to be set on the command line.
-   * For example:
-   *   java -Dahat.test.dump.hprof=test-dump.hprof -jar ahat-tests.jar
+   * Load the test-dump.hprof and test-dump-base.hprof files.
+   * The locations of the files are read from the system properties
+   * "ahat.test.dump.hprof" and "ahat.test.dump.base.hprof", which are expected
+   * to be set on the command line.
+   * The location of the proguard map for both hprof files is read from the
+   * system property "ahat.test.dump.map".  For example:
+   *   java -Dahat.test.dump.hprof=test-dump.hprof \
+   *        -Dahat.test.dump.base.hprof=test-dump-base.hprof \
+   *        -Dahat.test.dump.map=proguard.map \
+   *        -jar ahat-tests.jar
    *
-   * An IOException is thrown if there is a failure reading the hprof file or
+   * An IOException is thrown if there is a failure reading the hprof files or
    * the proguard map.
    */
   private TestDump() throws IOException {
-      String hprof = System.getProperty("ahat.test.dump.hprof");
+    // TODO: Make use of the baseline hprof for tests.
+    String hprof = System.getProperty("ahat.test.dump.hprof");
+    String hprofBase = System.getProperty("ahat.test.dump.base.hprof");
 
-      String mapfile = System.getProperty("ahat.test.dump.map");
-      ProguardMap map = new ProguardMap();
-      try {
-        map.readFromFile(new File(mapfile));
-      } catch (ParseException e) {
-        throw new IOException("Unable to load proguard map", e);
-      }
+    String mapfile = System.getProperty("ahat.test.dump.map");
+    ProguardMap map = new ProguardMap();
+    try {
+      map.readFromFile(new File(mapfile));
+    } catch (ParseException e) {
+      throw new IOException("Unable to load proguard map", e);
+    }
 
-      mSnapshot = AhatSnapshot.fromHprof(new File(hprof), map);
+    mSnapshot = AhatSnapshot.fromHprof(new File(hprof), map);
+    mBaseline = AhatSnapshot.fromHprof(new File(hprofBase), map);
+    Diff.snapshots(mSnapshot, mBaseline);
   }
 
   /**
@@ -71,18 +89,59 @@
   }
 
   /**
-   * Return the value of a field in the DumpedStuff instance in the
+   * Get the baseline AhatSnapshot for the test dump program.
+   */
+  public AhatSnapshot getBaselineAhatSnapshot() {
+    return mBaseline;
+  }
+
+  /**
+   * Returns the value of a field in the DumpedStuff instance in the
    * snapshot for the test-dump program.
    */
-  public Object getDumpedThing(String name) {
-    ClassObj main = mSnapshot.findClass("Main");
-    Instance stuff = null;
-    for (Map.Entry<Field, Object> fields : main.getStaticFieldValues().entrySet()) {
-      if ("stuff".equals(fields.getKey().getName())) {
-        stuff = (Instance) fields.getValue();
+  public Value getDumpedValue(String name) {
+    return getDumpedValue(name, mSnapshot);
+  }
+
+  /**
+   * Returns the value of a field in the DumpedStuff instance in the
+   * baseline snapshot for the test-dump program.
+   */
+  public Value getBaselineDumpedValue(String name) {
+    return getDumpedValue(name, mBaseline);
+  }
+
+  /**
+   * Returns the value of a field in the DumpedStuff instance in the
+   * given snapshot for the test-dump program.
+   */
+  private Value getDumpedValue(String name, AhatSnapshot snapshot) {
+    AhatClassObj main = snapshot.findClass("Main");
+    AhatInstance stuff = null;
+    for (FieldValue fields : main.getStaticFieldValues()) {
+      if ("stuff".equals(fields.getName())) {
+        stuff = fields.getValue().asAhatInstance();
       }
     }
-    return InstanceUtils.getField(stuff, name);
+    return stuff.getField(name);
+  }
+
+  /**
+   * Returns the value of a non-primitive field in the DumpedStuff instance in
+   * the snapshot for the test-dump program.
+   */
+  public AhatInstance getDumpedAhatInstance(String name) {
+    Value value = getDumpedValue(name);
+    return value == null ? null : value.asAhatInstance();
+  }
+
+  /**
+   * Returns the value of a non-primitive field in the DumpedStuff instance in
+   * the baseline snapshot for the test-dump program.
+   */
+  public AhatInstance getBaselineDumpedAhatInstance(String name) {
+    Value value = getBaselineDumpedValue(name);
+    return value == null ? null : value.asAhatInstance();
   }
 
   /**
@@ -93,8 +152,14 @@
    * when possible.
    */
   public static synchronized TestDump getTestDump() throws IOException {
+    if (mTestDumpFailed) {
+      throw new RuntimeException("Test dump failed before, assuming it will again");
+    }
+
     if (mCachedTestDump == null) {
+      mTestDumpFailed = true;
       mCachedTestDump = new TestDump();
+      mTestDumpFailed = false;
     }
     return mCachedTestDump;
   }
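Putting these helpers together, a test would typically look like the sketch below; "aField" is a hypothetical placeholder, while "anObject" is a field name used elsewhere in these tests.

    // Sketch of typical TestDump usage (assumes the imports used above).
    TestDump dump = TestDump.getTestDump();       // cached across test cases
    Value value = dump.getDumpedValue("aField");  // hypothetical field name
    AhatInstance obj = dump.getDumpedAhatInstance("anObject");  // null if absent
    AhatInstance baseObj = dump.getBaselineDumpedAhatInstance("anObject");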
diff --git a/tools/ahat/test/TestHandler.java b/tools/ahat/test/TestHandler.java
new file mode 100644
index 0000000..859e39a
--- /dev/null
+++ b/tools/ahat/test/TestHandler.java
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.ahat;
+
+import java.io.IOException;
+import java.io.OutputStream;
+import java.io.PrintStream;
+
+/**
+ * Provide common utilities for basic handler tests.
+ */
+public class TestHandler {
+  private static class NullOutputStream extends OutputStream {
+    public void write(int b) throws IOException {
+    }
+  }
+
+  /**
+   * Test that the given handler doesn't crash on the given query.
+   */
+  public static void testNoCrash(AhatHandler handler, String uri) throws IOException {
+    PrintStream ps = new PrintStream(new NullOutputStream());
+    HtmlDoc doc = new HtmlDoc(ps, DocString.text("noCrash test"), DocString.uri("style.css"));
+    Query query = new Query(DocString.uri(uri));
+    handler.handle(doc, query);
+  }
+}
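Each of the *HandlerTest classes above reduces to this shape; a test for a new handler would follow the same pattern (FooHandler and the /foo query below are hypothetical placeholders):

    @Test
    public void noCrash() throws IOException {
      AhatSnapshot snapshot = TestDump.getTestDump().getAhatSnapshot();
      AhatHandler handler = new FooHandler(snapshot);  // hypothetical handler
      TestHandler.testNoCrash(handler, "http://localhost:7100/foo");
    }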
diff --git a/tools/ahat/test/Tests.java b/tools/ahat/test/Tests.java
index 3291470..2fd3286 100644
--- a/tools/ahat/test/Tests.java
+++ b/tools/ahat/test/Tests.java
@@ -22,11 +22,14 @@
   public static void main(String[] args) {
     if (args.length == 0) {
       args = new String[]{
-        "com.android.ahat.InstanceUtilsTest",
-        "com.android.ahat.NativeAllocationTest",
+        "com.android.ahat.DiffTest",
+        "com.android.ahat.InstanceTest",
+        "com.android.ahat.ObjectHandlerTest",
+        "com.android.ahat.OverviewHandlerTest",
         "com.android.ahat.PerformanceTest",
+        "com.android.ahat.RootedHandlerTest",
         "com.android.ahat.QueryTest",
-        "com.android.ahat.SortTest",
+        "com.android.ahat.SiteHandlerTest",
       };
     }
     JUnitCore.main(args);
diff --git a/tools/findbuildbotwarnings.py b/tools/findbuildbotwarnings.py
new file mode 100755
index 0000000..a172dd6
--- /dev/null
+++ b/tools/findbuildbotwarnings.py
@@ -0,0 +1,96 @@
+#!/usr/bin/env python
+#
+# Copyright (C) 2017 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Outputs the warnings that are common to all builders.
+
+Suppressed tests that are nonetheless passing are output as warnings
+by vogar.  Any tests that generate warnings in every builder are good
+candidates for no longer being suppressed, since they're passing on
+a regular basis."""
+
+import collections
+import json
+import requests
+
+# The number of recent builds to check for each builder
+NUM_BUILDS = 5
+# The buildbot step to check for warnings
+BUILDBOT_STEP = 'test libcore'
+
+
+def main():
+    # Dict from builder+build_num combination to the list of warnings
+    # in that build
+    warnings = collections.defaultdict(list)
+    r = requests.get('https://build.chromium.org/p/client.art/json/builders')
+    if r.status_code != 200:
+        print r.text
+        return
+    builders = json.loads(r.text)
+    for builder_name in sorted(builders):
+        # Build -1 is the currently-running build (if there is one), so we
+        # start with -2, which should be the most or second-most
+        # recently-completed build.
+        for build_num in range(-2, -2 - NUM_BUILDS, -1):
+            print ('Loading data for %s, build %d...'
+                   % (builder_name, build_num))
+            r = requests.get(
+                'https://build.chromium.org/p/client.art'
+                '/json/builders/%s/builds/%d' % (
+                builder_name, build_num))
+            if r.status_code != 200:
+                print r.text
+                return
+            builder = json.loads(r.text)
+            libcore_steps = [x for x in builder['steps']
+                             if x['name'] == BUILDBOT_STEP]
+            for ls in libcore_steps:
+                stdio_logs = [x for x in ls['logs'] if x[0] == 'stdio']
+                for sl in stdio_logs:
+                    # The default link is HTML, so append /text to get the
+                    # text version
+                    r = requests.get(sl[1] + '/text')
+                    if r.status_code != 200:
+                        print r.text
+                        return
+                    stdio = r.text.splitlines()
+
+                    # Walk from the back of the list to find the start of the
+                    # warnings summary
+                    i = -1
+                    try:
+                        while not stdio[i].startswith('Warnings summary:'):
+                            i -= 1
+                        i += 1   # Ignore the "Warnings summary:" line
+                        while i < -1:
+                            warnings['%s:%d' % (builder_name, build_num)].append(stdio[i])
+                            i += 1
+                    except IndexError:
+                        # Some builds don't have a warnings summary section.
+                        print '  No warnings section found.'
+    # sharedwarnings will build up the intersection of all the lists of
+    # warnings.  We seed it with an arbitrary starting point (which is fine
+    # since intersection is commutative).
+    sharedwarnings = set(warnings.popitem()[1])
+    for warning_list in warnings.itervalues():
+        sharedwarnings = sharedwarnings & set(warning_list)
+    print 'Warnings shared across all builders:'
+    for warning in sorted(list(sharedwarnings)):
+        print warning
+
+
+if __name__ == '__main__':
+    main()
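On toy data, the intersection step at the end of the script behaves like this (builder keys and warning strings are made up for illustration; Python 2, matching the script):

    warnings = {'builder-a:-2': ['w1', 'w2'], 'builder-b:-2': ['w2', 'w3']}
    sharedwarnings = set(warnings.popitem()[1])  # seed with an arbitrary list
    for warning_list in warnings.itervalues():
        sharedwarnings = sharedwarnings & set(warning_list)
    print sorted(sharedwarnings)                 # prints ['w2']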
diff --git a/tools/libcore_failures.txt b/tools/libcore_failures.txt
index 08abdb3..e0aae46 100644
--- a/tools/libcore_failures.txt
+++ b/tools/libcore_failures.txt
@@ -105,12 +105,6 @@
   names: ["org.apache.harmony.tests.java.lang.ProcessTest#test_getErrorStream"]
 },
 {
-  description: "Error decoding digital signature bytes.",
-  result: EXEC_FAILED,
-  name: "org.apache.harmony.security.tests.java.security.Signature2Test#test_verify$BII",
-  bug: 18869265
-},
-{
   description: "Test sometimes timeouts on volantis, and on most modes in debug mode",
   result: EXEC_TIMEOUT,
   names: ["libcore.java.lang.SystemTest#testArrayCopyConcurrentModification"],
@@ -222,5 +216,55 @@
   modes: [device],
   names: ["libcore.java.lang.ProcessBuilderTest#testRedirectInherit",
           "libcore.java.lang.ProcessBuilderTest#testRedirect_nullStreams"]
+},
+{
+  description: "Linker issues with libjavacoretests",
+  result: EXEC_FAILED,
+  bug: 35417197,
+  modes: [device],
+  names: [
+    "dalvik.system.JniTest#testGetSuperclass",
+    "dalvik.system.JniTest#testPassingBooleans",
+    "dalvik.system.JniTest#testPassingBytes",
+    "dalvik.system.JniTest#testPassingChars",
+    "dalvik.system.JniTest#testPassingClass",
+    "dalvik.system.JniTest#testPassingDoubles",
+    "dalvik.system.JniTest#testPassingFloats",
+    "dalvik.system.JniTest#testPassingInts",
+    "dalvik.system.JniTest#testPassingLongs",
+    "dalvik.system.JniTest#testPassingObjectReferences",
+    "dalvik.system.JniTest#testPassingShorts",
+    "dalvik.system.JniTest#testPassingThis",
+    "libcore.java.lang.OldSystemTest#test_load",
+    "libcore.java.lang.ThreadTest#testContextClassLoaderIsInherited",
+    "libcore.java.lang.ThreadTest#testContextClassLoaderIsNotNull",
+    "libcore.java.lang.ThreadTest#testGetAllStackTracesIncludesAllGroups",
+    "libcore.java.lang.ThreadTest#testGetStackTrace",
+    "libcore.java.lang.ThreadTest#testJavaContextClassLoader",
+    "libcore.java.lang.ThreadTest#testLeakingStartedThreads",
+    "libcore.java.lang.ThreadTest#testLeakingUnstartedThreads",
+    "libcore.java.lang.ThreadTest#testNativeThreadNames",
+    "libcore.java.lang.ThreadTest#testParkUntilWithUnderflowValue",
+    "libcore.java.lang.ThreadTest#testThreadDoubleStart",
+    "libcore.java.lang.ThreadTest#testThreadInterrupted",
+    "libcore.java.lang.ThreadTest#testThreadRestart",
+    "libcore.java.lang.ThreadTest#testThreadSleep",
+    "libcore.java.lang.ThreadTest#testThreadSleepIllegalArguments",
+    "libcore.java.lang.ThreadTest#testThreadWakeup",
+    "libcore.java.lang.ThreadTest#testUncaughtExceptionPreHandler_calledBeforeDefaultHandler",
+    "libcore.java.lang.ThreadTest#testUncaughtExceptionPreHandler_noDefaultHandler",
+    "libcore.java.util.TimeZoneTest#testDisplayNamesWithScript",
+    "libcore.java.util.zip.ZipEntryTest#testCommentAndExtraInSameOrder",
+    "libcore.java.util.zip.ZipEntryTest#testMaxLengthExtra",
+    "libcore.util.NativeAllocationRegistryTest#testBadSize",
+    "libcore.util.NativeAllocationRegistryTest#testEarlyFree",
+    "libcore.util.NativeAllocationRegistryTest#testNativeAllocationAllocatorAndNoSharedRegistry",
+    "libcore.util.NativeAllocationRegistryTest#testNativeAllocationAllocatorAndSharedRegistry",
+    "libcore.util.NativeAllocationRegistryTest#testNativeAllocationNoAllocatorAndNoSharedRegistry",
+    "libcore.util.NativeAllocationRegistryTest#testNativeAllocationNoAllocatorAndSharedRegistry",
+    "libcore.util.NativeAllocationRegistryTest#testNullArguments",
+    "org.apache.harmony.tests.java.text.SimpleDateFormatTest#test_parse_y",
+    "org.apache.harmony.tests.java.text.SimpleDateFormatTest#test_parse_yy"
+  ]
 }
 ]
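For anyone extending this list: the entries appear to follow vogar's expectation-file format, so a minimal new entry would be shaped like this (all values below are illustrative, not part of this change):

    {
      description: "Short human-readable reason for the suppression",
      result: EXEC_FAILED,
      bug: 123456789,
      modes: [device],
      names: ["some.package.SomeTest#testSomething"]
    }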
diff --git a/tools/setup-buildbot-device.sh b/tools/setup-buildbot-device.sh
index 1e9c763..7eaaaf9 100755
--- a/tools/setup-buildbot-device.sh
+++ b/tools/setup-buildbot-device.sh
@@ -17,9 +17,33 @@
 green='\033[0;32m'
 nc='\033[0m'
 
+# Set up as root, as the next buildbot step (device cleanup) requires it.
+# This is also required to set the date, if needed.
+adb root
+adb wait-for-device
+
+echo -e "${green}Date on host${nc}"
+date
+
 echo -e "${green}Date on device${nc}"
 adb shell date
 
+host_seconds_since_epoch=$(date -u +%s)
+device_seconds_since_epoch=$(adb shell date -u +%s)
+
+abs_time_difference_in_seconds=$(expr $host_seconds_since_epoch - $device_seconds_since_epoch)
+if [ $abs_time_difference_in_seconds -lt 0 ]; then
+  abs_time_difference_in_seconds=$(expr 0 - $abs_time_difference_in_seconds)
+fi
+
+seconds_per_hour=3600
+
+# Update date on device if the difference with host is more than one hour.
+if [ $abs_time_difference_in_seconds -gt $seconds_per_hour ]; then
+  echo -e "${green}Update date on device${nc}"
+  adb shell date -u @$host_seconds_since_epoch
+fi
+
 echo -e "${green}Turn off selinux${nc}"
 adb shell setenforce 0
 adb shell getenforce
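A worked example of the clock-skew check above, with illustrative epoch values:

    host_seconds_since_epoch=1500003601    # illustrative host clock
    device_seconds_since_epoch=1500000000  # illustrative device clock
    # The difference is 3601s, which exceeds seconds_per_hour (3600), so the
    # device clock would be reset with: adb shell date -u @1500003601
    # A skew of only a few minutes would leave the device clock untouched.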