Merge "MIPS: Eliminate hard-coded offsets in branches"
diff --git a/Android.bp b/Android.bp
index cb72082..0ce7916 100644
--- a/Android.bp
+++ b/Android.bp
@@ -32,6 +32,7 @@
     "imgdiag",
     "oatdump",
     "openjdkjvm",
+    "openjdkjvmti",
     "patchoat",
     "profman",
     "runtime",
diff --git a/Android.mk b/Android.mk
index 8735d7c..7081f7b 100644
--- a/Android.mk
+++ b/Android.mk
@@ -457,7 +457,7 @@
 build-art-target: $(TARGET_OUT_EXECUTABLES)/art $(ART_TARGET_DEPENDENCIES) $(TARGET_CORE_IMG_OUTS)
 
 ########################################################################
-# Phony target for only building what go/lem requires on target.
+# Phony target for only building what go/lem requires for pushing ART to /data.
 .PHONY: build-art-target-golem
 # Also include libartbenchmark, we always include it when running golem.
 # libstdc++ is needed when building for ART_TARGET_LINUX.
@@ -482,6 +482,11 @@
                       $(ART_HOST_SHARED_LIBRARY_BENCHMARK)
 
 ########################################################################
+# Phony target for building what go/lem requires for syncing /system to target.
+.PHONY: build-art-unbundled-golem
+build-art-unbundled-golem: art-runtime linker oatdump $(TARGET_CORE_JARS)
+
+########################################################################
 # Rules for building all dependencies for tests.
 
 .PHONY: build-art-host-tests
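
For reference, the new build-art-unbundled-golem target is driven like the
existing golem targets; a plausible invocation, assuming a configured AOSP
build environment (the lunch target below is illustrative), is:

    . build/envsetup.sh && lunch aosp_arm64-eng
    m build-art-unbundled-golem
    adb sync system   # sync the built /system files to the device
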
diff --git a/build/Android.cpplint.mk b/build/Android.cpplint.mk
index 66ac897..247f4e3 100644
--- a/build/Android.cpplint.mk
+++ b/build/Android.cpplint.mk
@@ -18,11 +18,13 @@
 
 ART_CPPLINT := $(LOCAL_PATH)/tools/cpplint.py
 ART_CPPLINT_FILTER := --filter=-whitespace/line_length,-build/include,-readability/function,-readability/streams,-readability/todo,-runtime/references,-runtime/sizeof,-runtime/threadsafe_fn,-runtime/printf
-ART_CPPLINT_FLAGS := --root=$(TOP)
+# Use `pwd` instead of $TOP for the root, since $TOP is always "." and --root
+# doesn't seem to work with a relative path (b/34787652).
+ART_CPPLINT_FLAGS := --root=`pwd`
 ART_CPPLINT_QUIET := --quiet
 ART_CPPLINT_INGORED := \
     runtime/elf.h \
-    runtime/openjdkjvmti/include/jvmti.h
+    openjdkjvmti/include/jvmti.h
 
 # This:
 #  1) Gets a list of all .h & .cc files in the art directory.
diff --git a/compiler/common_compiler_test.cc b/compiler/common_compiler_test.cc
index a9a718f..0d38620 100644
--- a/compiler/common_compiler_test.cc
+++ b/compiler/common_compiler_test.cc
@@ -95,7 +95,7 @@
     const void* method_code = CompiledMethod::CodePointer(code_ptr,
                                                           compiled_method->GetInstructionSet());
     LOG(INFO) << "MakeExecutable " << method->PrettyMethod() << " code=" << method_code;
-    class_linker_->SetEntryPointsToCompiledCode(method, method_code);
+    method->SetEntryPointFromQuickCompiledCode(method_code);
   } else {
     // No code? You must mean to go into the interpreter.
     // Or the generic JNI...
diff --git a/compiler/dex/dex_to_dex_decompiler_test.cc b/compiler/dex/dex_to_dex_decompiler_test.cc
index 7b56f3e..1ef3ba7 100644
--- a/compiler/dex/dex_to_dex_decompiler_test.cc
+++ b/compiler/dex/dex_to_dex_decompiler_test.cc
@@ -29,6 +29,7 @@
 #include "scoped_thread_state_change-inl.h"
 #include "thread.h"
 #include "verifier/method_verifier-inl.h"
+#include "verifier/verifier_deps.h"
 
 namespace art {
 
@@ -39,6 +40,11 @@
     TimingLogger::ScopedTiming t(__FUNCTION__, &timings);
     compiler_options_->boot_image_ = false;
     compiler_options_->SetCompilerFilter(CompilerFilter::kQuicken);
+    // Create the main VerifierDeps here instead of in the compiler, since we want to aggregate
+    // the results for all the dex files, not just the results for the current dex file.
+    Runtime::Current()->GetCompilerCallbacks()->SetVerifierDeps(
+        new verifier::VerifierDeps(GetDexFiles(class_loader)));
+    compiler_driver_->SetDexFilesForOatFile(GetDexFiles(class_loader));
     compiler_driver_->CompileAll(class_loader, GetDexFiles(class_loader), &timings);
   }
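
Since dex2oat now owns the main VerifierDeps (see the compiler_driver.cc change
below), tests that call CompileAll() directly must mimic it, as done above:
install VerifierDeps on the compiler callbacks and register the dex files with
the driver before compiling. In short (callbacks, driver and dex_files are
illustrative names for the test's members):

    // Mimic dex2oat: the callbacks own the output VerifierDeps, and the driver
    // must know its dex files before CompileAll().
    callbacks->SetVerifierDeps(new verifier::VerifierDeps(dex_files));
    driver->SetDexFilesForOatFile(dex_files);
    driver->CompileAll(class_loader, dex_files, &timings);
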
 
diff --git a/compiler/dex/quick_compiler_callbacks.cc b/compiler/dex/quick_compiler_callbacks.cc
index 872f7ea..c7e9f4f 100644
--- a/compiler/dex/quick_compiler_callbacks.cc
+++ b/compiler/dex/quick_compiler_callbacks.cc
@@ -16,6 +16,7 @@
 
 #include "quick_compiler_callbacks.h"
 
+#include "driver/compiler_driver.h"
 #include "verification_results.h"
 #include "verifier/method_verifier-inl.h"
 
@@ -33,4 +34,17 @@
   }
 }
 
+bool QuickCompilerCallbacks::CanAssumeVerified(ClassReference ref) {
+  // If we don't have class unloading enabled in the compiler, we will never see classes that were
+  // previously verified. Return false to avoid overhead from the lookup in the compiler driver.
+  if (!does_class_unloading_) {
+    return false;
+  }
+  DCHECK(compiler_driver_ != nullptr);
+  // In the case of the quicken filter: avoids verification of quickened instructions, which the
+  // verifier doesn't currently support.
+  // In the case of the verify filter: avoids verifying twice.
+  return compiler_driver_->CanAssumeVerified(ref);
+}
+
 }  // namespace art
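
A minimal sketch of how this callback is wired and consumed, with `callbacks`
and `driver` as illustrative names for a QuickCompilerCallbacks and its
CompilerDriver:

    // Enable the fast path only when the compiler unloads classes between
    // compilations, so previously verified classes can be seen again.
    callbacks->SetDoesClassUnloading(/* does_class_unloading */ true, driver);

    // Later, the verifier may skip any class the driver has already recorded
    // with a status of at least kStatusVerified.
    if (callbacks->CanAssumeVerified(ref)) {
      // Skip re-verification.
    }
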
diff --git a/compiler/dex/quick_compiler_callbacks.h b/compiler/dex/quick_compiler_callbacks.h
index a3a6c09..578aff4 100644
--- a/compiler/dex/quick_compiler_callbacks.h
+++ b/compiler/dex/quick_compiler_callbacks.h
@@ -22,6 +22,7 @@
 
 namespace art {
 
+class CompilerDriver;
 class VerificationResults;
 
 class QuickCompilerCallbacks FINAL : public CompilerCallbacks {
@@ -53,8 +54,19 @@
       verification_results_ = verification_results;
     }
 
+    bool CanAssumeVerified(ClassReference ref) OVERRIDE;
+
+    void SetDoesClassUnloading(bool does_class_unloading, CompilerDriver* compiler_driver)
+        OVERRIDE {
+      does_class_unloading_ = does_class_unloading;
+      compiler_driver_ = compiler_driver;
+      DCHECK(!does_class_unloading || compiler_driver_ != nullptr);
+    }
+
   private:
     VerificationResults* verification_results_ = nullptr;
+    bool does_class_unloading_ = false;
+    CompilerDriver* compiler_driver_ = nullptr;
     std::unique_ptr<verifier::VerifierDeps> verifier_deps_;
 };
 
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index 756481d..037e458 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -303,7 +303,6 @@
       timings_logger_(timer),
       compiler_context_(nullptr),
       support_boot_image_fixup_(true),
-      dex_files_for_oat_file_(nullptr),
       compiled_method_storage_(swap_fd),
       profile_compilation_info_(profile_compilation_info),
       max_arena_alloc_(0),
@@ -1915,8 +1914,8 @@
                                 TimingLogger* timings) {
   verifier::VerifierDeps* verifier_deps =
       Runtime::Current()->GetCompilerCallbacks()->GetVerifierDeps();
-  // If there is an existing `VerifierDeps`, try to use it for fast verification.
-  if (verifier_deps == nullptr) {
+  // If there are VerifierDeps other than the output-only ones we just created, use them to verify.
+  if (verifier_deps == nullptr || verifier_deps->OutputOnly()) {
     return false;
   }
   TimingLogger::ScopedTiming t("Fast Verify", timings);
@@ -1936,14 +1935,12 @@
   // time. So instead we assume these classes still need to be verified at
   // runtime.
   for (const DexFile* dex_file : dex_files) {
-    // Fetch the list of unverified classes and turn it into a set for faster
-    // lookups.
-    const std::vector<dex::TypeIndex>& unverified_classes =
+    // Fetch the set of unverified classes.
+    const std::set<dex::TypeIndex>& unverified_classes =
         verifier_deps->GetUnverifiedClasses(*dex_file);
-    std::set<dex::TypeIndex> set(unverified_classes.begin(), unverified_classes.end());
     for (uint32_t i = 0; i < dex_file->NumClassDefs(); ++i) {
       const DexFile::ClassDef& class_def = dex_file->GetClassDef(i);
-      if (set.find(class_def.class_idx_) == set.end()) {
+      if (unverified_classes.find(class_def.class_idx_) == unverified_classes.end()) {
         if (compiler_only_verifies) {
           // Just update the compiled_classes_ map. The compiler doesn't need to resolve
           // the type.
@@ -1983,13 +1980,6 @@
 void CompilerDriver::Verify(jobject jclass_loader,
                             const std::vector<const DexFile*>& dex_files,
                             TimingLogger* timings) {
-  // Always add the dex files to compiled_classes_. This happens for all compiler filters.
-  for (const DexFile* dex_file : dex_files) {
-    if (!compiled_classes_.HaveDexFile(dex_file)) {
-      compiled_classes_.AddDexFile(dex_file, dex_file->NumClassDefs());
-    }
-  }
-
   if (FastVerify(jclass_loader, dex_files, timings)) {
     return;
   }
@@ -1999,14 +1989,16 @@
   // non boot image compilation. The verifier will need it to record the new dependencies.
   // Then dex2oat can update the vdex file with these new dependencies.
   if (!GetCompilerOptions().IsBootImage()) {
+    // Dex2oat has already created the main VerifierDeps, stored in the compiler callbacks.
-    // Create the main VerifierDeps, and set it to this thread.
+    // Fetch it and set it to this thread.
-    verifier::VerifierDeps* verifier_deps = new verifier::VerifierDeps(dex_files);
-    Runtime::Current()->GetCompilerCallbacks()->SetVerifierDeps(verifier_deps);
+    verifier::VerifierDeps* verifier_deps =
+        Runtime::Current()->GetCompilerCallbacks()->GetVerifierDeps();
+    CHECK(verifier_deps != nullptr);
     Thread::Current()->SetVerifierDeps(verifier_deps);
     // Create per-thread VerifierDeps to avoid contention on the main one.
     // We will merge them after verification.
     for (ThreadPoolWorker* worker : parallel_thread_pool_->GetWorkers()) {
-      worker->GetThread()->SetVerifierDeps(new verifier::VerifierDeps(dex_files));
+      worker->GetThread()->SetVerifierDeps(new verifier::VerifierDeps(dex_files_for_oat_file_));
     }
   }
 
@@ -2031,7 +2023,7 @@
     for (ThreadPoolWorker* worker : parallel_thread_pool_->GetWorkers()) {
       verifier::VerifierDeps* thread_deps = worker->GetThread()->GetVerifierDeps();
       worker->GetThread()->SetVerifierDeps(nullptr);
-      verifier_deps->MergeWith(*thread_deps, dex_files);;
+      verifier_deps->MergeWith(*thread_deps, dex_files_for_oat_file_);
       delete thread_deps;
     }
     Thread::Current()->SetVerifierDeps(nullptr);
@@ -2702,7 +2694,14 @@
             : profile_compilation_info_->DumpInfo(&dex_files));
   }
 
-  DCHECK(current_dex_to_dex_methods_ == nullptr);
+  current_dex_to_dex_methods_ = nullptr;
+  Thread* const self = Thread::Current();
+  {
+    // Clear in case this isn't the first call to Compile.
+    MutexLock mu(self, dex_to_dex_references_lock_);
+    dex_to_dex_references_.clear();
+  }
+
   for (const DexFile* dex_file : dex_files) {
     CHECK(dex_file != nullptr);
     CompileDexFile(class_loader,
@@ -2721,7 +2720,7 @@
   {
     // From this point on, we shall not modify dex_to_dex_references_, so
     // just grab a reference to it that we use without holding the mutex.
-    MutexLock lock(Thread::Current(), dex_to_dex_references_lock_);
+    MutexLock lock(self, dex_to_dex_references_lock_);
     dex_to_dex_references = ArrayRef<DexFileMethodSet>(dex_to_dex_references_);
   }
   for (const auto& method_set : dex_to_dex_references) {
@@ -2880,9 +2879,9 @@
 bool CompilerDriver::GetCompiledClass(ClassReference ref, mirror::Class::Status* status) const {
   DCHECK(status != nullptr);
   // The table doesn't know if something wasn't inserted. For this case it will return
-  // kStatusNotReady. To handle this, just assume anything not verified is not compiled.
+  // kStatusNotReady. To handle this, just assume anything we didn't try to verify is not compiled.
   if (!compiled_classes_.Get(DexFileReference(ref.first, ref.second), status) ||
-      *status < mirror::Class::kStatusVerified) {
+      *status < mirror::Class::kStatusRetryVerificationAtRuntime) {
     return false;
   }
   return true;
@@ -2914,7 +2913,7 @@
       if (kIsDebugBuild) {
         // Check to make sure it's not a dex file for an oat file we are compiling since these
         // should always succeed. These do not include classes from used libraries.
-        for (const DexFile* dex_file : *dex_files_for_oat_file_) {
+        for (const DexFile* dex_file : GetDexFilesForOatFile()) {
           CHECK_NE(dex_ref.dex_file, dex_file) << dex_ref.dex_file->GetLocation();
         }
       }
@@ -3032,4 +3031,19 @@
   single_thread_pool_.reset();
 }
 
+void CompilerDriver::SetDexFilesForOatFile(const std::vector<const DexFile*>& dex_files) {
+  dex_files_for_oat_file_ = dex_files;
+  for (const DexFile* dex_file : dex_files) {
+    if (!compiled_classes_.HaveDexFile(dex_file)) {
+      compiled_classes_.AddDexFile(dex_file, dex_file->NumClassDefs());
+    }
+  }
+}
+
+bool CompilerDriver::CanAssumeVerified(ClassReference ref) const {
+  mirror::Class::Status existing = mirror::Class::kStatusNotReady;
+  compiled_classes_.Get(DexFileReference(ref.first, ref.second), &existing);
+  return existing >= mirror::Class::kStatusVerified;
+}
+
 }  // namespace art
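
Together, SetDexFilesForOatFile() and CanAssumeVerified() form a status
round-trip through compiled_classes_; a minimal sketch using the driver's
existing RecordClassStatus() API:

    driver->SetDexFilesForOatFile(dex_files);   // registers each dex file in compiled_classes_
    ClassReference ref(dex_files[0], /* class_def_index */ 0u);
    driver->RecordClassStatus(ref, mirror::Class::kStatusVerified);
    CHECK(driver->CanAssumeVerified(ref));      // true: status >= kStatusVerified
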
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index ecaed83..11808c1 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -22,6 +22,8 @@
 #include <unordered_set>
 #include <vector>
 
+#include "android-base/strings.h"
+
 #include "arch/instruction_set.h"
 #include "base/array_ref.h"
 #include "base/bit_utils.h"
@@ -103,15 +105,11 @@
   ~CompilerDriver();
 
   // Set dex files that will be stored in the oat file after being compiled.
-  void SetDexFilesForOatFile(const std::vector<const DexFile*>& dex_files) {
-    dex_files_for_oat_file_ = &dex_files;
-  }
+  void SetDexFilesForOatFile(const std::vector<const DexFile*>& dex_files);
 
   // Get dex file that will be stored in the oat file after being compiled.
   ArrayRef<const DexFile* const> GetDexFilesForOatFile() const {
-    return (dex_files_for_oat_file_ != nullptr)
-        ? ArrayRef<const DexFile* const>(*dex_files_for_oat_file_)
-        : ArrayRef<const DexFile* const>();
+    return ArrayRef<const DexFile* const>(dex_files_for_oat_file_);
   }
 
   void CompileAll(jobject class_loader,
@@ -381,6 +379,16 @@
     return profile_compilation_info_;
   }
 
+  bool CanAssumeVerified(ClassReference ref) const;
+
+  // Is `boot_image_filename` the name of a core image (small boot
+  // image used for ART testing only)?
+  static bool IsCoreImageFilename(const std::string& boot_image_filename) {
+    // TODO: This is under-approximating...
+    return android::base::EndsWith(boot_image_filename, "core.art")
+        || android::base::EndsWith(boot_image_filename, "core-optimizing.art");
+  }
+
  private:
   void PreCompile(jobject class_loader,
                   const std::vector<const DexFile*>& dex_files,
@@ -532,7 +540,7 @@
   bool support_boot_image_fixup_;
 
   // List of dex files that will be stored in the oat file.
-  const std::vector<const DexFile*>* dex_files_for_oat_file_;
+  std::vector<const DexFile*> dex_files_for_oat_file_;
 
   CompiledMethodStorage compiled_method_storage_;
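
As the TODO in IsCoreImageFilename() says, the name-based check is
under-approximating; for illustration (paths are made up):

    CompilerDriver::IsCoreImageFilename("/data/art-test/core.art");             // true
    CompilerDriver::IsCoreImageFilename("/data/art-test/core-optimizing.art");  // true
    CompilerDriver::IsCoreImageFilename("/system/framework/boot.art");          // false
    CompilerDriver::IsCoreImageFilename("core-interpreter.art");                // false, although
                                                                                // this would also
                                                                                // be a core image
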
 
diff --git a/compiler/driver/compiler_driver_test.cc b/compiler/driver/compiler_driver_test.cc
index 10bfd97..392d57c 100644
--- a/compiler/driver/compiler_driver_test.cc
+++ b/compiler/driver/compiler_driver_test.cc
@@ -23,6 +23,7 @@
 #include "art_method-inl.h"
 #include "class_linker-inl.h"
 #include "common_compiler_test.h"
+#include "compiler_callbacks.h"
 #include "dex_file.h"
 #include "dex_file_types.h"
 #include "gc/heap.h"
@@ -42,7 +43,9 @@
   void CompileAll(jobject class_loader) REQUIRES(!Locks::mutator_lock_) {
     TimingLogger timings("CompilerDriverTest::CompileAll", false, false);
     TimingLogger::ScopedTiming t(__FUNCTION__, &timings);
-    compiler_driver_->CompileAll(class_loader, GetDexFiles(class_loader), &timings);
+    dex_files_ = GetDexFiles(class_loader);
+    compiler_driver_->SetDexFilesForOatFile(dex_files_);
+    compiler_driver_->CompileAll(class_loader, dex_files_, &timings);
     t.NewTiming("MakeAllExecutable");
     MakeAllExecutable(class_loader);
   }
@@ -95,6 +98,7 @@
   JNIEnv* env_;
   jclass class_;
   jmethodID mid_;
+  std::vector<const DexFile*> dex_files_;
 };
 
 // Disabled due to 10 second runtime on host
@@ -363,6 +367,49 @@
   CheckVerifiedClass(class_loader, "LSecond;");
 }
 
+// Test that a class of status kStatusRetryVerificationAtRuntime is indeed recorded that way in the
+// driver.
+// Also test that classes can be assumed verified if class unloading is enabled and the class
+// status is at least kStatusVerified.
+TEST_F(CompilerDriverVerifyTest, RetryVerificationStatusCheckVerified) {
+  Thread* const self = Thread::Current();
+  jobject class_loader;
+  std::vector<const DexFile*> dex_files;
+  const DexFile* dex_file = nullptr;
+  {
+    ScopedObjectAccess soa(self);
+    class_loader = LoadDex("ProfileTestMultiDex");
+    ASSERT_NE(class_loader, nullptr);
+    dex_files = GetDexFiles(class_loader);
+    ASSERT_GT(dex_files.size(), 0u);
+    dex_file = dex_files.front();
+  }
+  compiler_driver_->SetDexFilesForOatFile(dex_files);
+  callbacks_->SetDoesClassUnloading(true, compiler_driver_.get());
+  ClassReference ref(dex_file, 0u);
+  // Test that the status is read from the compiler driver as expected.
+  for (size_t i = mirror::Class::kStatusRetryVerificationAtRuntime;
+      i < mirror::Class::kStatusMax;
+      ++i) {
+    const mirror::Class::Status expected_status = static_cast<mirror::Class::Status>(i);
+    // Skip unsupported status that are not supposed to be ever recorded.
+    if (expected_status == mirror::Class::kStatusVerifyingAtRuntime ||
+        expected_status == mirror::Class::kStatusInitializing) {
+      continue;
+    }
+    compiler_driver_->RecordClassStatus(ref, expected_status);
+    mirror::Class::Status status = {};
+    ASSERT_TRUE(compiler_driver_->GetCompiledClass(ref, &status));
+    EXPECT_EQ(status, expected_status);
+
+    // If the status is at least kStatusVerified, the class can be assumed verified by the
+    // compiler driver.
+    if (status >= mirror::Class::kStatusVerified) {
+      EXPECT_TRUE(callbacks_->CanAssumeVerified(ref)) << status;
+    }
+  }
+}
+
 // TODO: need check-cast test (when stub complete & we can throw/catch
 
 }  // namespace art
diff --git a/compiler/driver/compiler_options.cc b/compiler/driver/compiler_options.cc
index 3cacc2c..538845d 100644
--- a/compiler/driver/compiler_options.cc
+++ b/compiler/driver/compiler_options.cc
@@ -18,6 +18,8 @@
 
 #include <fstream>
 
+#include "runtime.h"
+
 namespace art {
 
 CompilerOptions::CompilerOptions()
@@ -30,6 +32,7 @@
       inline_max_code_units_(kUnsetInlineMaxCodeUnits),
       no_inline_from_(nullptr),
       boot_image_(false),
+      core_image_(false),
       app_image_(false),
       top_k_profile_threshold_(kDefaultTopKProfileThreshold),
       debuggable_(false),
@@ -55,6 +58,19 @@
   // because we don't want to include the PassManagerOptions definition from the header file.
 }
 
+bool CompilerOptions::EmitRunTimeChecksInDebugMode() const {
+  // Run-time checks (e.g. Marking Register checks) are only emitted
+  // in debug mode, and
+  // - when running on device; or
+  // - when running on host, but only
+  //   - when compiling the core image (which is used only for testing); or
+  //   - when JIT compiling (only relevant for non-native methods).
+  // This is to prevent these checks from being emitted into pre-opted
+  // boot image or apps, as these are compiled with dex2oatd.
+  return kIsDebugBuild &&
+      (kIsTargetBuild || IsCoreImage() || Runtime::Current()->UseJitCompilation());
+}
+
 void CompilerOptions::ParseHugeMethodMax(const StringPiece& option, UsageFn Usage) {
   ParseUintOption(option, "--huge-method-max", &huge_method_threshold_, Usage);
 }
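
Spelling out the predicate above (kIsDebugBuild and kIsTargetBuild are
compile-time constants):

    // dex2oatd on device:                kIsDebugBuild && kIsTargetBuild        -> checks emitted
    // dex2oatd on host, core image:      kIsDebugBuild && IsCoreImage()         -> checks emitted
    // debug-build JIT at run time:       kIsDebugBuild && UseJitCompilation()   -> checks emitted
    // dex2oatd on host, boot image/app:  all three disjuncts false              -> no checks
    // any release (non-debug) build:     kIsDebugBuild is false                 -> no checks
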
diff --git a/compiler/driver/compiler_options.h b/compiler/driver/compiler_options.h
index b99263d..1e05c4e 100644
--- a/compiler/driver/compiler_options.h
+++ b/compiler/driver/compiler_options.h
@@ -161,6 +161,9 @@
     return generate_mini_debug_info_;
   }
 
+  // Should run-time checks be emitted in debug mode?
+  bool EmitRunTimeChecksInDebugMode() const;
+
   bool GetGenerateBuildId() const {
     return generate_build_id_;
   }
@@ -177,10 +180,19 @@
     return implicit_suspend_checks_;
   }
 
+  // Are we compiling a boot image?
   bool IsBootImage() const {
     return boot_image_;
   }
 
+  // Are we compiling a core image (small boot image only used for ART testing)?
+  bool IsCoreImage() const {
+    // Ensure that `core_image_` => `boot_image_`.
+    DCHECK(!core_image_ || boot_image_);
+    return core_image_;
+  }
+
+  // Are we compiling an app image?
   bool IsAppImage() const {
     return app_image_;
   }
@@ -266,6 +278,7 @@
   const std::vector<const DexFile*>* no_inline_from_;
 
   bool boot_image_;
+  bool core_image_;
   bool app_image_;
   // When using a profile file only the top K% of the profiled samples will be compiled.
   double top_k_profile_threshold_;
diff --git a/compiler/image_test.h b/compiler/image_test.h
index 57d0987..daa4b11 100644
--- a/compiler/image_test.h
+++ b/compiler/image_test.h
@@ -214,7 +214,8 @@
                                                       /*compile_app_image*/false,
                                                       storage_mode,
                                                       oat_filename_vector,
-                                                      dex_file_to_oat_index_map));
+                                                      dex_file_to_oat_index_map,
+                                                      /*dirty_image_objects*/nullptr));
   {
     {
       jobject class_loader = nullptr;
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index 318009c..115e722 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -579,7 +579,12 @@
         }
       }
 
-      if (klass->GetStatus() == Class::kStatusInitialized) {
+      // Move known dirty objects into their own sections. This includes:
+      //   - classes with dirty static fields.
+      if (dirty_image_objects_ != nullptr &&
+          dirty_image_objects_->find(klass->PrettyDescriptor()) != dirty_image_objects_->end()) {
+        bin = kBinKnownDirty;
+      } else if (klass->GetStatus() == Class::kStatusInitialized) {
         bin = kBinClassInitialized;
 
         // If the class's static fields are all final, put it into a separate bin
@@ -770,18 +775,18 @@
       *result_ = true;
     }
 
-    // Record the object visited in case of circular reference.
-    visited_->emplace(ref);
     if (ref->IsClass()) {
       *result_ = *result_ ||
           image_writer_->PruneAppImageClassInternal(ref->AsClass(), early_exit_, visited_);
     } else {
+      // Record the object visited in case of circular reference.
+      visited_->emplace(ref);
       *result_ = *result_ ||
           image_writer_->PruneAppImageClassInternal(klass, early_exit_, visited_);
       ref->VisitReferences(*this, *this);
+      // Clean up before exit for next call of this function.
+      visited_->erase(ref);
     }
-    // Clean up before exit for next call of this function.
-    visited_->erase(ref);
   }
 
   ALWAYS_INLINE void operator() (ObjPtr<mirror::Class> klass ATTRIBUTE_UNUSED,
@@ -894,7 +899,7 @@
                                                 &my_early_exit,
                                                 visited);
   // Remove the class if the dex file is not in the set of dex files. This happens for classes that
-  // are from uses library if there is no profile. b/30688277
+  // are from uses-library if there is no profile. b/30688277
   mirror::DexCache* dex_cache = klass->GetDexCache();
   if (dex_cache != nullptr) {
     result = result ||
@@ -1153,9 +1158,22 @@
   Thread* self = Thread::Current();
   ScopedAssertNoThreadSuspension sa(__FUNCTION__);
 
-  // Clear class table strong roots so that dex caches can get pruned. We require pruning the class
-  // path dex caches.
-  class_linker->ClearClassTableStrongRoots();
+  // Prune uses-library dex caches. Only prune the uses-library dex caches since we want to make
+  // sure the other ones don't get unloaded before the OatWriter runs.
+  class_linker->VisitClassTables(
+      [&](ClassTable* table) REQUIRES_SHARED(Locks::mutator_lock_) {
+    table->RemoveStrongRoots(
+        [&](GcRoot<mirror::Object> root) REQUIRES_SHARED(Locks::mutator_lock_) {
+      ObjPtr<mirror::Object> obj = root.Read();
+      if (obj->IsDexCache()) {
+        // Return true if the dex file is not one of the ones in the map.
+        return dex_file_oat_index_map_.find(obj->AsDexCache()->GetDexFile()) ==
+            dex_file_oat_index_map_.end();
+      }
+      // Return false to avoid removing.
+      return false;
+    });
+  });
 
   // Remove the undesired classes from the class roots.
   ObjPtr<mirror::ClassLoader> class_loader;
@@ -1668,6 +1686,10 @@
       runtime->GetCalleeSaveMethod(CalleeSaveType::kSaveRefsAndArgs);
   image_methods_[ImageHeader::kSaveEverythingMethod] =
       runtime->GetCalleeSaveMethod(CalleeSaveType::kSaveEverything);
+  image_methods_[ImageHeader::kSaveEverythingMethodForClinit] =
+      runtime->GetCalleeSaveMethod(CalleeSaveType::kSaveEverythingForClinit);
+  image_methods_[ImageHeader::kSaveEverythingMethodForSuspendCheck] =
+      runtime->GetCalleeSaveMethod(CalleeSaveType::kSaveEverythingForSuspendCheck);
   // Visit image methods first to have the main runtime methods in the first image.
   for (auto* m : image_methods_) {
     CHECK(m != nullptr);
@@ -2761,7 +2783,8 @@
     bool compile_app_image,
     ImageHeader::StorageMode image_storage_mode,
     const std::vector<const char*>& oat_filenames,
-    const std::unordered_map<const DexFile*, size_t>& dex_file_oat_index_map)
+    const std::unordered_map<const DexFile*, size_t>& dex_file_oat_index_map,
+    const std::unordered_set<std::string>* dirty_image_objects)
     : compiler_driver_(compiler_driver),
       global_image_begin_(reinterpret_cast<uint8_t*>(image_begin)),
       image_objects_offset_begin_(0),
@@ -2773,7 +2796,8 @@
       clean_methods_(0u),
       image_storage_mode_(image_storage_mode),
       oat_filenames_(oat_filenames),
-      dex_file_oat_index_map_(dex_file_oat_index_map) {
+      dex_file_oat_index_map_(dex_file_oat_index_map),
+      dirty_image_objects_(dirty_image_objects) {
   CHECK_NE(image_begin, 0U);
   std::fill_n(image_methods_, arraysize(image_methods_), nullptr);
   CHECK_EQ(compile_app_image, !Runtime::Current()->GetHeap()->GetBootImageSpaces().empty())
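
A minimal sketch of how the new dirty_image_objects set could be populated
before constructing the ImageWriter. The one-descriptor-per-line file format
and the flag name are assumptions; what is fixed by the lookup above is that
entries must match Class::PrettyDescriptor() output, e.g. "java.lang.System":

    #include <fstream>
    #include <string>
    #include <unordered_set>

    // Read class descriptors, e.g. from a file passed via a --dirty-image-objects
    // flag (hypothetical here), one per line.
    std::unordered_set<std::string> ReadDirtyImageObjects(const std::string& path) {
      std::unordered_set<std::string> dirty;
      std::ifstream in(path);
      for (std::string line; std::getline(in, line);) {
        dirty.insert(line);
      }
      return dirty;
    }

Passing nullptr instead (as image_test.h now does) keeps the old behavior, with
no kBinKnownDirty entries.
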
diff --git a/compiler/image_writer.h b/compiler/image_writer.h
index 34bbbad..866e204 100644
--- a/compiler/image_writer.h
+++ b/compiler/image_writer.h
@@ -75,7 +75,8 @@
               bool compile_app_image,
               ImageHeader::StorageMode image_storage_mode,
               const std::vector<const char*>& oat_filenames,
-              const std::unordered_map<const DexFile*, size_t>& dex_file_oat_index_map);
+              const std::unordered_map<const DexFile*, size_t>& dex_file_oat_index_map,
+              const std::unordered_set<std::string>* dirty_image_objects);
 
   bool PrepareImageAddressSpace();
 
@@ -159,6 +160,7 @@
   // Classify different kinds of bins that objects end up getting packed into during image writing.
   // Ordered from dirtiest to cleanest (until ArtMethods).
   enum Bin {
+    kBinKnownDirty,               // Known dirty objects from --dirty-image-objects list
     kBinMiscDirty,                // Dex caches, object locks, etc...
     kBinClassVerified,            // Class verified, but initializers haven't been run
     // Unknown mix of clean/dirty:
@@ -599,6 +601,9 @@
   // Map of dex files to the indexes of oat files that they were compiled into.
   const std::unordered_map<const DexFile*, size_t>& dex_file_oat_index_map_;
 
+  // Set of objects known to be dirty in the image. Can be nullptr if there are none.
+  const std::unordered_set<std::string>* dirty_image_objects_;
+
   class ComputeLazyFieldsForClassesVisitor;
   class FixupClassVisitor;
   class FixupRootVisitor;
diff --git a/compiler/jni/quick/jni_compiler.cc b/compiler/jni/quick/jni_compiler.cc
index b65b93f..e7e4647 100644
--- a/compiler/jni/quick/jni_compiler.cc
+++ b/compiler/jni/quick/jni_compiler.cc
@@ -219,7 +219,9 @@
   // Assembler that holds generated instructions
   std::unique_ptr<JNIMacroAssembler<kPointerSize>> jni_asm =
       GetMacroAssembler<kPointerSize>(&arena, instruction_set, instruction_set_features);
-  jni_asm->cfi().SetEnabled(driver->GetCompilerOptions().GenerateAnyDebugInfo());
+  const CompilerOptions& compiler_options = driver->GetCompilerOptions();
+  jni_asm->cfi().SetEnabled(compiler_options.GenerateAnyDebugInfo());
+  jni_asm->SetEmitRunTimeChecksInDebugMode(compiler_options.EmitRunTimeChecksInDebugMode());
 
   // Offsets into data structures
   // TODO: if cross compiling these offsets are for the host not the target
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index 4d258af..0b2d7f4 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -62,6 +62,9 @@
 
 namespace {  // anonymous namespace
 
+// Whether to write dex layout info into the oat file.
+static constexpr bool kWriteDexLayoutInfo = true;
+
 typedef DexFile::Header __attribute__((aligned(1))) UnalignedDexFileHeader;
 
 const UnalignedDexFileHeader* AsUnalignedDexFileHeader(const uint8_t* raw_data) {
@@ -288,10 +291,14 @@
   uint32_t class_offsets_offset_;
   uint32_t lookup_table_offset_;
   uint32_t method_bss_mapping_offset_;
+  uint32_t dex_sections_layout_offset_;
 
   // Data to write to a separate section.
   dchecked_vector<uint32_t> class_offsets_;
 
+  // Dex section layout info to serialize.
+  DexLayoutSections dex_sections_layout_;
+
  private:
   DISALLOW_COPY_AND_ASSIGN(OatDexFile);
 };
@@ -362,6 +369,9 @@
     size_oat_dex_file_offset_(0),
     size_oat_dex_file_class_offsets_offset_(0),
     size_oat_dex_file_lookup_table_offset_(0),
+    size_oat_dex_file_dex_layout_sections_offset_(0),
+    size_oat_dex_file_dex_layout_sections_(0),
+    size_oat_dex_file_dex_layout_sections_alignment_(0),
     size_oat_dex_file_method_bss_mapping_offset_(0),
     size_oat_lookup_table_alignment_(0),
     size_oat_lookup_table_(0),
@@ -571,11 +581,16 @@
     }
   }
 
-  // Write TypeLookupTables into OAT.
+  // Write type lookup tables into the oat file.
   if (!WriteTypeLookupTables(&checksum_updating_rodata, dex_files)) {
     return false;
   }
 
+  // Write dex layout sections into the oat file.
+  if (!WriteDexLayoutSections(&checksum_updating_rodata, dex_files)) {
+    return false;
+  }
+
   *opened_dex_files_map = std::move(dex_files_map);
   *opened_dex_files = std::move(dex_files);
   write_state_ = WriteState::kPrepareLayout;
@@ -1282,9 +1297,12 @@
   bool StartClass(const DexFile* dex_file, size_t class_def_index) OVERRIDE
       REQUIRES_SHARED(Locks::mutator_lock_) {
     OatDexMethodVisitor::StartClass(dex_file, class_def_index);
-    if (dex_cache_ == nullptr || dex_cache_->GetDexFile() != dex_file) {
-      dex_cache_ = class_linker_->FindDexCache(Thread::Current(), *dex_file);
-      DCHECK(dex_cache_ != nullptr);
+    if (writer_->GetCompilerDriver()->GetCompilerOptions().IsAotCompilationEnabled()) {
+      // Only need to set the dex cache when compiling; other modes might have unloaded it.
+      if (dex_cache_ == nullptr || dex_cache_->GetDexFile() != dex_file) {
+        dex_cache_ = class_linker_->FindDexCache(Thread::Current(), *dex_file);
+        DCHECK(dex_cache_ != nullptr);
+      }
     }
     return true;
   }
@@ -2317,6 +2335,9 @@
     DO_STAT(size_oat_dex_file_offset_);
     DO_STAT(size_oat_dex_file_class_offsets_offset_);
     DO_STAT(size_oat_dex_file_lookup_table_offset_);
+    DO_STAT(size_oat_dex_file_dex_layout_sections_offset_);
+    DO_STAT(size_oat_dex_file_dex_layout_sections_);
+    DO_STAT(size_oat_dex_file_dex_layout_sections_alignment_);
     DO_STAT(size_oat_dex_file_method_bss_mapping_offset_);
     DO_STAT(size_oat_lookup_table_alignment_);
     DO_STAT(size_oat_lookup_table_);
@@ -2805,6 +2826,7 @@
   if (!WriteDexFile(out, oat_dex_file, mem_map->Begin(), /* update_input_vdex */ false)) {
     return false;
   }
+  oat_dex_file->dex_sections_layout_ = dex_layout.GetSections();
   // Set the checksum of the new oat dex file to be the original file's checksum.
   oat_dex_file->dex_file_location_checksum_ = dex_file->GetLocationChecksum();
   return true;
@@ -3150,6 +3172,70 @@
   return true;
 }
 
+bool OatWriter::WriteDexLayoutSections(
+    OutputStream* oat_rodata,
+    const std::vector<std::unique_ptr<const DexFile>>& opened_dex_files) {
+  TimingLogger::ScopedTiming split(__FUNCTION__, timings_);
+
+  if (!kWriteDexLayoutInfo) {
+    return true;
+  }
+
+  uint32_t expected_offset = oat_data_offset_ + oat_size_;
+  off_t actual_offset = oat_rodata->Seek(expected_offset, kSeekSet);
+  if (static_cast<uint32_t>(actual_offset) != expected_offset) {
+    PLOG(ERROR) << "Failed to seek to dex layout section offset section. Actual: " << actual_offset
+                << " Expected: " << expected_offset << " File: " << oat_rodata->GetLocation();
+    return false;
+  }
+
+  DCHECK_EQ(opened_dex_files.size(), oat_dex_files_.size());
+  size_t rodata_offset = oat_size_;
+  for (size_t i = 0, size = opened_dex_files.size(); i != size; ++i) {
+    OatDexFile* oat_dex_file = &oat_dex_files_[i];
+    DCHECK_EQ(oat_dex_file->dex_sections_layout_offset_, 0u);
+
+    // Write dex layout section alignment bytes.
+    const size_t padding_size =
+        RoundUp(rodata_offset, alignof(DexLayoutSections)) - rodata_offset;
+    if (padding_size != 0u) {
+      std::vector<uint8_t> buffer(padding_size, 0u);
+      if (!oat_rodata->WriteFully(buffer.data(), padding_size)) {
+        PLOG(ERROR) << "Failed to write lookup table alignment padding."
+                    << " File: " << oat_dex_file->GetLocation()
+                    << " Output: " << oat_rodata->GetLocation();
+        return false;
+      }
+      size_oat_dex_file_dex_layout_sections_alignment_ += padding_size;
+      rodata_offset += padding_size;
+    }
+
+    DCHECK_ALIGNED(rodata_offset, alignof(DexLayoutSections));
+    DCHECK_EQ(oat_data_offset_ + rodata_offset,
+              static_cast<size_t>(oat_rodata->Seek(0u, kSeekCurrent)));
+    DCHECK(oat_dex_file != nullptr);
+    if (!oat_rodata->WriteFully(&oat_dex_file->dex_sections_layout_,
+                                sizeof(oat_dex_file->dex_sections_layout_))) {
+      PLOG(ERROR) << "Failed to write dex layout sections."
+                  << " File: " << oat_dex_file->GetLocation()
+                  << " Output: " << oat_rodata->GetLocation();
+      return false;
+    }
+    oat_dex_file->dex_sections_layout_offset_ = rodata_offset;
+    size_oat_dex_file_dex_layout_sections_ += sizeof(oat_dex_file->dex_sections_layout_);
+    rodata_offset += sizeof(oat_dex_file->dex_sections_layout_);
+  }
+  oat_size_ = rodata_offset;
+
+  if (!oat_rodata->Flush()) {
+    PLOG(ERROR) << "Failed to flush stream after writing type dex layout sections."
+                << " File: " << oat_rodata->GetLocation();
+    return false;
+  }
+
+  return true;
+}
+
 bool OatWriter::WriteChecksumsAndVdexHeader(OutputStream* vdex_out) {
   if (!kIsVdexEnabled) {
     return true;
@@ -3249,6 +3335,7 @@
       class_offsets_offset_(0u),
       lookup_table_offset_(0u),
       method_bss_mapping_offset_(0u),
+      dex_sections_layout_offset_(0u),
       class_offsets_() {
 }
 
@@ -3259,7 +3346,8 @@
           + sizeof(dex_file_offset_)
           + sizeof(class_offsets_offset_)
           + sizeof(lookup_table_offset_)
-          + sizeof(method_bss_mapping_offset_);
+          + sizeof(method_bss_mapping_offset_)
+          + sizeof(dex_sections_layout_offset_);
 }
 
 bool OatWriter::OatDexFile::Write(OatWriter* oat_writer, OutputStream* out) const {
@@ -3302,6 +3390,12 @@
   }
   oat_writer->size_oat_dex_file_lookup_table_offset_ += sizeof(lookup_table_offset_);
 
+  if (!out->WriteFully(&dex_sections_layout_offset_, sizeof(dex_sections_layout_offset_))) {
+    PLOG(ERROR) << "Failed to write dex section layout info to " << out->GetLocation();
+    return false;
+  }
+  oat_writer->size_oat_dex_file_dex_layout_sections_offset_ += sizeof(dex_sections_layout_offset_);
+
   if (!out->WriteFully(&method_bss_mapping_offset_, sizeof(method_bss_mapping_offset_))) {
     PLOG(ERROR) << "Failed to write method bss mapping offset to " << out->GetLocation();
     return false;
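
The alignment arithmetic in WriteDexLayoutSections(), worked through once
(alignof(DexLayoutSections) == 8 is an assumed value):

    // rodata_offset = 0x1234 and alignment 8:
    //   RoundUp(0x1234, 8) == 0x1238, so padding_size == 4.
    //   Four zero bytes are written, rodata_offset advances to 0x1238, the
    //   DexLayoutSections struct is written there, and rodata_offset then
    //   advances by sizeof(DexLayoutSections).
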
diff --git a/compiler/oat_writer.h b/compiler/oat_writer.h
index 470d69e..8db00f7 100644
--- a/compiler/oat_writer.h
+++ b/compiler/oat_writer.h
@@ -324,6 +324,8 @@
   bool ValidateDexFileHeader(const uint8_t* raw_header, const char* location);
   bool WriteTypeLookupTables(OutputStream* oat_rodata,
                              const std::vector<std::unique_ptr<const DexFile>>& opened_dex_files);
+  bool WriteDexLayoutSections(OutputStream* oat_rodata,
+                              const std::vector<std::unique_ptr<const DexFile>>& opened_dex_files);
   bool WriteCodeAlignment(OutputStream* out, uint32_t aligned_code_delta);
   bool WriteUpTo16BytesAlignment(OutputStream* out, uint32_t size, uint32_t* stat);
   void SetMultiOatRelativePatcherAdjustment();
@@ -455,6 +457,9 @@
   uint32_t size_oat_dex_file_offset_;
   uint32_t size_oat_dex_file_class_offsets_offset_;
   uint32_t size_oat_dex_file_lookup_table_offset_;
+  uint32_t size_oat_dex_file_dex_layout_sections_offset_;
+  uint32_t size_oat_dex_file_dex_layout_sections_;
+  uint32_t size_oat_dex_file_dex_layout_sections_alignment_;
   uint32_t size_oat_dex_file_method_bss_mapping_offset_;
   uint32_t size_oat_lookup_table_alignment_;
   uint32_t size_oat_lookup_table_;
diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc
index c166deb..2f96cfa 100644
--- a/compiler/optimizing/bounds_check_elimination.cc
+++ b/compiler/optimizing/bounds_check_elimination.cc
@@ -1121,6 +1121,66 @@
     }
   }
 
+  void VisitRem(HRem* instruction) OVERRIDE {
+    HInstruction* left = instruction->GetLeft();
+    HInstruction* right = instruction->GetRight();
+
+    // Handle 'i % CONST' expressions in an array index, e.g.:
+    //   array[i % 20];
+    if (right->IsIntConstant()) {
+      int32_t right_const = std::abs(right->AsIntConstant()->GetValue());
+      if (right_const == 0) {
+        return;
+      }
+      // The sign of the divisor CONST doesn't affect the final value range.
+      // For example:
+      // if (i > 0) {
+      //   array[i % 10];  // index value range [0, 9]
+      //   array[i % -10]; // index value range [0, 9]
+      // }
+      ValueRange* right_range = new (GetGraph()->GetArena()) ValueRange(
+          GetGraph()->GetArena(),
+          ValueBound(nullptr, 1 - right_const),
+          ValueBound(nullptr, right_const - 1));
+
+      ValueRange* left_range = LookupValueRange(left, left->GetBlock());
+      if (left_range != nullptr) {
+        right_range = left_range->Narrow(right_range);
+      }
+      AssignRange(instruction->GetBlock(), instruction, right_range);
+      return;
+    }
+
+    // Handle the following pattern:
+    // i0 NullCheck
+    // i1 ArrayLength[i0]
+    // i2 DivByZeroCheck [i1]  <-- right
+    // i3 Rem [i5, i2]         <-- we are here.
+    // i4 BoundsCheck [i3,i1]
+    if (right->IsDivZeroCheck()) {
+      // If array_length passes the div-by-zero check,
+      // array_length must be > 0.
+      right = right->AsDivZeroCheck()->InputAt(0);
+    }
+
+    // Handle 'i % array.length' expressions in an array index, e.g.:
+    //   array[(i+7) % array.length];
+    if (right->IsArrayLength()) {
+      ValueBound lower = ValueBound::Min();  // ideally, lower should be '1-array_length'.
+      ValueBound upper = ValueBound(right, -1);  // array_length - 1
+      ValueRange* right_range = new (GetGraph()->GetArena()) ValueRange(
+          GetGraph()->GetArena(),
+          lower,
+          upper);
+      ValueRange* left_range = LookupValueRange(left, left->GetBlock());
+      if (left_range != nullptr) {
+        right_range = left_range->Narrow(right_range);
+      }
+      AssignRange(instruction->GetBlock(), instruction, right_range);
+      return;
+    }
+  }
+
   void VisitNewArray(HNewArray* new_array) OVERRIDE {
     HInstruction* len = new_array->GetLength();
     if (!len->IsIntConstant()) {
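
A worked instance of the constant case in VisitRem(), matching the new test
below: for array[i % 10] in a loop where i ranges over [0, 199],

    // C = 10 gives the divisor range [1 - |C|, |C| - 1] = [-9, 9].
    // Narrowing with range(i) = [0, 199] yields [0, 9].
    // [0, 9] lies below the array length 10, so the bounds check is removed.

For param_i with an unknown (possibly negative) range, the range stays [-9, 9]
and the check must remain, which is exactly what the test asserts.
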
diff --git a/compiler/optimizing/bounds_check_elimination_test.cc b/compiler/optimizing/bounds_check_elimination_test.cc
index 575e2fc..2aaf058 100644
--- a/compiler/optimizing/bounds_check_elimination_test.cc
+++ b/compiler/optimizing/bounds_check_elimination_test.cc
@@ -951,4 +951,152 @@
   ASSERT_TRUE(IsRemoved(bounds_check6));
 }
 
+// int[] array = new int[10];
+// for (int i=0; i<200; i++) {
+//   array[i%10] = 10;            // Can eliminate
+//   array[i%1] = 10;             // Can eliminate
+//   array[i%200] = 10;           // Cannot eliminate
+//   array[i%-10] = 10;           // Can eliminate
+//   array[i%array.length] = 10;  // Can eliminate
+//   array[param_i%10] = 10;      // Cannot eliminate, as param_i may be < 0
+// }
+TEST_F(BoundsCheckEliminationTest, ModArrayBoundsElimination) {
+  HBasicBlock* entry = new (&allocator_) HBasicBlock(graph_);
+  graph_->AddBlock(entry);
+  graph_->SetEntryBlock(entry);
+  HInstruction* param_i = new (&allocator_)
+      HParameterValue(graph_->GetDexFile(), dex::TypeIndex(0), 0, Primitive::kPrimInt);
+  entry->AddInstruction(param_i);
+
+  HInstruction* constant_0 = graph_->GetIntConstant(0);
+  HInstruction* constant_1 = graph_->GetIntConstant(1);
+  HInstruction* constant_10 = graph_->GetIntConstant(10);
+  HInstruction* constant_200 = graph_->GetIntConstant(200);
+  HInstruction* constant_minus_10 = graph_->GetIntConstant(-10);
+
+  HBasicBlock* block = new (&allocator_) HBasicBlock(graph_);
+  graph_->AddBlock(block);
+  entry->AddSuccessor(block);
+  // We pass a bogus constant for the class to avoid mocking one.
+  HInstruction* new_array = new (&allocator_) HNewArray(constant_10, constant_10, 0);
+  block->AddInstruction(new_array);
+  block->AddInstruction(new (&allocator_) HGoto());
+
+  HBasicBlock* loop_header = new (&allocator_) HBasicBlock(graph_);
+  HBasicBlock* loop_body = new (&allocator_) HBasicBlock(graph_);
+  HBasicBlock* exit = new (&allocator_) HBasicBlock(graph_);
+
+  graph_->AddBlock(loop_header);
+  graph_->AddBlock(loop_body);
+  graph_->AddBlock(exit);
+  block->AddSuccessor(loop_header);
+  loop_header->AddSuccessor(exit);       // true successor
+  loop_header->AddSuccessor(loop_body);  // false successor
+  loop_body->AddSuccessor(loop_header);
+
+  HPhi* phi = new (&allocator_) HPhi(&allocator_, 0, 0, Primitive::kPrimInt);
+  HInstruction* cmp = new (&allocator_) HGreaterThanOrEqual(phi, constant_200);
+  HInstruction* if_inst = new (&allocator_) HIf(cmp);
+  loop_header->AddPhi(phi);
+  loop_header->AddInstruction(cmp);
+  loop_header->AddInstruction(if_inst);
+  phi->AddInput(constant_0);
+
+  //////////////////////////////////////////////////////////////////////////////////
+  // LOOP BODY:
+  // array[i % 10] = 10;
+  HRem* i_mod_10 = new (&allocator_) HRem(Primitive::kPrimInt, phi, constant_10, 0);
+  HBoundsCheck* bounds_check_i_mod_10 = new (&allocator_) HBoundsCheck(i_mod_10, constant_10, 0);
+  HInstruction* array_set = new (&allocator_) HArraySet(
+      new_array, bounds_check_i_mod_10, constant_10, Primitive::kPrimInt, 0);
+  loop_body->AddInstruction(i_mod_10);
+  loop_body->AddInstruction(bounds_check_i_mod_10);
+  loop_body->AddInstruction(array_set);
+
+  // array[i % 1] = 10;
+  HRem* i_mod_1 = new (&allocator_) HRem(Primitive::kPrimInt, phi, constant_1, 0);
+  HBoundsCheck* bounds_check_i_mod_1 = new (&allocator_) HBoundsCheck(i_mod_1, constant_10, 0);
+  array_set = new (&allocator_) HArraySet(
+      new_array, bounds_check_i_mod_1, constant_10, Primitive::kPrimInt, 0);
+  loop_body->AddInstruction(i_mod_1);
+  loop_body->AddInstruction(bounds_check_i_mod_1);
+  loop_body->AddInstruction(array_set);
+
+  // array[i % 200] = 10;
+  HRem* i_mod_200 = new (&allocator_) HRem(Primitive::kPrimInt, phi, constant_200, 0);
+  HBoundsCheck* bounds_check_i_mod_200 = new (&allocator_) HBoundsCheck(i_mod_200, constant_10, 0);
+  array_set = new (&allocator_) HArraySet(
+      new_array, bounds_check_i_mod_200, constant_10, Primitive::kPrimInt, 0);
+  loop_body->AddInstruction(i_mod_200);
+  loop_body->AddInstruction(bounds_check_i_mod_200);
+  loop_body->AddInstruction(array_set);
+
+  // array[i % -10] = 10;
+  HRem* i_mod_minus_10 = new (&allocator_) HRem(Primitive::kPrimInt, phi, constant_minus_10, 0);
+  HBoundsCheck* bounds_check_i_mod_minus_10 = new (&allocator_) HBoundsCheck(
+      i_mod_minus_10, constant_10, 0);
+  array_set = new (&allocator_) HArraySet(
+      new_array, bounds_check_i_mod_minus_10, constant_10, Primitive::kPrimInt, 0);
+  loop_body->AddInstruction(i_mod_minus_10);
+  loop_body->AddInstruction(bounds_check_i_mod_minus_10);
+  loop_body->AddInstruction(array_set);
+
+  // array[i%array.length] = 10;
+  HNullCheck* null_check = new (&allocator_) HNullCheck(new_array, 0);
+  HArrayLength* array_length = new (&allocator_) HArrayLength(null_check, 0);
+  HRem* i_mod_array_length = new (&allocator_) HRem(Primitive::kPrimInt, phi, array_length, 0);
+  HBoundsCheck* bounds_check_i_mod_array_len = new (&allocator_) HBoundsCheck(
+      i_mod_array_length, array_length, 0);
+  array_set = new (&allocator_) HArraySet(
+      null_check, bounds_check_i_mod_array_len, constant_10, Primitive::kPrimInt, 0);
+  loop_body->AddInstruction(null_check);
+  loop_body->AddInstruction(array_length);
+  loop_body->AddInstruction(i_mod_array_length);
+  loop_body->AddInstruction(bounds_check_i_mod_array_len);
+  loop_body->AddInstruction(array_set);
+
+  // array[param_i % 10] = 10;
+  HRem* param_i_mod_10 = new (&allocator_) HRem(Primitive::kPrimInt, param_i, constant_10, 0);
+  HBoundsCheck* bounds_check_param_i_mod_10 = new (&allocator_) HBoundsCheck(
+      param_i_mod_10, constant_10, 0);
+  array_set = new (&allocator_) HArraySet(
+      new_array, bounds_check_param_i_mod_10, constant_10, Primitive::kPrimInt, 0);
+  loop_body->AddInstruction(param_i_mod_10);
+  loop_body->AddInstruction(bounds_check_param_i_mod_10);
+  loop_body->AddInstruction(array_set);
+
+  // array[param_i%array.length] = 10;
+  null_check = new (&allocator_) HNullCheck(new_array, 0);
+  array_length = new (&allocator_) HArrayLength(null_check, 0);
+  HRem* param_i_mod_array_length = new (&allocator_) HRem(
+      Primitive::kPrimInt, param_i, array_length, 0);
+  HBoundsCheck* bounds_check_param_i_mod_array_len = new (&allocator_) HBoundsCheck(
+      param_i_mod_array_length, array_length, 0);
+  array_set = new (&allocator_) HArraySet(
+      null_check, bounds_check_param_i_mod_array_len, constant_10, Primitive::kPrimInt, 0);
+  loop_body->AddInstruction(null_check);
+  loop_body->AddInstruction(array_length);
+  loop_body->AddInstruction(param_i_mod_array_length);
+  loop_body->AddInstruction(bounds_check_param_i_mod_array_len);
+  loop_body->AddInstruction(array_set);
+
+  // i++;
+  HInstruction* add = new (&allocator_) HAdd(Primitive::kPrimInt, phi, constant_1);
+  loop_body->AddInstruction(add);
+  loop_body->AddInstruction(new (&allocator_) HGoto());
+  phi->AddInput(add);
+  //////////////////////////////////////////////////////////////////////////////////
+
+  exit->AddInstruction(new (&allocator_) HExit());
+
+  RunBCE();
+
+  ASSERT_TRUE(IsRemoved(bounds_check_i_mod_10));
+  ASSERT_TRUE(IsRemoved(bounds_check_i_mod_1));
+  ASSERT_FALSE(IsRemoved(bounds_check_i_mod_200));
+  ASSERT_TRUE(IsRemoved(bounds_check_i_mod_minus_10));
+  ASSERT_TRUE(IsRemoved(bounds_check_i_mod_array_len));
+  ASSERT_FALSE(IsRemoved(bounds_check_param_i_mod_10));
+}
+
 }  // namespace art
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index 2927e1f..0d9d3d4 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -31,12 +31,6 @@
 
 namespace art {
 
-void HGraphBuilder::MaybeRecordStat(MethodCompilationStat compilation_stat) {
-  if (compilation_stats_ != nullptr) {
-    compilation_stats_->RecordStat(compilation_stat);
-  }
-}
-
 bool HGraphBuilder::SkipCompilation(size_t number_of_branches) {
   if (compiler_driver_ == nullptr) {
     // Note that the compiler driver is null when unit testing.
@@ -53,7 +47,8 @@
     VLOG(compiler) << "Skip compilation of huge method "
                    << dex_file_->PrettyMethod(dex_compilation_unit_->GetDexMethodIndex())
                    << ": " << code_item_.insns_size_in_code_units_ << " code units";
-    MaybeRecordStat(MethodCompilationStat::kNotCompiledHugeMethod);
+    MaybeRecordStat(compilation_stats_,
+                    MethodCompilationStat::kNotCompiledHugeMethod);
     return true;
   }
 
@@ -63,7 +58,8 @@
     VLOG(compiler) << "Skip compilation of large method with no branch "
                    << dex_file_->PrettyMethod(dex_compilation_unit_->GetDexMethodIndex())
                    << ": " << code_item_.insns_size_in_code_units_ << " code units";
-    MaybeRecordStat(MethodCompilationStat::kNotCompiledLargeMethodNoBranches);
+    MaybeRecordStat(compilation_stats_,
+                    MethodCompilationStat::kNotCompiledLargeMethodNoBranches);
     return true;
   }
 
diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h
index 43429cf..2c9a9ef 100644
--- a/compiler/optimizing/builder.h
+++ b/compiler/optimizing/builder.h
@@ -109,7 +109,6 @@
   static constexpr const char* kBuilderPassName = "builder";
 
  private:
-  void MaybeRecordStat(MethodCompilationStat compilation_stat);
   bool SkipCompilation(size_t number_of_branches);
 
   HGraph* const graph_;
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index d7d0fff..1e5f1ec 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -610,12 +610,6 @@
   }
 }
 
-void CodeGenerator::MaybeRecordStat(MethodCompilationStat compilation_stat, size_t count) const {
-  if (stats_ != nullptr) {
-    stats_->RecordStat(compilation_stat, count);
-  }
-}
-
 std::unique_ptr<CodeGenerator> CodeGenerator::Create(HGraph* graph,
                                                      InstructionSet instruction_set,
                                                      const InstructionSetFeatures& isa_features,
@@ -1212,10 +1206,10 @@
 
 void CodeGenerator::GenerateNullCheck(HNullCheck* instruction) {
   if (compiler_options_.GetImplicitNullChecks()) {
-    MaybeRecordStat(kImplicitNullCheckGenerated);
+    MaybeRecordStat(stats_, kImplicitNullCheckGenerated);
     GenerateImplicitNullCheck(instruction);
   } else {
-    MaybeRecordStat(kExplicitNullCheckGenerated);
+    MaybeRecordStat(stats_, kExplicitNullCheckGenerated);
     GenerateExplicitNullCheck(instruction);
   }
 }
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 51a0bae..30c2b52 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -254,8 +254,6 @@
 
   const CompilerOptions& GetCompilerOptions() const { return compiler_options_; }
 
-  void MaybeRecordStat(MethodCompilationStat compilation_stat, size_t count = 1) const;
-
   // Saves the register in the stack. Returns the size taken on stack.
   virtual size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) = 0;
   // Restores the register from the stack. Returns the size taken on stack.
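
The MaybeGenerateMarkingRegisterCheck() calls sprinkled through
code_generator_arm64.cc below instrument points where the Baker read barrier
marking register (MR, w20 on ARM64) may have gone stale, i.e. after runtime
calls and suspend checks; the __LINE__ argument tags the failing call site. A
sketch of the code such a check plausibly emits, gated by the new
EmitRunTimeChecksInDebugMode() option (register choice and encoding are
assumptions):

    // temp = self->tls32_.is_gc_marking   (loaded via the thread register)
    // cmp   w(mr), w(temp)
    // b.eq  mr_is_ok
    // brk   #code                         // marking register out of sync
    // mr_is_ok:
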
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 4999950..3be774a 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -1595,6 +1595,8 @@
       __ Str(wzr, MemOperand(sp, GetStackOffsetOfShouldDeoptimizeFlag()));
     }
   }
+
+  MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
 }
 
 void CodeGeneratorARM64::GenerateFrameExit() {
@@ -3587,6 +3589,7 @@
   }
   if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) {
     GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr);
+    codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
   }
   if (!codegen_->GoesToNextBlock(block, successor)) {
     __ B(codegen_->GetLabelOf(successor));
@@ -4391,6 +4394,7 @@
 
 void InstructionCodeGeneratorARM64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
   codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke);
+  codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
 }
 
 void LocationsBuilderARM64::HandleInvoke(HInvoke* invoke) {
@@ -4459,6 +4463,8 @@
     DCHECK(!codegen_->IsLeafMethod());
     codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
   }
+
+  codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
 }
 
 void LocationsBuilderARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
@@ -4626,6 +4632,7 @@
 
 void InstructionCodeGeneratorARM64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
   codegen_->GenerateInvokePolymorphicCall(invoke);
+  codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
 }
 
 vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativeMethodPatch(
@@ -4801,27 +4808,37 @@
   DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
 
   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
+    codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
     return;
   }
 
-  // Ensure that between the BLR (emitted by GenerateStaticOrDirectCall) and RecordPcInfo there
-  // are no pools emitted.
-  EmissionCheckScope guard(GetVIXLAssembler(), kInvokeCodeMarginSizeInBytes);
-  LocationSummary* locations = invoke->GetLocations();
-  codegen_->GenerateStaticOrDirectCall(
-      invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
+  {
+    // Ensure that between the BLR (emitted by GenerateStaticOrDirectCall) and RecordPcInfo there
+    // are no pools emitted.
+    EmissionCheckScope guard(GetVIXLAssembler(), kInvokeCodeMarginSizeInBytes);
+    LocationSummary* locations = invoke->GetLocations();
+    codegen_->GenerateStaticOrDirectCall(
+        invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
+  }
+
+  codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
 }
 
 void InstructionCodeGeneratorARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
+    codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
     return;
   }
 
-  // Ensure that between the BLR (emitted by GenerateVirtualCall) and RecordPcInfo there
-  // are no pools emitted.
-  EmissionCheckScope guard(GetVIXLAssembler(), kInvokeCodeMarginSizeInBytes);
-  codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
-  DCHECK(!codegen_->IsLeafMethod());
+  {
+    // Ensure that between the BLR (emitted by GenerateVirtualCall) and RecordPcInfo there
+    // are no pools emitted.
+    EmissionCheckScope guard(GetVIXLAssembler(), kInvokeCodeMarginSizeInBytes);
+    codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
+    DCHECK(!codegen_->IsLeafMethod());
+  }
+
+  codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
 }
 
 HLoadClass::LoadKind CodeGeneratorARM64::GetSupportedLoadClassKind(
@@ -4895,6 +4912,7 @@
   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
   if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
     codegen_->GenerateLoadClassRuntimeCall(cls);
+    codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
     return;
   }
   DCHECK(!cls->NeedsAccessCheck());
@@ -4995,6 +5013,7 @@
     } else {
       __ Bind(slow_path->GetExitLabel());
     }
+    codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
   }
 }
 
@@ -5113,6 +5132,7 @@
       codegen_->AddSlowPath(slow_path);
       __ Cbz(out.X(), slow_path->GetEntryLabel());
       __ Bind(slow_path->GetExitLabel());
+      codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
       return;
     }
     case HLoadString::LoadKind::kJitTableAddress: {
@@ -5137,6 +5157,7 @@
   __ Mov(calling_convention.GetRegisterAt(0).W(), load->GetStringIndex().index_);
   codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc());
   CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
+  codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
 }
 
 void LocationsBuilderARM64::VisitLongConstant(HLongConstant* constant) {
@@ -5164,6 +5185,7 @@
   } else {
     CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
   }
+  codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
 }
 
 void LocationsBuilderARM64::VisitMul(HMul* mul) {
@@ -5260,6 +5282,7 @@
       CodeGenerator::GetArrayAllocationEntrypoint(instruction->GetLoadClass()->GetClass());
   codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc());
   CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
+  codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
 }
 
 void LocationsBuilderARM64::VisitNewInstance(HNewInstance* instruction) {
@@ -5296,6 +5319,7 @@
     codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
     CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
   }
+  codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
 }
 
 void LocationsBuilderARM64::VisitNot(HNot* instruction) {
@@ -5644,6 +5668,7 @@
     return;
   }
   GenerateSuspendCheck(instruction, nullptr);
+  codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
 }
 
 void LocationsBuilderARM64::VisitThrow(HThrow* instruction) {
@@ -6021,6 +6046,7 @@
     // Note that GC roots are not affected by heap poisoning, thus we
     // do not have to unpoison `root_reg` here.
   }
+  codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
 }
 
 void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
@@ -6074,22 +6100,25 @@
         obj.GetCode());
     vixl::aarch64::Label* cbnz_label = NewBakerReadBarrierPatch(custom_data);
 
-    EmissionCheckScope guard(GetVIXLAssembler(),
-                             (kPoisonHeapReferences ? 4u : 3u) * vixl::aarch64::kInstructionSize);
-    vixl::aarch64::Label return_address;
-    __ adr(lr, &return_address);
-    __ Bind(cbnz_label);
-    __ cbnz(mr, static_cast<int64_t>(0));  // Placeholder, patched at link-time.
-    static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
-                  "Field LDR must be 1 instruction (4B) before the return address label; "
-                  " 2 instructions (8B) for heap poisoning.");
-    Register ref_reg = RegisterFrom(ref, Primitive::kPrimNot);
-    __ ldr(ref_reg, MemOperand(base.X(), offset));
-    if (needs_null_check) {
-      MaybeRecordImplicitNullCheck(instruction);
+    {
+      EmissionCheckScope guard(GetVIXLAssembler(),
+                               (kPoisonHeapReferences ? 4u : 3u) * vixl::aarch64::kInstructionSize);
+      vixl::aarch64::Label return_address;
+      __ adr(lr, &return_address);
+      __ Bind(cbnz_label);
+      __ cbnz(mr, static_cast<int64_t>(0));  // Placeholder, patched at link-time.
+      static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
+                    "Field LDR must be 1 instruction (4B) before the return address label; "
+                    " 2 instructions (8B) for heap poisoning.");
+      Register ref_reg = RegisterFrom(ref, Primitive::kPrimNot);
+      __ ldr(ref_reg, MemOperand(base.X(), offset));
+      if (needs_null_check) {
+        MaybeRecordImplicitNullCheck(instruction);
+      }
+      GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
+      __ Bind(&return_address);
     }
-    GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
-    __ Bind(&return_address);
+    MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__, /* temp_loc */ LocationFrom(ip1));
     return;
   }
 
@@ -6158,19 +6187,22 @@
     vixl::aarch64::Label* cbnz_label = NewBakerReadBarrierPatch(custom_data);
 
     __ Add(temp.X(), obj.X(), Operand(data_offset));
-    EmissionCheckScope guard(GetVIXLAssembler(),
-                             (kPoisonHeapReferences ? 4u : 3u) * vixl::aarch64::kInstructionSize);
-    vixl::aarch64::Label return_address;
-    __ adr(lr, &return_address);
-    __ Bind(cbnz_label);
-    __ cbnz(mr, static_cast<int64_t>(0));  // Placeholder, patched at link-time.
-    static_assert(BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
-                  "Array LDR must be 1 instruction (4B) before the return address label; "
-                  " 2 instructions (8B) for heap poisoning.");
-    __ ldr(ref_reg, MemOperand(temp.X(), index_reg.X(), LSL, scale_factor));
-    DCHECK(!needs_null_check);  // The thunk cannot handle the null check.
-    GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
-    __ Bind(&return_address);
+    {
+      EmissionCheckScope guard(GetVIXLAssembler(),
+                               (kPoisonHeapReferences ? 4u : 3u) * vixl::aarch64::kInstructionSize);
+      vixl::aarch64::Label return_address;
+      __ adr(lr, &return_address);
+      __ Bind(cbnz_label);
+      __ cbnz(mr, static_cast<int64_t>(0));  // Placeholder, patched at link-time.
+      static_assert(BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
+                    "Array LDR must be 1 instruction (4B) before the return address label; "
+                    " 2 instructions (8B) for heap poisoning.");
+      __ ldr(ref_reg, MemOperand(temp.X(), index_reg.X(), LSL, scale_factor));
+      DCHECK(!needs_null_check);  // The thunk cannot handle the null check.
+      GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
+      __ Bind(&return_address);
+    }
+    MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__, /* temp_loc */ LocationFrom(ip1));
     return;
   }
 
@@ -6247,6 +6279,7 @@
   GenerateRawReferenceLoad(
       instruction, ref, obj, offset, index, scale_factor, needs_null_check, use_load_acquire);
   __ Bind(slow_path->GetExitLabel());
+  MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
 }
 
 void CodeGeneratorARM64::UpdateReferenceFieldWithBakerReadBarrier(HInstruction* instruction,
@@ -6303,6 +6336,7 @@
   // Fast path: the GC is not marking: nothing to do (the field is
   // up-to-date, and we don't need to load the reference).
   __ Bind(slow_path->GetExitLabel());
+  MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
 }
 
 void CodeGeneratorARM64::GenerateRawReferenceLoad(HInstruction* instruction,
@@ -6381,6 +6415,19 @@
   GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
 }
 
+void CodeGeneratorARM64::MaybeGenerateMarkingRegisterCheck(int code, Location temp_loc) {
+  // The following condition is a compile-time one, so it does not have a run-time cost.
+  if (kEmitCompilerReadBarrier && kUseBakerReadBarrier && kIsDebugBuild) {
+    // The following condition is a run-time one; it is executed after the
+    // previous compile-time test, to avoid penalizing non-debug builds.
+    if (GetCompilerOptions().EmitRunTimeChecksInDebugMode()) {
+      UseScratchRegisterScope temps(GetVIXLAssembler());
+      Register temp = temp_loc.IsValid() ? WRegisterFrom(temp_loc) : temps.AcquireW();
+      GetAssembler()->GenerateMarkingRegisterCheck(temp, code);
+    }
+  }
+}
+
 void CodeGeneratorARM64::GenerateReadBarrierSlow(HInstruction* instruction,
                                                  Location out,
                                                  Location ref,
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 584eead..c339209 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -687,6 +687,22 @@
                                 bool needs_null_check,
                                 bool use_load_acquire);
 
+  // Emit code checking the status of the Marking Register, and
+  // aborting the program if MR does not match the value stored in the
+  // art::Thread object. Code is only emitted in debug mode and if
+  // CompilerOptions::EmitRunTimeChecksInDebugMode returns true.
+  //
+  // Argument `code` is used to identify the different occurrences of
+  // MaybeGenerateMarkingRegisterCheck in the code generator, and is
+  // passed to the BRK instruction.
+  //
+  // If `temp_loc` is a valid location, it is expected to be a
+  // register and will be used as a temporary to generate code;
+  // otherwise, a temporary will be fetched from the core register
+  // scratch pool.
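+  //
+  // A rough sketch of the check sequence emitted by the assembler's
+  // GenerateMarkingRegisterCheck helper (assumed here; the helper itself
+  // is not part of this change):
+  //
+  //   ldr <temp>, [tr, #<Thread::IsGcMarkingOffset()>]
+  //   cmp mr, <temp>
+  //   b.eq mr_is_ok
+  //   brk #code
+  //  mr_is_ok: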
+  virtual void MaybeGenerateMarkingRegisterCheck(int code,
+                                                 Location temp_loc = Location::NoLocation());
+
   // Generate a read barrier for a heap reference within `instruction`
   // using a slow path.
   //
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index 430cdde..3d45dd3 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -94,6 +94,9 @@
 // The reserved entrypoint register for link-time generated thunks.
 const vixl32::Register kBakerCcEntrypointRegister = r4;
 
+// Using a base helps identify when we hit Marking Register check breakpoints.
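+// For instance, the check numbered /* code */ 1 below breaks with BKPT #0x11.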
+constexpr int kMarkingRegisterCheckBreakCodeBaseCode = 0x10;
+
 #ifdef __
 #error "ARM Codegen VIXL macro-assembler macro already defined."
 #endif
@@ -2690,6 +2693,8 @@
     __ Mov(temp, 0);
     GetAssembler()->StoreToOffset(kStoreWord, temp, sp, GetStackOffsetOfShouldDeoptimizeFlag());
   }
+
+  MaybeGenerateMarkingRegisterCheck(/* code */ 1);
 }
 
 void CodeGeneratorARMVIXL::GenerateFrameExit() {
@@ -2938,6 +2943,7 @@
   }
   if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) {
     GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr);
+    codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 2);
   }
   if (!codegen_->GoesToNextBlock(block, successor)) {
     __ B(codegen_->GetLabelOf(successor));
@@ -3655,6 +3661,7 @@
 
 void InstructionCodeGeneratorARMVIXL::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
   codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke);
+  codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 3);
 }
 
 void LocationsBuilderARMVIXL::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
@@ -3685,12 +3692,15 @@
   DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
 
   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
+    codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 4);
     return;
   }
 
   LocationSummary* locations = invoke->GetLocations();
   codegen_->GenerateStaticOrDirectCall(
       invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
+
+  codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 5);
 }
 
 void LocationsBuilderARMVIXL::HandleInvoke(HInvoke* invoke) {
@@ -3709,11 +3719,14 @@
 
 void InstructionCodeGeneratorARMVIXL::VisitInvokeVirtual(HInvokeVirtual* invoke) {
   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
+    codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 6);
     return;
   }
 
   codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
   DCHECK(!codegen_->IsLeafMethod());
+
+  codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 7);
 }
 
 void LocationsBuilderARMVIXL::VisitInvokeInterface(HInvokeInterface* invoke) {
@@ -3790,6 +3803,8 @@
     codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
     DCHECK(!codegen_->IsLeafMethod());
   }
+
+  codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 8);
 }
 
 void LocationsBuilderARMVIXL::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
@@ -3798,6 +3813,7 @@
 
 void InstructionCodeGeneratorARMVIXL::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
   codegen_->GenerateInvokePolymorphicCall(invoke);
+  codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 9);
 }
 
 void LocationsBuilderARMVIXL::VisitNeg(HNeg* neg) {
@@ -5329,6 +5345,7 @@
     codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
     CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
   }
+  codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 10);
 }
 
 void LocationsBuilderARMVIXL::VisitNewArray(HNewArray* instruction) {
@@ -5348,6 +5365,7 @@
   codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc());
   CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
   DCHECK(!codegen_->IsLeafMethod());
+  codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 11);
 }
 
 void LocationsBuilderARMVIXL::VisitParameterValue(HParameterValue* instruction) {
@@ -6965,6 +6983,7 @@
     return;
   }
   GenerateSuspendCheck(instruction, nullptr);
+  codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 12);
 }
 
 void InstructionCodeGeneratorARMVIXL::GenerateSuspendCheck(HSuspendCheck* instruction,
@@ -7326,6 +7345,7 @@
   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
   if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
     codegen_->GenerateLoadClassRuntimeCall(cls);
+    codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 13);
     return;
   }
   DCHECK(!cls->NeedsAccessCheck());
@@ -7405,6 +7425,7 @@
     } else {
       __ Bind(slow_path->GetExitLabel());
     }
+    codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 14);
   }
 }
 
@@ -7528,6 +7549,7 @@
       codegen_->AddSlowPath(slow_path);
       __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel());
       __ Bind(slow_path->GetExitLabel());
+      codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 15);
       return;
     }
     case HLoadString::LoadKind::kJitTableAddress: {
@@ -7548,6 +7570,7 @@
   __ Mov(calling_convention.GetRegisterAt(0), load->GetStringIndex().index_);
   codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc());
   CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
+  codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 16);
 }
 
 static int32_t GetExceptionTlsOffset() {
@@ -8146,6 +8169,7 @@
   } else {
     CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
   }
+  codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 17);
 }
 
 void LocationsBuilderARMVIXL::VisitAnd(HAnd* instruction) {
@@ -8647,6 +8671,7 @@
     // Note that GC roots are not affected by heap poisoning, thus we
     // do not have to unpoison `root_reg` here.
   }
+  codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 18);
 }
 
 void CodeGeneratorARMVIXL::MaybeAddBakerCcEntrypointTempForFields(LocationSummary* locations) {
@@ -8711,31 +8736,34 @@
         base.GetCode(), obj.GetCode(), narrow);
     vixl32::Label* bne_label = NewBakerReadBarrierPatch(custom_data);
 
-    vixl::EmissionCheckScope guard(
-        GetVIXLAssembler(),
-        (kPoisonHeapReferences ? 5u : 4u) * vixl32::kMaxInstructionSizeInBytes);
-    vixl32::Label return_address;
-    EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
-    __ cmp(mr, Operand(0));
-    EmitPlaceholderBne(this, bne_label);
-    ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
-    __ ldr(EncodingSize(narrow ? Narrow : Wide), ref_reg, MemOperand(base, offset));
-    if (needs_null_check) {
-      MaybeRecordImplicitNullCheck(instruction);
-    }
-    // Note: We need a specific width for the unpoisoning NEG.
-    if (kPoisonHeapReferences) {
-      if (narrow) {
-        // The only 16-bit encoding is T1 which sets flags outside IT block (i.e. RSBS, not RSB).
-        __ rsbs(EncodingSize(Narrow), ref_reg, ref_reg, Operand(0));
-      } else {
-        __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0));
+    {
+      vixl::EmissionCheckScope guard(
+          GetVIXLAssembler(),
+          (kPoisonHeapReferences ? 5u : 4u) * vixl32::kMaxInstructionSizeInBytes);
+      vixl32::Label return_address;
+      EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
+      __ cmp(mr, Operand(0));
+      EmitPlaceholderBne(this, bne_label);
+      ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
+      __ ldr(EncodingSize(narrow ? Narrow : Wide), ref_reg, MemOperand(base, offset));
+      if (needs_null_check) {
+        MaybeRecordImplicitNullCheck(instruction);
       }
+      // Note: We need a specific width for the unpoisoning NEG.
+      if (kPoisonHeapReferences) {
+        if (narrow) {
+          // The only 16-bit encoding is T1 which sets flags outside IT block (i.e. RSBS, not RSB).
+          __ rsbs(EncodingSize(Narrow), ref_reg, ref_reg, Operand(0));
+        } else {
+          __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0));
+        }
+      }
+      __ Bind(&return_address);
+      DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
+                narrow ? BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET
+                       : BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET);
     }
-    __ Bind(&return_address);
-    DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
-              narrow ? BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET
-                     : BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET);
+    MaybeGenerateMarkingRegisterCheck(/* code */ 19, /* temp_loc */ LocationFrom(ip));
     return;
   }
 
@@ -8796,23 +8824,26 @@
     vixl32::Label* bne_label = NewBakerReadBarrierPatch(custom_data);
 
     __ Add(data_reg, obj, Operand(data_offset));
-    vixl::EmissionCheckScope guard(
-        GetVIXLAssembler(),
-        (kPoisonHeapReferences ? 5u : 4u) * vixl32::kMaxInstructionSizeInBytes);
-    vixl32::Label return_address;
-    EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
-    __ cmp(mr, Operand(0));
-    EmitPlaceholderBne(this, bne_label);
-    ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
-    __ ldr(ref_reg, MemOperand(data_reg, index_reg, vixl32::LSL, scale_factor));
-    DCHECK(!needs_null_check);  // The thunk cannot handle the null check.
-    // Note: We need a Wide NEG for the unpoisoning.
-    if (kPoisonHeapReferences) {
-      __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0));
+    {
+      vixl::EmissionCheckScope guard(
+          GetVIXLAssembler(),
+          (kPoisonHeapReferences ? 5u : 4u) * vixl32::kMaxInstructionSizeInBytes);
+      vixl32::Label return_address;
+      EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
+      __ cmp(mr, Operand(0));
+      EmitPlaceholderBne(this, bne_label);
+      ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
+      __ ldr(ref_reg, MemOperand(data_reg, index_reg, vixl32::LSL, scale_factor));
+      DCHECK(!needs_null_check);  // The thunk cannot handle the null check.
+      // Note: We need a Wide NEG for the unpoisoning.
+      if (kPoisonHeapReferences) {
+        __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0));
+      }
+      __ Bind(&return_address);
+      DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
+                BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET);
     }
-    __ Bind(&return_address);
-    DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
-              BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET);
+    MaybeGenerateMarkingRegisterCheck(/* code */ 20, /* temp_loc */ LocationFrom(ip));
     return;
   }
 
@@ -8866,6 +8897,7 @@
   // Fast path: the GC is not marking: just load the reference.
   GenerateRawReferenceLoad(instruction, ref, obj, offset, index, scale_factor, needs_null_check);
   __ Bind(slow_path->GetExitLabel());
+  MaybeGenerateMarkingRegisterCheck(/* code */ 21);
 }
 
 void CodeGeneratorARMVIXL::UpdateReferenceFieldWithBakerReadBarrier(HInstruction* instruction,
@@ -8920,6 +8952,7 @@
   // Fast path: the GC is not marking: nothing to do (the field is
   // up-to-date, and we don't need to load the reference).
   __ Bind(slow_path->GetExitLabel());
+  MaybeGenerateMarkingRegisterCheck(/* code */ 22);
 }
 
 void CodeGeneratorARMVIXL::GenerateRawReferenceLoad(HInstruction* instruction,
@@ -8981,6 +9014,20 @@
   GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
 }
 
+void CodeGeneratorARMVIXL::MaybeGenerateMarkingRegisterCheck(int code, Location temp_loc) {
+  // The following condition is a compile-time one, so it does not have a run-time cost.
+  if (kEmitCompilerReadBarrier && kUseBakerReadBarrier && kIsDebugBuild) {
+    // The following condition is a run-time one; it is executed after the
+    // previous compile-time test, to avoid penalizing non-debug builds.
+    if (GetCompilerOptions().EmitRunTimeChecksInDebugMode()) {
+      UseScratchRegisterScope temps(GetVIXLAssembler());
+      vixl32::Register temp = temp_loc.IsValid() ? RegisterFrom(temp_loc) : temps.Acquire();
+      GetAssembler()->GenerateMarkingRegisterCheck(temp,
+                                                   kMarkingRegisterCheckBreakCodeBaseCode + code);
+    }
+  }
+}
+
 void CodeGeneratorARMVIXL::GenerateReadBarrierSlow(HInstruction* instruction,
                                                    Location out,
                                                    Location ref,
diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h
index 7ab2993..5feb33b 100644
--- a/compiler/optimizing/code_generator_arm_vixl.h
+++ b/compiler/optimizing/code_generator_arm_vixl.h
@@ -661,6 +661,28 @@
                                 ScaleFactor scale_factor,
                                 bool needs_null_check);
 
+  // Emit code checking the status of the Marking Register, and
+  // aborting the program if MR does not match the value stored in the
+  // art::Thread object. Code is only emitted in debug mode and if
+  // CompilerOptions::EmitRunTimeChecksInDebugMode returns true.
+  //
+  // Argument `code` is used to identify the different occurrences of
+  // MaybeGenerateMarkingRegisterCheck in the code generator, and is
+  // used together with kMarkingRegisterCheckBreakCodeBaseCode to
+  // create the value passed to the BKPT instruction. Note that unlike
+  // in the ARM64 code generator, where `__LINE__` is passed as `code`
+  // argument to
+  // CodeGeneratorARM64::MaybeGenerateMarkingRegisterCheck, we cannot
+  // realistically do that here, as Encoding T1 for the BKPT
+  // instruction only accepts 8-bit immediate values.
+  //
+  // If `temp_loc` is a valid location, it is expected to be a
+  // register and will be used as a temporary to generate code;
+  // otherwise, a temporary will be fetched from the core register
+  // scratch pool.
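+  //
+  // A rough sketch of the check sequence emitted by the assembler's
+  // GenerateMarkingRegisterCheck helper (assumed here; the helper itself
+  // is not part of this change):
+  //
+  //   ldr <temp>, [tr, #<Thread::IsGcMarkingOffset()>]
+  //   cmp mr, <temp>
+  //   beq mr_is_ok
+  //   bkpt #(kMarkingRegisterCheckBreakCodeBaseCode + code)
+  //  mr_is_ok: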
+  virtual void MaybeGenerateMarkingRegisterCheck(int code,
+                                                 Location temp_loc = Location::NoLocation());
+
   // Generate a read barrier for a heap reference within `instruction`
   // using a slow path.
   //
diff --git a/compiler/optimizing/code_generator_vector_arm64.cc b/compiler/optimizing/code_generator_vector_arm64.cc
index f422b9f..9095ecd 100644
--- a/compiler/optimizing/code_generator_vector_arm64.cc
+++ b/compiler/optimizing/code_generator_vector_arm64.cc
@@ -15,7 +15,9 @@
  */
 
 #include "code_generator_arm64.h"
+
 #include "mirror/array-inl.h"
+#include "mirror/string.h"
 
 using namespace vixl::aarch64;  // NOLINT(build/namespaces)
 
diff --git a/compiler/optimizing/code_generator_vector_mips.cc b/compiler/optimizing/code_generator_vector_mips.cc
index ea36e90..6bf28ab 100644
--- a/compiler/optimizing/code_generator_vector_mips.cc
+++ b/compiler/optimizing/code_generator_vector_mips.cc
@@ -819,11 +819,74 @@
 }
 
 void LocationsBuilderMIPS::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
-  LOG(FATAL) << "No SIMD for " << instr->GetId();
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr);
+  switch (instr->GetPackedType()) {
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong:
+      locations->SetInAt(
+          HVecMultiplyAccumulate::kInputAccumulatorIndex, Location::RequiresFpuRegister());
+      locations->SetInAt(
+          HVecMultiplyAccumulate::kInputMulLeftIndex, Location::RequiresFpuRegister());
+      locations->SetInAt(
+          HVecMultiplyAccumulate::kInputMulRightIndex, Location::RequiresFpuRegister());
+      DCHECK_EQ(HVecMultiplyAccumulate::kInputAccumulatorIndex, 0);
+      locations->SetOut(Location::SameAsFirstInput());
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
 }
 
 void InstructionCodeGeneratorMIPS::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
-  LOG(FATAL) << "No SIMD for " << instr->GetId();
+  LocationSummary* locations = instr->GetLocations();
+  VectorRegister acc =
+      VectorRegisterFrom(locations->InAt(HVecMultiplyAccumulate::kInputAccumulatorIndex));
+  VectorRegister left =
+      VectorRegisterFrom(locations->InAt(HVecMultiplyAccumulate::kInputMulLeftIndex));
+  VectorRegister right =
+      VectorRegisterFrom(locations->InAt(HVecMultiplyAccumulate::kInputMulRightIndex));
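+  // Per vector lane, the MSA MADDV instructions below compute
+  // acc += left * right, and MSUBV computes acc -= left * right.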
+  switch (instr->GetPackedType()) {
+    case Primitive::kPrimByte:
+      DCHECK_EQ(16u, instr->GetVectorLength());
+      if (instr->GetOpKind() == HInstruction::kAdd) {
+        __ MaddvB(acc, left, right);
+      } else {
+        __ MsubvB(acc, left, right);
+      }
+      break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(8u, instr->GetVectorLength());
+      if (instr->GetOpKind() == HInstruction::kAdd) {
+        __ MaddvH(acc, left, right);
+      } else {
+        __ MsubvH(acc, left, right);
+      }
+      break;
+    case Primitive::kPrimInt:
+      DCHECK_EQ(4u, instr->GetVectorLength());
+      if (instr->GetOpKind() == HInstruction::kAdd) {
+        __ MaddvW(acc, left, right);
+      } else {
+        __ MsubvW(acc, left, right);
+      }
+      break;
+    case Primitive::kPrimLong:
+      DCHECK_EQ(2u, instr->GetVectorLength());
+      if (instr->GetOpKind() == HInstruction::kAdd) {
+        __ MaddvD(acc, left, right);
+      } else {
+        __ MsubvD(acc, left, right);
+      }
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
 }
 
 // Helper to set up locations for vector memory operations.
diff --git a/compiler/optimizing/code_generator_vector_mips64.cc b/compiler/optimizing/code_generator_vector_mips64.cc
index 0395db1..75bf7a7 100644
--- a/compiler/optimizing/code_generator_vector_mips64.cc
+++ b/compiler/optimizing/code_generator_vector_mips64.cc
@@ -823,11 +823,74 @@
 }
 
 void LocationsBuilderMIPS64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
-  LOG(FATAL) << "No SIMD for " << instr->GetId();
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr);
+  switch (instr->GetPackedType()) {
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong:
+      locations->SetInAt(
+          HVecMultiplyAccumulate::kInputAccumulatorIndex, Location::RequiresFpuRegister());
+      locations->SetInAt(
+          HVecMultiplyAccumulate::kInputMulLeftIndex, Location::RequiresFpuRegister());
+      locations->SetInAt(
+          HVecMultiplyAccumulate::kInputMulRightIndex, Location::RequiresFpuRegister());
+      DCHECK_EQ(HVecMultiplyAccumulate::kInputAccumulatorIndex, 0);
+      locations->SetOut(Location::SameAsFirstInput());
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
 }
 
 void InstructionCodeGeneratorMIPS64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
-  LOG(FATAL) << "No SIMD for " << instr->GetId();
+  LocationSummary* locations = instr->GetLocations();
+  VectorRegister acc =
+      VectorRegisterFrom(locations->InAt(HVecMultiplyAccumulate::kInputAccumulatorIndex));
+  VectorRegister left =
+      VectorRegisterFrom(locations->InAt(HVecMultiplyAccumulate::kInputMulLeftIndex));
+  VectorRegister right =
+      VectorRegisterFrom(locations->InAt(HVecMultiplyAccumulate::kInputMulRightIndex));
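+  // Per vector lane, the MSA MADDV instructions below compute
+  // acc += left * right, and MSUBV computes acc -= left * right.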
+  switch (instr->GetPackedType()) {
+    case Primitive::kPrimByte:
+      DCHECK_EQ(16u, instr->GetVectorLength());
+      if (instr->GetOpKind() == HInstruction::kAdd) {
+        __ MaddvB(acc, left, right);
+      } else {
+        __ MsubvB(acc, left, right);
+      }
+      break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(8u, instr->GetVectorLength());
+      if (instr->GetOpKind() == HInstruction::kAdd) {
+        __ MaddvH(acc, left, right);
+      } else {
+        __ MsubvH(acc, left, right);
+      }
+      break;
+    case Primitive::kPrimInt:
+      DCHECK_EQ(4u, instr->GetVectorLength());
+      if (instr->GetOpKind() == HInstruction::kAdd) {
+        __ MaddvW(acc, left, right);
+      } else {
+        __ MsubvW(acc, left, right);
+      }
+      break;
+    case Primitive::kPrimLong:
+      DCHECK_EQ(2u, instr->GetVectorLength());
+      if (instr->GetOpKind() == HInstruction::kAdd) {
+        __ MaddvD(acc, left, right);
+      } else {
+        __ MsubvD(acc, left, right);
+      }
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
 }
 
 // Helper to set up locations for vector memory operations.
diff --git a/compiler/optimizing/code_generator_vector_x86.cc b/compiler/optimizing/code_generator_vector_x86.cc
index 14782d7..e7aec76 100644
--- a/compiler/optimizing/code_generator_vector_x86.cc
+++ b/compiler/optimizing/code_generator_vector_x86.cc
@@ -15,7 +15,9 @@
  */
 
 #include "code_generator_x86.h"
+
 #include "mirror/array-inl.h"
+#include "mirror/string.h"
 
 namespace art {
 namespace x86 {
diff --git a/compiler/optimizing/code_generator_vector_x86_64.cc b/compiler/optimizing/code_generator_vector_x86_64.cc
index 246044e..c7ee81c 100644
--- a/compiler/optimizing/code_generator_vector_x86_64.cc
+++ b/compiler/optimizing/code_generator_vector_x86_64.cc
@@ -15,7 +15,9 @@
  */
 
 #include "code_generator_x86_64.h"
+
 #include "mirror/array-inl.h"
+#include "mirror/string.h"
 
 namespace art {
 namespace x86_64 {
diff --git a/compiler/optimizing/code_sinking.cc b/compiler/optimizing/code_sinking.cc
index e598e19..6c3a9fd 100644
--- a/compiler/optimizing/code_sinking.cc
+++ b/compiler/optimizing/code_sinking.cc
@@ -414,7 +414,7 @@
     if (!post_dominated.IsBitSet(position->GetBlock()->GetBlockId())) {
       continue;
     }
-    MaybeRecordStat(MethodCompilationStat::kInstructionSunk);
+    MaybeRecordStat(stats_, MethodCompilationStat::kInstructionSunk);
     instruction->MoveBefore(position, /* ensure_safety */ false);
   }
 }
diff --git a/compiler/optimizing/codegen_test_utils.h b/compiler/optimizing/codegen_test_utils.h
index cada2e6..aa4f5da 100644
--- a/compiler/optimizing/codegen_test_utils.h
+++ b/compiler/optimizing/codegen_test_utils.h
@@ -79,6 +79,21 @@
 };
 
 #ifdef ART_ENABLE_CODEGEN_arm
+// Special ARM code generator for codegen testing in a limited code
+// generation environment (i.e. with no runtime support).
+//
+// Note: If we want to exercise certain HIR constructions
+// (e.g. reference field load in Baker read barrier configuration) in
+// codegen tests in the future, we should also:
+// - save the Thread Register (R9) and possibly the Marking Register
+//   (R8) before entering the generated function (both registers are
+//   callee-save in AAPCS);
+// - set these registers to meaningful values before or upon entering
+//   the generated function (so that generated code using them is
+//   correct);
+// - restore their original values before leaving the generated
+//   function.
+
 // Provide our own codegen, that ensures the C calling conventions
 // are preserved. Currently, ART and C do not match as R4 is caller-save
 // in ART, and callee-save in C. Alternatively, we could use or write
@@ -100,6 +115,50 @@
     blocked_core_registers_[arm::R6] = false;
     blocked_core_registers_[arm::R7] = false;
   }
+
+  void MaybeGenerateMarkingRegisterCheck(int code ATTRIBUTE_UNUSED,
+                                         Location temp_loc ATTRIBUTE_UNUSED) OVERRIDE {
+    // When turned on, the marking register checks in
+    // CodeGeneratorARMVIXL::MaybeGenerateMarkingRegisterCheck expect the
+    // Thread Register and the Marking Register to be set to
+    // meaningful values. This is not the case in codegen testing, so
+    // just disable them entirely here (by doing nothing in this
+    // method).
+  }
+};
+#endif
+
+#ifdef ART_ENABLE_CODEGEN_arm64
+// Special ARM64 code generator for codegen testing in a limited code
+// generation environment (i.e. with no runtime support).
+//
+// Note: If we want to exercise certain HIR constructions
+// (e.g. reference field load in Baker read barrier configuration) in
+// codegen tests in the future, we should also:
+// - save the Thread Register (X19) and possibly the Marking Register
+//   (X20) before entering the generated function (both registers are
+//   callee-save in AAPCS64);
+// - set these registers to meaningful values before or upon entering
+//   the generated function (so that generated code using them is
+//   correct);
+// - restore their original values before leaving the generated
+//   function.
+class TestCodeGeneratorARM64 : public arm64::CodeGeneratorARM64 {
+ public:
+  TestCodeGeneratorARM64(HGraph* graph,
+                         const Arm64InstructionSetFeatures& isa_features,
+                         const CompilerOptions& compiler_options)
+      : arm64::CodeGeneratorARM64(graph, isa_features, compiler_options) {}
+
+  void MaybeGenerateMarkingRegisterCheck(int code ATTRIBUTE_UNUSED,
+                                         Location temp_loc ATTRIBUTE_UNUSED) OVERRIDE {
+    // When turned on, the marking register checks in
+    // CodeGeneratorARM64::MaybeGenerateMarkingRegisterCheck expect the
+    // Thread Register and the Marking Register to be set to
+    // meaningful values. This is not the case in codegen testing, so
+    // just disable them entirely here (by doing nothing in this
+    // method).
+  }
 };
 #endif
 
@@ -263,7 +322,8 @@
                     bool has_result,
                     Expected expected) {
   CompilerOptions compiler_options;
-  std::unique_ptr<CodeGenerator> codegen(target_config.CreateCodeGenerator(graph, compiler_options));
+  std::unique_ptr<CodeGenerator> codegen(target_config.CreateCodeGenerator(graph,
+                                                                           compiler_options));
   RunCode(codegen.get(), graph, hook_before_codegen, has_result, expected);
 }
 
@@ -280,9 +340,8 @@
 CodeGenerator* create_codegen_arm64(HGraph* graph, const CompilerOptions& compiler_options) {
   std::unique_ptr<const Arm64InstructionSetFeatures> features_arm64(
       Arm64InstructionSetFeatures::FromCppDefines());
-  return new (graph->GetArena()) arm64::CodeGeneratorARM64(graph,
-                                                           *features_arm64.get(),
-                                                           compiler_options);
+  return new (graph->GetArena())
+      TestCodeGeneratorARM64(graph, *features_arm64.get(), compiler_options);
 }
 #endif
 
diff --git a/compiler/optimizing/dead_code_elimination.cc b/compiler/optimizing/dead_code_elimination.cc
index c31c66a..787296d 100644
--- a/compiler/optimizing/dead_code_elimination.cc
+++ b/compiler/optimizing/dead_code_elimination.cc
@@ -359,7 +359,7 @@
       DCHECK(!inst->IsControlFlow());
       if (inst->IsDeadAndRemovable()) {
         block->RemoveInstruction(inst);
-        MaybeRecordStat(MethodCompilationStat::kRemovedDeadInstruction);
+        MaybeRecordStat(stats_, MethodCompilationStat::kRemovedDeadInstruction);
       }
     }
   }
diff --git a/compiler/optimizing/induction_var_range.h b/compiler/optimizing/induction_var_range.h
index ab1772b..0b980f5 100644
--- a/compiler/optimizing/induction_var_range.h
+++ b/compiler/optimizing/induction_var_range.h
@@ -151,6 +151,16 @@
   }
 
   /**
+   * Checks if the given phi instruction has been classified as anything by
+   * induction variable analysis. Returns false for anything that cannot be
+   * classified statically, such as reductions or other complex cycles.
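+   * For example, a basic counter phi like `i = i + 1` is classified,
+   * while a reduction phi like `sum = sum + a[i]` is not.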
+   */
+  bool IsClassified(HPhi* phi) const {
+    HLoopInformation* lp = phi->GetBlock()->GetLoopInformation();  // closest enveloping loop
+    return (lp != nullptr) && (induction_analysis_->LookupInfo(lp, phi) != nullptr);
+  }
+
+  /**
    * Checks if header logic of a loop terminates. Sets trip-count tc if known.
    */
   bool IsFinite(HLoopInformation* loop, /*out*/ int64_t* tc) const;
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index 0141c26..6567a3a 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -75,7 +75,7 @@
 #define LOG_TRY() LOG_INTERNAL("Try inlining call: ")
 #define LOG_NOTE() LOG_INTERNAL("Note: ")
 #define LOG_SUCCESS() LOG_INTERNAL("Success: ")
-#define LOG_FAIL(stat) MaybeRecordStat(stat); LOG_INTERNAL("Fail: ")
+#define LOG_FAIL(stats_ptr, stat) MaybeRecordStat(stats_ptr, stat); LOG_INTERNAL("Fail: ")
 #define LOG_FAIL_NO_STAT() LOG_INTERNAL("Fail: ")
 
 std::string HInliner::DepthString(int line) const {
@@ -440,9 +440,9 @@
        // Add dependency due to devirtualization. We've assumed resolved_method
        // has a single implementation.
         outermost_graph_->AddCHASingleImplementationDependency(resolved_method);
-        MaybeRecordStat(kCHAInline);
+        MaybeRecordStat(stats_, kCHAInline);
       } else {
-        MaybeRecordStat(kInlinedInvokeVirtualOrInterface);
+        MaybeRecordStat(stats_, kInlinedInvokeVirtualOrInterface);
       }
     }
     return result;
@@ -532,7 +532,7 @@
     }
 
     case kInlineCacheMonomorphic: {
-      MaybeRecordStat(kMonomorphicCall);
+      MaybeRecordStat(stats_, kMonomorphicCall);
       if (UseOnlyPolymorphicInliningWithNoDeopt()) {
         return TryInlinePolymorphicCall(invoke_instruction, resolved_method, inline_cache);
       } else {
@@ -541,7 +541,7 @@
     }
 
     case kInlineCachePolymorphic: {
-      MaybeRecordStat(kPolymorphicCall);
+      MaybeRecordStat(stats_, kPolymorphicCall);
       return TryInlinePolymorphicCall(invoke_instruction, resolved_method, inline_cache);
     }
 
@@ -550,7 +550,7 @@
           << "Interface or virtual call to "
           << caller_dex_file.PrettyMethod(invoke_instruction->GetDexMethodIndex())
           << " is megamorphic and not inlined";
-      MaybeRecordStat(kMegamorphicCall);
+      MaybeRecordStat(stats_, kMegamorphicCall);
       return false;
     }
 
@@ -754,7 +754,7 @@
   dex::TypeIndex class_index = FindClassIndexIn(
       GetMonomorphicType(classes), caller_compilation_unit_);
   if (!class_index.IsValid()) {
-    LOG_FAIL(kNotInlinedDexCache)
+    LOG_FAIL(stats_, kNotInlinedDexCache)
         << "Call to " << ArtMethod::PrettyMethod(resolved_method)
         << " from inline cache is not inlined because its class is not"
         << " accessible to the caller";
@@ -803,7 +803,7 @@
                                      /* is_first_run */ false);
   rtp_fixup.Run();
 
-  MaybeRecordStat(kInlinedMonomorphicCall);
+  MaybeRecordStat(stats_, kInlinedMonomorphicCall);
   return true;
 }
 
@@ -993,7 +993,7 @@
     return false;
   }
 
-  MaybeRecordStat(kInlinedPolymorphicCall);
+  MaybeRecordStat(stats_, kInlinedPolymorphicCall);
 
   // Run type propagation to get the guards typed.
   ReferenceTypePropagation rtp_fixup(graph_,
@@ -1199,7 +1199,7 @@
                                      /* is_first_run */ false);
   rtp_fixup.Run();
 
-  MaybeRecordStat(kInlinedPolymorphicCall);
+  MaybeRecordStat(stats_, kInlinedPolymorphicCall);
 
   LOG_SUCCESS() << "Inlined same polymorphic target " << actual_method->PrettyMethod();
   return true;
@@ -1300,14 +1300,14 @@
                                  ReferenceTypeInfo receiver_type,
                                  HInstruction** return_replacement) {
   if (method->IsProxyMethod()) {
-    LOG_FAIL(kNotInlinedProxy)
+    LOG_FAIL(stats_, kNotInlinedProxy)
         << "Method " << method->PrettyMethod()
         << " is not inlined because of unimplemented inline support for proxy methods.";
     return false;
   }
 
   if (CountRecursiveCallsOf(method) > kMaximumNumberOfRecursiveCalls) {
-    LOG_FAIL(kNotInlinedRecursiveBudget)
+    LOG_FAIL(stats_, kNotInlinedRecursiveBudget)
         << "Method "
         << method->PrettyMethod()
         << " is not inlined because it has reached its recursive call budget.";
@@ -1321,10 +1321,10 @@
     if (TryPatternSubstitution(invoke_instruction, method, return_replacement)) {
       LOG_SUCCESS() << "Successfully replaced pattern of invoke "
                     << method->PrettyMethod();
-      MaybeRecordStat(kReplacedInvokeWithSimplePattern);
+      MaybeRecordStat(stats_, kReplacedInvokeWithSimplePattern);
       return true;
     }
-    LOG_FAIL(kNotInlinedWont)
+    LOG_FAIL(stats_, kNotInlinedWont)
         << "Won't inline " << method->PrettyMethod() << " in "
         << outer_compilation_unit_.GetDexFile()->GetLocation() << " ("
         << caller_compilation_unit_.GetDexFile()->GetLocation() << ") from "
@@ -1344,7 +1344,7 @@
 
   size_t inline_max_code_units = compiler_driver_->GetCompilerOptions().GetInlineMaxCodeUnits();
   if (code_item->insns_size_in_code_units_ > inline_max_code_units) {
-    LOG_FAIL(kNotInlinedCodeItem)
+    LOG_FAIL(stats_, kNotInlinedCodeItem)
         << "Method " << method->PrettyMethod()
         << " is not inlined because its code item is too big: "
         << code_item->insns_size_in_code_units_
@@ -1354,13 +1354,13 @@
   }
 
   if (code_item->tries_size_ != 0) {
-    LOG_FAIL(kNotInlinedTryCatch)
+    LOG_FAIL(stats_, kNotInlinedTryCatch)
         << "Method " << method->PrettyMethod() << " is not inlined because of try block";
     return false;
   }
 
   if (!method->IsCompilable()) {
-    LOG_FAIL(kNotInlinedNotVerified)
+    LOG_FAIL(stats_, kNotInlinedNotVerified)
         << "Method " << method->PrettyMethod()
         << " has soft failures un-handled by the compiler, so it cannot be inlined";
   }
@@ -1370,7 +1370,7 @@
     if (Runtime::Current()->UseJitCompilation() ||
         !compiler_driver_->IsMethodVerifiedWithoutFailures(
             method->GetDexMethodIndex(), class_def_idx, *method->GetDexFile())) {
-      LOG_FAIL(kNotInlinedNotVerified)
+      LOG_FAIL(stats_, kNotInlinedNotVerified)
           << "Method " << method->PrettyMethod()
           << " couldn't be verified, so it cannot be inlined";
       return false;
@@ -1381,7 +1381,7 @@
       invoke_instruction->AsInvokeStaticOrDirect()->IsStaticWithImplicitClinitCheck()) {
     // Case of a static method that cannot be inlined because it implicitly
     // requires an initialization check of its declaring class.
-    LOG_FAIL(kNotInlinedDexCache) << "Method " << method->PrettyMethod()
+    LOG_FAIL(stats_, kNotInlinedDexCache) << "Method " << method->PrettyMethod()
              << " is not inlined because it is static and requires a clinit"
              << " check that cannot be emitted due to Dex cache limitations";
     return false;
@@ -1393,7 +1393,7 @@
   }
 
   LOG_SUCCESS() << method->PrettyMethod();
-  MaybeRecordStat(kInlinedInvoke);
+  MaybeRecordStat(stats_, kInlinedInvoke);
   return true;
 }
 
@@ -1677,7 +1677,7 @@
                         handles_);
 
   if (builder.BuildGraph() != kAnalysisSuccess) {
-    LOG_FAIL(kNotInlinedCannotBuild)
+    LOG_FAIL(stats_, kNotInlinedCannotBuild)
         << "Method " << callee_dex_file.PrettyMethod(method_index)
         << " could not be built, so cannot be inlined";
     return false;
@@ -1685,7 +1685,7 @@
 
   if (!RegisterAllocator::CanAllocateRegistersFor(*callee_graph,
                                                   compiler_driver_->GetInstructionSet())) {
-    LOG_FAIL(kNotInlinedRegisterAllocator)
+    LOG_FAIL(stats_, kNotInlinedRegisterAllocator)
         << "Method " << callee_dex_file.PrettyMethod(method_index)
         << " cannot be inlined because of the register allocator";
     return false;
@@ -1738,7 +1738,7 @@
 
   HBasicBlock* exit_block = callee_graph->GetExitBlock();
   if (exit_block == nullptr) {
-    LOG_FAIL(kNotInlinedInfiniteLoop)
+    LOG_FAIL(stats_, kNotInlinedInfiniteLoop)
         << "Method " << callee_dex_file.PrettyMethod(method_index)
         << " could not be inlined because it has an infinite loop";
     return false;
@@ -1749,14 +1749,14 @@
     if (predecessor->GetLastInstruction()->IsThrow()) {
       if (invoke_instruction->GetBlock()->IsTryBlock()) {
         // TODO(ngeoffray): Support adding HTryBoundary in Hgraph::InlineInto.
-        LOG_FAIL(kNotInlinedTryCatch)
+        LOG_FAIL(stats_, kNotInlinedTryCatch)
             << "Method " << callee_dex_file.PrettyMethod(method_index)
             << " could not be inlined because one branch always throws and"
             << " caller is in a try/catch block";
         return false;
       } else if (graph_->GetExitBlock() == nullptr) {
         // TODO(ngeoffray): Support adding HExit in the caller graph.
-        LOG_FAIL(kNotInlinedInfiniteLoop)
+        LOG_FAIL(stats_, kNotInlinedInfiniteLoop)
             << "Method " << callee_dex_file.PrettyMethod(method_index)
             << " could not be inlined because one branch always throws and"
             << " caller does not have an exit block";
@@ -1775,7 +1775,7 @@
   }
 
   if (!has_one_return) {
-    LOG_FAIL(kNotInlinedAlwaysThrows)
+    LOG_FAIL(stats_, kNotInlinedAlwaysThrows)
         << "Method " << callee_dex_file.PrettyMethod(method_index)
         << " could not be inlined because it always throws";
     return false;
@@ -1788,7 +1788,7 @@
       if (block->GetLoopInformation()->IsIrreducible()) {
         // Don't inline methods with irreducible loops, they could prevent some
         // optimizations to run.
-        LOG_FAIL(kNotInlinedIrreducibleLoop)
+        LOG_FAIL(stats_, kNotInlinedIrreducibleLoop)
             << "Method " << callee_dex_file.PrettyMethod(method_index)
             << " could not be inlined because it contains an irreducible loop";
         return false;
@@ -1797,7 +1797,7 @@
         // Don't inline methods with loops without exit, since they cause the
         // loop information to be computed incorrectly when updating after
         // inlining.
-        LOG_FAIL(kNotInlinedLoopWithoutExit)
+        LOG_FAIL(stats_, kNotInlinedLoopWithoutExit)
             << "Method " << callee_dex_file.PrettyMethod(method_index)
             << " could not be inlined because it contains a loop with no exit";
         return false;
@@ -1808,7 +1808,7 @@
          !instr_it.Done();
          instr_it.Advance()) {
       if (++number_of_instructions >= inlining_budget_) {
-        LOG_FAIL(kNotInlinedInstructionBudget)
+        LOG_FAIL(stats_, kNotInlinedInstructionBudget)
             << "Method " << callee_dex_file.PrettyMethod(method_index)
             << " is not inlined because the outer method has reached"
             << " its instruction budget limit.";
@@ -1817,7 +1817,7 @@
       HInstruction* current = instr_it.Current();
       if (current->NeedsEnvironment() &&
           (total_number_of_dex_registers_ >= kMaximumNumberOfCumulatedDexRegisters)) {
-        LOG_FAIL(kNotInlinedEnvironmentBudget)
+        LOG_FAIL(stats_, kNotInlinedEnvironmentBudget)
             << "Method " << callee_dex_file.PrettyMethod(method_index)
             << " is not inlined because its caller has reached"
             << " its environment budget limit.";
@@ -1827,7 +1827,7 @@
       if (current->NeedsEnvironment() &&
           !CanEncodeInlinedMethodInStackMap(*caller_compilation_unit_.GetDexFile(),
                                             resolved_method)) {
-        LOG_FAIL(kNotInlinedStackMaps)
+        LOG_FAIL(stats_, kNotInlinedStackMaps)
             << "Method " << callee_dex_file.PrettyMethod(method_index)
             << " could not be inlined because " << current->DebugName()
             << " needs an environment, is in a different dex file"
@@ -1836,7 +1836,7 @@
       }
 
       if (!same_dex_file && current->NeedsDexCacheOfDeclaringClass()) {
-        LOG_FAIL(kNotInlinedDexCache)
+        LOG_FAIL(stats_, kNotInlinedDexCache)
             << "Method " << callee_dex_file.PrettyMethod(method_index)
             << " could not be inlined because " << current->DebugName()
             << " it is in a different dex file and requires access to the dex cache";
@@ -1848,7 +1848,7 @@
           current->IsUnresolvedStaticFieldSet() ||
           current->IsUnresolvedInstanceFieldSet()) {
         // Entrypoint for unresolved fields does not handle inlined frames.
-        LOG_FAIL(kNotInlinedUnresolvedEntrypoint)
+        LOG_FAIL(stats_, kNotInlinedUnresolvedEntrypoint)
             << "Method " << callee_dex_file.PrettyMethod(method_index)
             << " could not be inlined because it is using an unresolved"
             << " entrypoint";
diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc
index 143c77f..ca3b191 100644
--- a/compiler/optimizing/instruction_builder.cc
+++ b/compiler/optimizing/instruction_builder.cc
@@ -28,12 +28,6 @@
 
 namespace art {
 
-void HInstructionBuilder::MaybeRecordStat(MethodCompilationStat compilation_stat) {
-  if (compilation_stats_ != nullptr) {
-    compilation_stats_->RecordStat(compilation_stat);
-  }
-}
-
 HBasicBlock* HInstructionBuilder::FindBlockStartingAt(uint32_t dex_pc) const {
   return block_builder_->GetBlockAt(dex_pc);
 }
@@ -670,6 +664,9 @@
       DCHECK(fence_target != nullptr);
 
       AppendInstruction(new (arena_) HConstructorFence(fence_target, dex_pc, arena_));
+      MaybeRecordStat(
+          compilation_stats_,
+          MethodCompilationStat::kConstructorFenceGeneratedFinal);
     }
     AppendInstruction(new (arena_) HReturnVoid(dex_pc));
   } else {
@@ -816,7 +813,8 @@
   ArtMethod* resolved_method = ResolveMethod(method_idx, invoke_type);
 
   if (UNLIKELY(resolved_method == nullptr)) {
-    MaybeRecordStat(MethodCompilationStat::kUnresolvedMethod);
+    MaybeRecordStat(compilation_stats_,
+                    MethodCompilationStat::kUnresolvedMethod);
     HInvoke* invoke = new (arena_) HInvokeUnresolved(arena_,
                                                      number_of_arguments,
                                                      return_type,
@@ -1039,6 +1037,9 @@
   HConstructorFence* ctor_fence =
       new (arena_) HConstructorFence(allocation, allocation->GetDexPc(), arena_);
   AppendInstruction(ctor_fence);
+  MaybeRecordStat(
+      compilation_stats_,
+      MethodCompilationStat::kConstructorFenceGeneratedNew);
 }
 
 static bool IsSubClass(mirror::Class* to_test, mirror::Class* super_class)
@@ -1122,7 +1123,8 @@
       VLOG(compiler) << "Did not compile "
                      << dex_file_->PrettyMethod(dex_compilation_unit_->GetDexMethodIndex())
                      << " because of non-sequential dex register pair in wide argument";
-      MaybeRecordStat(MethodCompilationStat::kNotCompiledMalformedOpcode);
+      MaybeRecordStat(compilation_stats_,
+                      MethodCompilationStat::kNotCompiledMalformedOpcode);
       return false;
     }
     HInstruction* arg = LoadLocal(is_range ? register_index + i : args[i], type);
@@ -1136,7 +1138,8 @@
     VLOG(compiler) << "Did not compile "
                    << dex_file_->PrettyMethod(dex_compilation_unit_->GetDexMethodIndex())
                    << " because of wrong number of arguments in invoke instruction";
-    MaybeRecordStat(MethodCompilationStat::kNotCompiledMalformedOpcode);
+    MaybeRecordStat(compilation_stats_,
+                    MethodCompilationStat::kNotCompiledMalformedOpcode);
     return false;
   }
 
@@ -1286,7 +1289,8 @@
     HInstruction* value = LoadLocal(source_or_dest_reg, field_type);
     HInstruction* field_set = nullptr;
     if (resolved_field == nullptr) {
-      MaybeRecordStat(MethodCompilationStat::kUnresolvedField);
+      MaybeRecordStat(compilation_stats_,
+                      MethodCompilationStat::kUnresolvedField);
       field_set = new (arena_) HUnresolvedInstanceFieldSet(object,
                                                            value,
                                                            field_type,
@@ -1309,7 +1313,8 @@
   } else {
     HInstruction* field_get = nullptr;
     if (resolved_field == nullptr) {
-      MaybeRecordStat(MethodCompilationStat::kUnresolvedField);
+      MaybeRecordStat(compilation_stats_,
+                      MethodCompilationStat::kUnresolvedField);
       field_get = new (arena_) HUnresolvedInstanceFieldGet(object,
                                                            field_type,
                                                            field_index,
@@ -1444,7 +1449,8 @@
   ArtField* resolved_field = ResolveField(field_index, /* is_static */ true, is_put);
 
   if (resolved_field == nullptr) {
-    MaybeRecordStat(MethodCompilationStat::kUnresolvedField);
+    MaybeRecordStat(compilation_stats_,
+                    MethodCompilationStat::kUnresolvedField);
     Primitive::Type field_type = GetFieldAccessType(*dex_file_, field_index);
     BuildUnresolvedStaticFieldAccess(instruction, dex_pc, is_put, field_type);
     return true;
@@ -1462,7 +1468,8 @@
   if (constant == nullptr) {
     // The class cannot be referenced from this compiled code. Generate
     // an unresolved access.
-    MaybeRecordStat(MethodCompilationStat::kUnresolvedFieldNotAFastAccess);
+    MaybeRecordStat(compilation_stats_,
+                    MethodCompilationStat::kUnresolvedFieldNotAFastAccess);
     BuildUnresolvedStaticFieldAccess(instruction, dex_pc, is_put, field_type);
     return true;
   }
@@ -2823,7 +2830,8 @@
                      << dex_file_->PrettyMethod(dex_compilation_unit_->GetDexMethodIndex())
                      << " because of unhandled instruction "
                      << instruction.Name();
-      MaybeRecordStat(MethodCompilationStat::kNotCompiledUnhandledInstruction);
+      MaybeRecordStat(compilation_stats_,
+                      MethodCompilationStat::kNotCompiledUnhandledInstruction);
       return false;
   }
   return true;
diff --git a/compiler/optimizing/instruction_builder.h b/compiler/optimizing/instruction_builder.h
index 2a9b9f5..b7fa394 100644
--- a/compiler/optimizing/instruction_builder.h
+++ b/compiler/optimizing/instruction_builder.h
@@ -78,8 +78,6 @@
   bool Build();
 
  private:
-  void MaybeRecordStat(MethodCompilationStat compilation_stat);
-
   void InitializeBlockLocals();
   void PropagateLocalsToCatchBlocks();
   void SetLoopHeaderPhiInputs();
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index 02cfbbc..f2a829f 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -43,13 +43,7 @@
   void RecordSimplification() {
     simplification_occurred_ = true;
     simplifications_at_current_position_++;
-    MaybeRecordStat(kInstructionSimplifications);
-  }
-
-  void MaybeRecordStat(MethodCompilationStat stat) {
-    if (stats_ != nullptr) {
-      stats_->RecordStat(stat);
-    }
+    MaybeRecordStat(stats_, kInstructionSimplifications);
   }
 
   bool ReplaceRotateWithRor(HBinaryOperation* op, HUShr* ushr, HShl* shl);
@@ -65,6 +59,7 @@
   bool TryDeMorganNegationFactoring(HBinaryOperation* op);
   bool TryHandleAssociativeAndCommutativeOperation(HBinaryOperation* instruction);
   bool TrySubtractionChainSimplification(HBinaryOperation* instruction);
+  bool TryCombineVecMultiplyAccumulate(HVecMul* mul);
 
   void VisitShift(HBinaryOperation* shift);
 
@@ -104,6 +99,7 @@
   void VisitInstanceOf(HInstanceOf* instruction) OVERRIDE;
   void VisitInvoke(HInvoke* invoke) OVERRIDE;
   void VisitDeoptimize(HDeoptimize* deoptimize) OVERRIDE;
+  void VisitVecMul(HVecMul* instruction) OVERRIDE;
 
   bool CanEnsureNotNullAt(HInstruction* instr, HInstruction* at) const;
 
@@ -249,6 +245,84 @@
   return false;
 }
 
+bool InstructionSimplifierVisitor::TryCombineVecMultiplyAccumulate(HVecMul* mul) {
+  Primitive::Type type = mul->GetPackedType();
+  InstructionSet isa = codegen_->GetInstructionSet();
+  switch (isa) {
+    case kArm64:
+      if (!(type == Primitive::kPrimByte ||
+            type == Primitive::kPrimChar ||
+            type == Primitive::kPrimShort ||
+            type == Primitive::kPrimInt)) {
+        return false;
+      }
+      break;
+    case kMips:
+    case kMips64:
+      if (!(type == Primitive::kPrimByte ||
+            type == Primitive::kPrimChar ||
+            type == Primitive::kPrimShort ||
+            type == Primitive::kPrimInt ||
+            type == Primitive::kPrimLong)) {
+        return false;
+      }
+      break;
+    default:
+      return false;
+  }
+
+  ArenaAllocator* arena = mul->GetBlock()->GetGraph()->GetArena();
+
+  if (mul->HasOnlyOneNonEnvironmentUse()) {
+    HInstruction* use = mul->GetUses().front().GetUser();
+    if (use->IsVecAdd() || use->IsVecSub()) {
+      // Replace code looking like
+      //    VECMUL tmp, x, y
+      //    VECADD/SUB dst, acc, tmp
+      // with
+      //    VECMULACC dst, acc, x, y
+      // Note that we do not want to (unconditionally) perform the merge when the
+      // multiplication has multiple uses and it can be merged in all of them.
+      // Multiple uses could happen on the same control-flow path, and we would
+      // then increase the amount of work. In the future we could try to evaluate
+      // whether all uses are on different control-flow paths (using dominance and
+      // reverse-dominance information) and only perform the merge when they are.
+      HInstruction* accumulator = nullptr;
+      HVecBinaryOperation* binop = use->AsVecBinaryOperation();
+      HInstruction* binop_left = binop->GetLeft();
+      HInstruction* binop_right = binop->GetRight();
+      // This is always true since the `HVecMul` has only one use (which is checked above).
+      DCHECK_NE(binop_left, binop_right);
+      if (binop_right == mul) {
+        accumulator = binop_left;
+      } else if (use->IsVecAdd()) {
+        DCHECK_EQ(binop_left, mul);
+        accumulator = binop_right;
+      }
+
+      HInstruction::InstructionKind kind =
+          use->IsVecAdd() ? HInstruction::kAdd : HInstruction::kSub;
+      if (accumulator != nullptr) {
+        HVecMultiplyAccumulate* mulacc =
+            new (arena) HVecMultiplyAccumulate(arena,
+                                               kind,
+                                               accumulator,
+                                               mul->GetLeft(),
+                                               mul->GetRight(),
+                                               binop->GetPackedType(),
+                                               binop->GetVectorLength());
+
+        binop->GetBlock()->ReplaceAndRemoveInstructionWith(binop, mulacc);
+        DCHECK(!mul->HasUses());
+        mul->GetBlock()->RemoveInstruction(mul);
+        return true;
+      }
+    }
+  }
+
+  return false;
+}
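
The scalar analogue makes the rewrite easy to verify by hand: a multiply whose result has exactly one consumer can be folded into that consumer. A minimal self-contained sketch (illustration only, not part of this change):

```cpp
#include <cstdio>

int main() {
  int acc = 5, x = 3, y = 4;
  // Before: VECMUL tmp, x, y followed by VECADD dst, acc, tmp.
  int tmp = x * y;
  int before = acc + tmp;
  // After: the fused VECMULACC dst, acc, x, y.
  int after = acc + x * y;
  std::printf("%d == %d\n", before, after);  // prints 17 == 17
}
```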
+
 void InstructionSimplifierVisitor::VisitShift(HBinaryOperation* instruction) {
   DCHECK(instruction->IsShl() || instruction->IsShr() || instruction->IsUShr());
   HInstruction* shift_amount = instruction->GetRight();
@@ -517,7 +591,7 @@
 
   if (object->IsNullConstant()) {
     check_cast->GetBlock()->RemoveInstruction(check_cast);
-    MaybeRecordStat(MethodCompilationStat::kRemovedCheckedCast);
+    MaybeRecordStat(stats_, MethodCompilationStat::kRemovedCheckedCast);
     return;
   }
 
@@ -527,7 +601,7 @@
   if (TypeCheckHasKnownOutcome(load_class, object, &outcome)) {
     if (outcome) {
       check_cast->GetBlock()->RemoveInstruction(check_cast);
-      MaybeRecordStat(MethodCompilationStat::kRemovedCheckedCast);
+      MaybeRecordStat(stats_, MethodCompilationStat::kRemovedCheckedCast);
       if (!load_class->HasUses()) {
         // We cannot rely on DCE to remove the class because the `HLoadClass` thinks it can throw.
         // However, here we know that it cannot because the checkcast was successful, hence
@@ -557,7 +631,7 @@
 
   HGraph* graph = GetGraph();
   if (object->IsNullConstant()) {
-    MaybeRecordStat(kRemovedInstanceOf);
+    MaybeRecordStat(stats_, kRemovedInstanceOf);
     instruction->ReplaceWith(graph->GetIntConstant(0));
     instruction->GetBlock()->RemoveInstruction(instruction);
     RecordSimplification();
@@ -568,7 +642,7 @@
   // the return value check with the `outcome` check, b/27651442.
   bool outcome = false;
   if (TypeCheckHasKnownOutcome(load_class, object, &outcome)) {
-    MaybeRecordStat(kRemovedInstanceOf);
+    MaybeRecordStat(stats_, kRemovedInstanceOf);
     if (outcome && can_be_null) {
       // Type test will succeed, we just need a null test.
       HNotEqual* test = new (graph->GetArena()) HNotEqual(graph->GetNullConstant(), object);
@@ -2307,4 +2381,10 @@
   return true;
 }
 
+void InstructionSimplifierVisitor::VisitVecMul(HVecMul* instruction) {
+  if (TryCombineVecMultiplyAccumulate(instruction)) {
+    RecordSimplification();
+  }
+}
+
 }  // namespace art
diff --git a/compiler/optimizing/instruction_simplifier_arm64.cc b/compiler/optimizing/instruction_simplifier_arm64.cc
index 311be1f..7c9bfb1 100644
--- a/compiler/optimizing/instruction_simplifier_arm64.cc
+++ b/compiler/optimizing/instruction_simplifier_arm64.cc
@@ -210,12 +210,6 @@
   }
 }
 
-void InstructionSimplifierArm64Visitor::VisitVecMul(HVecMul* instruction) {
-  if (TryCombineVecMultiplyAccumulate(instruction, kArm64)) {
-    RecordSimplification();
-  }
-}
-
 void InstructionSimplifierArm64Visitor::VisitVecLoad(HVecLoad* instruction) {
   if (!instruction->IsStringCharAt()
       && TryExtractVecArrayAccessAddress(instruction, instruction->GetIndex())) {
diff --git a/compiler/optimizing/instruction_simplifier_arm64.h b/compiler/optimizing/instruction_simplifier_arm64.h
index 8596f6a..4f16fc3 100644
--- a/compiler/optimizing/instruction_simplifier_arm64.h
+++ b/compiler/optimizing/instruction_simplifier_arm64.h
@@ -74,7 +74,6 @@
   void VisitTypeConversion(HTypeConversion* instruction) OVERRIDE;
   void VisitUShr(HUShr* instruction) OVERRIDE;
   void VisitXor(HXor* instruction) OVERRIDE;
-  void VisitVecMul(HVecMul* instruction) OVERRIDE;
   void VisitVecLoad(HVecLoad* instruction) OVERRIDE;
   void VisitVecStore(HVecStore* instruction) OVERRIDE;
 
diff --git a/compiler/optimizing/instruction_simplifier_shared.cc b/compiler/optimizing/instruction_simplifier_shared.cc
index d1bc4da..7a759b9 100644
--- a/compiler/optimizing/instruction_simplifier_shared.cc
+++ b/compiler/optimizing/instruction_simplifier_shared.cc
@@ -281,73 +281,6 @@
   return true;
 }
 
-bool TryCombineVecMultiplyAccumulate(HVecMul* mul, InstructionSet isa) {
-  Primitive::Type type = mul->GetPackedType();
-  switch (isa) {
-    case kArm64:
-      if (!(type == Primitive::kPrimByte ||
-            type == Primitive::kPrimChar ||
-            type == Primitive::kPrimShort ||
-            type == Primitive::kPrimInt)) {
-        return false;
-      }
-      break;
-    default:
-      return false;
-  }
-
-  ArenaAllocator* arena = mul->GetBlock()->GetGraph()->GetArena();
-
-  if (mul->HasOnlyOneNonEnvironmentUse()) {
-    HInstruction* use = mul->GetUses().front().GetUser();
-    if (use->IsVecAdd() || use->IsVecSub()) {
-      // Replace code looking like
-      //    VECMUL tmp, x, y
-      //    VECADD/SUB dst, acc, tmp
-      // with
-      //    VECMULACC dst, acc, x, y
-      // Note that we do not want to (unconditionally) perform the merge when the
-      // multiplication has multiple uses and it can be merged in all of them.
-      // Multiple uses could happen on the same control-flow path, and we would
-      // then increase the amount of work. In the future we could try to evaluate
-      // whether all uses are on different control-flow paths (using dominance and
-      // reverse-dominance information) and only perform the merge when they are.
-      HInstruction* accumulator = nullptr;
-      HVecBinaryOperation* binop = use->AsVecBinaryOperation();
-      HInstruction* binop_left = binop->GetLeft();
-      HInstruction* binop_right = binop->GetRight();
-      // This is always true since the `HVecMul` has only one use (which is checked above).
-      DCHECK_NE(binop_left, binop_right);
-      if (binop_right == mul) {
-        accumulator = binop_left;
-      } else if (use->IsVecAdd()) {
-        DCHECK_EQ(binop_left, mul);
-        accumulator = binop_right;
-      }
-
-      HInstruction::InstructionKind kind =
-          use->IsVecAdd() ? HInstruction::kAdd : HInstruction::kSub;
-      if (accumulator != nullptr) {
-        HVecMultiplyAccumulate* mulacc =
-            new (arena) HVecMultiplyAccumulate(arena,
-                                               kind,
-                                               accumulator,
-                                               mul->GetLeft(),
-                                               mul->GetRight(),
-                                               binop->GetPackedType(),
-                                               binop->GetVectorLength());
-
-        binop->GetBlock()->ReplaceAndRemoveInstructionWith(binop, mulacc);
-        DCHECK(!mul->HasUses());
-        mul->GetBlock()->RemoveInstruction(mul);
-        return true;
-      }
-    }
-  }
-
-  return false;
-}
-
 bool TryExtractVecArrayAccessAddress(HVecMemoryOperation* access, HInstruction* index) {
   if (index->IsConstant()) {
     // If index is constant the whole address calculation often can be done by LDR/STR themselves.
diff --git a/compiler/optimizing/instruction_simplifier_shared.h b/compiler/optimizing/instruction_simplifier_shared.h
index 371619f..31e2383 100644
--- a/compiler/optimizing/instruction_simplifier_shared.h
+++ b/compiler/optimizing/instruction_simplifier_shared.h
@@ -58,7 +58,6 @@
                                   HInstruction* index,
                                   size_t data_offset);
 
-bool TryCombineVecMultiplyAccumulate(HVecMul* mul, InstructionSet isa);
 bool TryExtractVecArrayAccessAddress(HVecMemoryOperation* access, HInstruction* index);
 
 }  // namespace art
diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc
index 7bdeef5..11725f4 100644
--- a/compiler/optimizing/intrinsics.cc
+++ b/compiler/optimizing/intrinsics.cc
@@ -154,7 +154,8 @@
                                  NeedsEnvironmentOrCache(intrinsic),
                                  GetSideEffects(intrinsic),
                                  GetExceptions(intrinsic));
-            MaybeRecordStat(MethodCompilationStat::kIntrinsicRecognized);
+            MaybeRecordStat(stats_,
+                            MethodCompilationStat::kIntrinsicRecognized);
           }
         }
       }
diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc
index 4cea6df..2669d97 100644
--- a/compiler/optimizing/intrinsics_mips.cc
+++ b/compiler/optimizing/intrinsics_mips.cc
@@ -22,6 +22,7 @@
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "intrinsics.h"
 #include "mirror/array-inl.h"
+#include "mirror/object_array-inl.h"
 #include "mirror/string.h"
 #include "scoped_thread_state_change-inl.h"
 #include "thread.h"
diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc
index d785567..74be954 100644
--- a/compiler/optimizing/intrinsics_mips64.cc
+++ b/compiler/optimizing/intrinsics_mips64.cc
@@ -22,6 +22,7 @@
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "intrinsics.h"
 #include "mirror/array-inl.h"
+#include "mirror/object_array-inl.h"
 #include "mirror/string.h"
 #include "scoped_thread_state_change-inl.h"
 #include "thread.h"
diff --git a/compiler/optimizing/licm.cc b/compiler/optimizing/licm.cc
index f0086fb..10524b0 100644
--- a/compiler/optimizing/licm.cc
+++ b/compiler/optimizing/licm.cc
@@ -141,7 +141,7 @@
             DCHECK(!instruction->HasEnvironment());
           }
           instruction->MoveBefore(pre_header->GetLastInstruction());
-          MaybeRecordStat(MethodCompilationStat::kLoopInvariantMoved);
+          MaybeRecordStat(stats_, MethodCompilationStat::kLoopInvariantMoved);
         } else if (instruction->CanThrow() || instruction->DoesAnyWrite()) {
           // If `instruction` can do something visible (throw or write),
           // we cannot move further instructions that can throw.
diff --git a/compiler/optimizing/load_store_analysis.h b/compiler/optimizing/load_store_analysis.h
index 86fb8e0..a2c1794 100644
--- a/compiler/optimizing/load_store_analysis.h
+++ b/compiler/optimizing/load_store_analysis.h
@@ -466,6 +466,11 @@
     CreateReferenceInfoForReferenceType(new_instance);
   }
 
+  void VisitNewArray(HNewArray* new_array) OVERRIDE {
+    // Any references appearing in the ref_info_array_ so far cannot alias with new_array.
+    CreateReferenceInfoForReferenceType(new_array);
+  }
+
   void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* instruction) OVERRIDE {
     CreateReferenceInfoForReferenceType(instruction);
   }
@@ -478,6 +483,22 @@
     CreateReferenceInfoForReferenceType(instruction);
   }
 
+  void VisitInvokeUnresolved(HInvokeUnresolved* instruction) OVERRIDE {
+    CreateReferenceInfoForReferenceType(instruction);
+  }
+
+  void VisitInvokePolymorphic(HInvokePolymorphic* instruction) OVERRIDE {
+    CreateReferenceInfoForReferenceType(instruction);
+  }
+
+  void VisitLoadString(HLoadString* instruction) OVERRIDE {
+    CreateReferenceInfoForReferenceType(instruction);
+  }
+
+  void VisitPhi(HPhi* instruction) OVERRIDE {
+    CreateReferenceInfoForReferenceType(instruction);
+  }
+
   void VisitParameterValue(HParameterValue* instruction) OVERRIDE {
     CreateReferenceInfoForReferenceType(instruction);
   }
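
The aliasing claim in VisitNewArray above is the standard fresh-allocation argument: a brand-new array cannot equal any reference that existed before it was allocated. A trivial sketch (illustration only):

```cpp
#include <cassert>

int main() {
  int existing[4] = {0};
  int* before = existing;     // any reference created before the allocation
  int* fresh = new int[4]();  // the "new_array"
  assert(before != fresh);    // a fresh allocation aliases nothing pre-existing
  delete[] fresh;
}
```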
diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc
index fddda3d..98b8592 100644
--- a/compiler/optimizing/load_store_elimination.cc
+++ b/compiler/optimizing/load_store_elimination.cc
@@ -40,8 +40,9 @@
  public:
   LSEVisitor(HGraph* graph,
              const HeapLocationCollector& heap_locations_collector,
-             const SideEffectsAnalysis& side_effects)
-      : HGraphVisitor(graph),
+             const SideEffectsAnalysis& side_effects,
+             OptimizingCompilerStats* stats)
+      : HGraphVisitor(graph, stats),
         heap_location_collector_(heap_locations_collector),
         side_effects_(side_effects),
         heap_values_for_(graph->GetBlocks().size(),
@@ -100,7 +101,10 @@
     //   * - Constructor fences (they never escape this thread).
     //   * - Allocations (if they are unused).
     for (HInstruction* new_instance : singleton_new_instances_) {
-      HConstructorFence::RemoveConstructorFences(new_instance);
+      size_t removed = HConstructorFence::RemoveConstructorFences(new_instance);
+      MaybeRecordStat(stats_,
+                      MethodCompilationStat::kConstructorFenceRemovedLSE,
+                      removed);
 
       if (!new_instance->HasNonEnvironmentUses()) {
         new_instance->RemoveEnvironmentUsers();
@@ -108,7 +112,10 @@
       }
     }
     for (HInstruction* new_array : singleton_new_arrays_) {
-      HConstructorFence::RemoveConstructorFences(new_array);
+      size_t removed = HConstructorFence::RemoveConstructorFences(new_array);
+      MaybeRecordStat(stats_,
+                      MethodCompilationStat::kConstructorFenceRemovedLSE,
+                      removed);
 
       if (!new_array->HasNonEnvironmentUses()) {
         new_array->RemoveEnvironmentUsers();
@@ -663,7 +670,7 @@
     return;
   }
 
-  LSEVisitor lse_visitor(graph_, heap_location_collector, side_effects_);
+  LSEVisitor lse_visitor(graph_, heap_location_collector, side_effects_, stats_);
   for (HBasicBlock* block : graph_->GetReversePostOrder()) {
     lse_visitor.VisitBasicBlock(block);
   }
diff --git a/compiler/optimizing/load_store_elimination.h b/compiler/optimizing/load_store_elimination.h
index efe71c7..20a8a76 100644
--- a/compiler/optimizing/load_store_elimination.h
+++ b/compiler/optimizing/load_store_elimination.h
@@ -28,8 +28,9 @@
  public:
   LoadStoreElimination(HGraph* graph,
                        const SideEffectsAnalysis& side_effects,
-                       const LoadStoreAnalysis& lsa)
-      : HOptimization(graph, kLoadStoreEliminationPassName),
+                       const LoadStoreAnalysis& lsa,
+                       OptimizingCompilerStats* stats)
+      : HOptimization(graph, kLoadStoreEliminationPassName, stats),
         side_effects_(side_effects),
         lsa_(lsa) {}
 
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index 274f065..027ba77 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -40,6 +40,8 @@
   instruction->RemoveAsUserOfAllInputs();
   instruction->RemoveEnvironmentUsers();
   instruction->GetBlock()->RemoveInstructionOrPhi(instruction, /*ensure_safety=*/ false);
+  RemoveEnvironmentUses(instruction);
+  ResetEnvironmentInputRecords(instruction);
 }
 
 // Detects a goto block and sets succ to the single successor.
@@ -254,6 +256,35 @@
   return false;
 }
 
+// Detects reductions of the following forms,
+// under the assumption that the phi has only *one* use:
+//   x = x_phi + ..
+//   x = x_phi - ..
+//   x = max(x_phi, ..)
+//   x = min(x_phi, ..)
+static bool HasReductionFormat(HInstruction* reduction, HInstruction* phi) {
+  if (reduction->IsAdd()) {
+    return reduction->InputAt(0) == phi || reduction->InputAt(1) == phi;
+  } else if (reduction->IsSub()) {
+    return reduction->InputAt(0) == phi;
+  } else if (reduction->IsInvokeStaticOrDirect()) {
+    switch (reduction->AsInvokeStaticOrDirect()->GetIntrinsic()) {
+      case Intrinsics::kMathMinIntInt:
+      case Intrinsics::kMathMinLongLong:
+      case Intrinsics::kMathMinFloatFloat:
+      case Intrinsics::kMathMinDoubleDouble:
+      case Intrinsics::kMathMaxIntInt:
+      case Intrinsics::kMathMaxLongLong:
+      case Intrinsics::kMathMaxFloatFloat:
+      case Intrinsics::kMathMaxDoubleDouble:
+        return reduction->InputAt(0) == phi || reduction->InputAt(1) == phi;
+      default:
+        return false;
+    }
+  }
+  return false;
+}
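
For intuition, these shapes correspond to source loops like the following (the min/max forms reach HasReductionFormat via the Math.min/Math.max intrinsics listed above; the snippet itself is only an illustration):

```cpp
#include <algorithm>
#include <cstdio>

int main() {
  int a[] = {3, 1, 4, 1, 5};
  int sum = 0, diff = 100, lo = a[0];
  for (int v : a) {
    sum = sum + v;         // x = x_phi + ..  (phi may be either operand)
    diff = diff - v;       // x = x_phi - ..  (phi must be the left operand)
    lo = std::min(lo, v);  // x = min(x_phi, ..)
  }
  std::printf("%d %d %d\n", sum, diff, lo);  // 14 86 1
}
```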
+
 // Test vector restrictions.
 static bool HasVectorRestrictions(uint64_t restrictions, uint64_t tested) {
   return (restrictions & tested) != 0;
@@ -267,8 +298,22 @@
   return instruction;
 }
 
+// Checks that the instructions from the induction sets are fully removed: they
+// have no uses and no other instruction uses them.
+static bool CheckInductionSetFullyRemoved(ArenaSet<HInstruction*>* iset) {
+  for (HInstruction* instr : *iset) {
+    if (instr->GetBlock() != nullptr ||
+        !instr->GetUses().empty() ||
+        !instr->GetEnvUses().empty() ||
+        HasEnvironmentUsedByOthers(instr)) {
+      return false;
+    }
+  }
+  return true;
+}
+
 //
-// Class methods.
+// Public methods.
 //
 
 HLoopOptimization::HLoopOptimization(HGraph* graph,
@@ -282,7 +327,7 @@
       top_loop_(nullptr),
       last_loop_(nullptr),
       iset_(nullptr),
-      induction_simplication_count_(0),
+      reductions_(nullptr),
       simplified_(false),
       vector_length_(0),
       vector_refs_(nullptr),
@@ -316,6 +361,10 @@
   last_loop_ = top_loop_ = nullptr;
 }
 
+//
+// Loop setup and traversal.
+//
+
 void HLoopOptimization::LocalRun() {
   // Build the linear order using the phase-local allocator. This step enables building
   // a loop hierarchy that properly reflects the outer-inner and previous-next relation.
@@ -334,17 +383,21 @@
   // should use the global allocator.
   if (top_loop_ != nullptr) {
     ArenaSet<HInstruction*> iset(loop_allocator_->Adapter(kArenaAllocLoopOptimization));
+    ArenaSafeMap<HInstruction*, HInstruction*> reds(
+        std::less<HInstruction*>(), loop_allocator_->Adapter(kArenaAllocLoopOptimization));
     ArenaSet<ArrayReference> refs(loop_allocator_->Adapter(kArenaAllocLoopOptimization));
     ArenaSafeMap<HInstruction*, HInstruction*> map(
         std::less<HInstruction*>(), loop_allocator_->Adapter(kArenaAllocLoopOptimization));
     // Attach.
     iset_ = &iset;
+    reductions_ = &reds;
     vector_refs_ = &refs;
     vector_map_ = &map;
     // Traverse.
     TraverseLoopsInnerToOuter(top_loop_);
     // Detach.
     iset_ = nullptr;
+    reductions_ = nullptr;
     vector_refs_ = nullptr;
     vector_map_ = nullptr;
   }
@@ -397,16 +450,12 @@
   }
 }
 
-void HLoopOptimization::TraverseLoopsInnerToOuter(LoopNode* node) {
+bool HLoopOptimization::TraverseLoopsInnerToOuter(LoopNode* node) {
+  bool changed = false;
   for ( ; node != nullptr; node = node->next) {
-    // Visit inner loops first.
-    uint32_t current_induction_simplification_count = induction_simplication_count_;
-    if (node->inner != nullptr) {
-      TraverseLoopsInnerToOuter(node->inner);
-    }
-    // Recompute induction information of this loop if the induction
-    // of any inner loop has been simplified.
-    if (current_induction_simplification_count != induction_simplication_count_) {
+    // Visit inner loops first. Recompute induction information for this
+    // loop if the induction of any inner loop has changed.
+    if (TraverseLoopsInnerToOuter(node->inner)) {
       induction_range_.ReVisit(node->loop_info);
     }
     // Repeat simplifications in the loop-body until no more changes occur.
@@ -416,12 +465,14 @@
       simplified_ = false;
       SimplifyInduction(node);
       SimplifyBlocks(node);
+      changed = simplified_ || changed;
     } while (simplified_);
     // Optimize inner loop.
     if (node->inner == nullptr) {
-      OptimizeInnerLoop(node);
+      changed = OptimizeInnerLoop(node) || changed;
     }
   }
+  return changed;
 }
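
The restructured traversal is a plain post-order fold: children are visited first, and the returned flag replaces the old global simplification counter as the trigger for recomputing a parent's induction information. Schematically (toy types, not the ART ones):

```cpp
// Toy model of the bool-returning traversal: a parent recomputes its
// induction info exactly when some inner loop reports a change.
struct Node { Node* inner; Node* next; };

static bool Traverse(Node* node) {
  bool changed = false;
  for (; node != nullptr; node = node->next) {
    if (Traverse(node->inner)) {
      // ... induction_range_.ReVisit(node) would go here ...
      changed = true;
    }
    // ... simplify `node` itself and OR the result into `changed` ...
  }
  return changed;
}

int main() { return Traverse(nullptr) ? 1 : 0; }
```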
 
 //
@@ -438,17 +489,18 @@
   //           for (int i = 0; i < 10; i++, k++) { .... no k .... } return k;
   for (HInstructionIterator it(header->GetPhis()); !it.Done(); it.Advance()) {
     HPhi* phi = it.Current()->AsPhi();
-    iset_->clear();  // prepare phi induction
     if (TrySetPhiInduction(phi, /*restrict_uses*/ true) &&
         TryAssignLastValue(node->loop_info, phi, preheader, /*collect_loop_uses*/ false)) {
       // Note that it's ok to have replaced uses after the loop with the last value, without
       // being able to remove the cycle. Environment uses (which are the reason we may not be
-      // able to remove the cycle) within the loop will still hold the right value.
+      // able to remove the cycle) within the loop will still hold the right value. We
+      // must, however, have first tried to replace the outside uses.
       if (CanRemoveCycle()) {
+        simplified_ = true;
         for (HInstruction* i : *iset_) {
           RemoveFromCycle(i);
         }
-        simplified_ = true;
+        DCHECK(CheckInductionSetFullyRemoved(iset_));
       }
     }
   }
@@ -491,21 +543,20 @@
   }
 }
 
-void HLoopOptimization::OptimizeInnerLoop(LoopNode* node) {
+bool HLoopOptimization::OptimizeInnerLoop(LoopNode* node) {
   HBasicBlock* header = node->loop_info->GetHeader();
   HBasicBlock* preheader = node->loop_info->GetPreHeader();
   // Ensure loop header logic is finite.
   int64_t trip_count = 0;
   if (!induction_range_.IsFinite(node->loop_info, &trip_count)) {
-    return;
+    return false;
   }
-
   // Ensure there is only a single loop-body (besides the header).
   HBasicBlock* body = nullptr;
   for (HBlocksInLoopIterator it(*node->loop_info); !it.Done(); it.Advance()) {
     if (it.Current() != header) {
       if (body != nullptr) {
-        return;
+        return false;
       }
       body = it.Current();
     }
@@ -513,27 +564,27 @@
   CHECK(body != nullptr);
   // Ensure there is only a single exit point.
   if (header->GetSuccessors().size() != 2) {
-    return;
+    return false;
   }
   HBasicBlock* exit = (header->GetSuccessors()[0] == body)
       ? header->GetSuccessors()[1]
       : header->GetSuccessors()[0];
   // Ensure exit can only be reached by exiting loop.
   if (exit->GetPredecessors().size() != 1) {
-    return;
+    return false;
   }
   // Detect either an empty loop (no side effects other than plain iteration) or
   // a trivial loop (just iterating once). Replace subsequent index uses, if any,
   // with the last value and remove the loop, possibly after unrolling its body.
-  HInstruction* phi = header->GetFirstPhi();
-  iset_->clear();  // prepare phi induction
-  if (TrySetSimpleLoopHeader(header)) {
+  HPhi* main_phi = nullptr;
+  if (TrySetSimpleLoopHeader(header, &main_phi)) {
     bool is_empty = IsEmptyBody(body);
-    if ((is_empty || trip_count == 1) &&
-        TryAssignLastValue(node->loop_info, phi, preheader, /*collect_loop_uses*/ true)) {
+    if (reductions_->empty() &&  // TODO: possible with some effort
+        (is_empty || trip_count == 1) &&
+        TryAssignLastValue(node->loop_info, main_phi, preheader, /*collect_loop_uses*/ true)) {
       if (!is_empty) {
         // Unroll the loop-body, which sees initial value of the index.
-        phi->ReplaceWith(phi->InputAt(0));
+        main_phi->ReplaceWith(main_phi->InputAt(0));
         preheader->MergeInstructionsWith(body);
       }
       body->DisconnectAndDelete();
@@ -546,21 +597,20 @@
       preheader->AddDominatedBlock(exit);
       exit->SetDominator(preheader);
       RemoveLoop(node);  // update hierarchy
-      return;
+      return true;
     }
   }
-
   // Vectorize loop, if possible and valid.
-  if (kEnableVectorization) {
-    iset_->clear();  // prepare phi induction
-    if (TrySetSimpleLoopHeader(header) &&
-        ShouldVectorize(node, body, trip_count) &&
-        TryAssignLastValue(node->loop_info, phi, preheader, /*collect_loop_uses*/ true)) {
-      Vectorize(node, body, exit, trip_count);
-      graph_->SetHasSIMD(true);  // flag SIMD usage
-      return;
-    }
+  if (kEnableVectorization &&
+      TrySetSimpleLoopHeader(header, &main_phi) &&
+      reductions_->empty() &&  // TODO: possible with some effort
+      ShouldVectorize(node, body, trip_count) &&
+      TryAssignLastValue(node->loop_info, main_phi, preheader, /*collect_loop_uses*/ true)) {
+    Vectorize(node, body, exit, trip_count);
+    graph_->SetHasSIMD(true);  // flag SIMD usage
+    return true;
   }
+  return false;
 }
 
 //
@@ -601,6 +651,8 @@
   // aliased, as well as the property that references either point to the same
   // array or to two completely disjoint arrays, i.e., no partial aliasing.
   // Other than a few simple heuristics, no detailed subscript analysis is done.
+  // The scan over references also finds a suitable dynamic loop peeling candidate.
+  const ArrayReference* candidate = nullptr;
   for (auto i = vector_refs_->begin(); i != vector_refs_->end(); ++i) {
     for (auto j = i; ++j != vector_refs_->end(); ) {
       if (i->type == j->type && (i->lhs || j->lhs)) {
@@ -636,7 +688,7 @@
   }
 
   // Consider dynamic loop peeling for alignment.
-  SetPeelingCandidate(trip_count);
+  SetPeelingCandidate(candidate, trip_count);
 
   // Success!
   return true;
@@ -659,14 +711,15 @@
   bool needs_cleanup = trip_count == 0 || (trip_count % chunk) != 0;
 
   // Adjust vector bookkeeping.
-  iset_->clear();  // prepare phi induction
-  bool is_simple_loop_header = TrySetSimpleLoopHeader(header);  // fills iset_
+  HPhi* main_phi = nullptr;
+  bool is_simple_loop_header = TrySetSimpleLoopHeader(header, &main_phi);  // refills sets
   DCHECK(is_simple_loop_header);
   vector_header_ = header;
   vector_body_ = block;
 
-  // Generate dynamic loop peeling trip count, if needed:
-  // ptc = <peeling-needed-for-candidate>
+  // Generate dynamic loop peeling trip count, if needed, under the assumption
+  // that the Android runtime guarantees at least "component size" alignment:
+  // ptc = (ALIGN - (&a[initial] % ALIGN)) / type-size
   HInstruction* ptc = nullptr;
   if (vector_peeling_candidate_ != nullptr) {
     DCHECK_LT(vector_length_, trip_count) << "dynamic peeling currently requires known trip count";
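
A worked instance of the peeling formula in the comment above, assuming a 16-byte alignment target and 4-byte elements (the numbers are illustrative):

```cpp
#include <cstdio>

int main() {
  const unsigned kAlign = 16;    // target alignment in bytes (assumed)
  const unsigned kTypeSize = 4;  // 4-byte (int32_t) elements
  unsigned addr = 0x1008;        // &a[initial]: 8 bytes past a 16-byte boundary
  // ptc = (ALIGN - (&a[initial] % ALIGN)) / type-size
  unsigned ptc = (kAlign - (addr % kAlign)) / kTypeSize;
  std::printf("peel %u iterations\n", ptc);  // peel 2 iterations
}
```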
@@ -755,6 +808,7 @@
   while (!header->GetFirstInstruction()->IsGoto()) {
     header->RemoveInstruction(header->GetFirstInstruction());
   }
+
   // Update loop hierarchy: the old header now resides in the same outer loop
   // as the old preheader. Note that we don't bother putting sequential
   // loops back in the hierarchy at this point.
@@ -821,13 +875,12 @@
     vector_index_ = new (global_allocator_) HAdd(induc_type, vector_index_, step);
     Insert(vector_body_, vector_index_);
   }
-  // Finalize phi for the loop index.
+  // Finalize phi inputs for the loop index.
   phi->AddInput(lo);
   phi->AddInput(vector_index_);
   vector_index_ = phi;
 }
 
-// TODO: accept reductions at left-hand-side, mixed-type store idioms, etc.
 bool HLoopOptimization::VectorizeDef(LoopNode* node,
                                      HInstruction* instruction,
                                      bool generate_code) {
@@ -1098,7 +1151,7 @@
     case kArm:
     case kThumb2:
       // Allow vectorization for all ARM devices, because Android assumes that
-      // ARM 32-bit always supports advanced SIMD.
+      // ARM 32-bit always supports advanced SIMD (64-bit SIMD).
       switch (type) {
         case Primitive::kPrimBoolean:
         case Primitive::kPrimByte:
@@ -1117,7 +1170,7 @@
       return false;
     case kArm64:
       // Allow vectorization for all ARM devices, because Android assumes that
-      // ARMv8 AArch64 always supports advanced SIMD.
+      // ARMv8 AArch64 always supports advanced SIMD (128-bit SIMD).
       switch (type) {
         case Primitive::kPrimBoolean:
         case Primitive::kPrimByte:
@@ -1142,7 +1195,7 @@
       }
     case kX86:
     case kX86_64:
-      // Allow vectorization for SSE4-enabled X86 devices only (128-bit vectors).
+      // Allow vectorization for SSE4.1-enabled X86 devices only (128-bit SIMD).
       if (features->AsX86InstructionSetFeatures()->HasSSE4_1()) {
         switch (type) {
           case Primitive::kPrimBoolean:
@@ -1290,8 +1343,6 @@
           global_allocator_, base, opa, type, vector_length_, is_string_char_at);
     }
     // Known dynamically enforced alignment?
-    // TODO: detect offset + constant differences.
-    // TODO: long run, static alignment analysis?
     if (vector_peeling_candidate_ != nullptr &&
         vector_peeling_candidate_->base == base &&
         vector_peeling_candidate_->offset == offset) {
@@ -1566,9 +1617,11 @@
   return true;
 }
 
-void HLoopOptimization::SetPeelingCandidate(int64_t trip_count ATTRIBUTE_UNUSED) {
+void HLoopOptimization::SetPeelingCandidate(const ArrayReference* candidate,
+                                            int64_t trip_count ATTRIBUTE_UNUSED) {
   // Current heuristic: none.
   // TODO: implement
+  vector_peeling_candidate_ = candidate;
 }
 
 uint32_t HLoopOptimization::GetUnrollingFactor(HBasicBlock* block, int64_t trip_count) {
@@ -1596,13 +1649,17 @@
 //
 
 bool HLoopOptimization::TrySetPhiInduction(HPhi* phi, bool restrict_uses) {
+  // Start with empty phi induction.
+  iset_->clear();
+
   // Special case Phis that have equivalent in a debuggable setup. Our graph checker isn't
   // smart enough to follow strongly connected components (and it's probably not worth
   // it to make it so). See b/33775412.
   if (graph_->IsDebuggable() && phi->HasEquivalentPhi()) {
     return false;
   }
-  DCHECK(iset_->empty());
+
+  // Lookup phi induction cycle.
   ArenaSet<HInstruction*>* set = induction_range_.LookupCycle(phi);
   if (set != nullptr) {
     for (HInstruction* i : *set) {
@@ -1614,6 +1671,7 @@
       } else if (!i->IsRemovable()) {
         return false;
       } else if (i != phi && restrict_uses) {
+        // Deal with regular uses.
         for (const HUseListNode<HInstruction*>& use : i->GetUses()) {
           if (set->find(use.GetUser()) == set->end()) {
             return false;
@@ -1627,17 +1685,65 @@
   return false;
 }
 
-// Find: phi: Phi(init, addsub)
-//       s:   SuspendCheck
-//       c:   Condition(phi, bound)
-//       i:   If(c)
-// TODO: Find a less pattern matching approach?
-bool HLoopOptimization::TrySetSimpleLoopHeader(HBasicBlock* block) {
+bool HLoopOptimization::TrySetPhiReduction(HPhi* phi) {
   DCHECK(iset_->empty());
-  HInstruction* phi = block->GetFirstPhi();
-  if (phi != nullptr &&
-      phi->GetNext() == nullptr &&
-      TrySetPhiInduction(phi->AsPhi(), /*restrict_uses*/ false)) {
+  // Only unclassified phi cycles are candidates for reductions.
+  if (induction_range_.IsClassified(phi)) {
+    return false;
+  }
+  // Accept operations like x = x + .., provided that the phi and the reduction are
+  // used exactly once inside the loop, and by each other.
+  HInputsRef inputs = phi->GetInputs();
+  if (inputs.size() == 2) {
+    HInstruction* reduction = inputs[1];
+    if (HasReductionFormat(reduction, phi)) {
+      HLoopInformation* loop_info = phi->GetBlock()->GetLoopInformation();
+      int32_t use_count = 0;
+      bool single_use_inside_loop =
+          // Reduction update only used by phi.
+          reduction->GetUses().HasExactlyOneElement() &&
+          !reduction->HasEnvironmentUses() &&
+          // Reduction update is only use of phi inside the loop.
+          IsOnlyUsedAfterLoop(loop_info, phi, /*collect_loop_uses*/ true, &use_count) &&
+          iset_->size() == 1;
+      iset_->clear();  // leave it the way you found it
+      if (single_use_inside_loop) {
+        // Link reduction back, and start recording feed value.
+        reductions_->Put(reduction, phi);
+        reductions_->Put(phi, phi->InputAt(0));
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
+bool HLoopOptimization::TrySetSimpleLoopHeader(HBasicBlock* block, /*out*/ HPhi** main_phi) {
+  // Start with empty phi induction and reductions.
+  iset_->clear();
+  reductions_->clear();
+
+  // Scan the phis to find the following (the induction structure has already
+  // been optimized, so we don't need to worry about trivial cases):
+  // (1) optional reductions in loop,
+  // (2) the main induction, used in loop control.
+  HPhi* phi = nullptr;
+  for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
+    if (TrySetPhiReduction(it.Current()->AsPhi())) {
+      continue;
+    } else if (phi == nullptr) {
+      // Found the first candidate for main induction.
+      phi = it.Current()->AsPhi();
+    } else {
+      return false;
+    }
+  }
+
+  // Then test for a typical loop header:
+  //   s:  SuspendCheck
+  //   c:  Condition(phi, bound)
+  //   i:  If(c)
+  if (phi != nullptr && TrySetPhiInduction(phi, /*restrict_uses*/ false)) {
     HInstruction* s = block->GetFirstInstruction();
     if (s != nullptr && s->IsSuspendCheck()) {
       HInstruction* c = s->GetNext();
@@ -1649,6 +1755,7 @@
         if (i != nullptr && i->IsIf() && i->InputAt(0) == c) {
           iset_->insert(c);
           iset_->insert(s);
+          *main_phi = phi;
           return true;
         }
       }
@@ -1672,6 +1779,7 @@
 
 bool HLoopOptimization::IsUsedOutsideLoop(HLoopInformation* loop_info,
                                           HInstruction* instruction) {
+  // Deal with regular uses.
   for (const HUseListNode<HInstruction*>& use : instruction->GetUses()) {
     if (use.GetUser()->GetBlock()->GetLoopInformation() != loop_info) {
       return true;
@@ -1684,6 +1792,7 @@
                                             HInstruction* instruction,
                                             bool collect_loop_uses,
                                             /*out*/ int32_t* use_count) {
+  // Deal with regular uses.
   for (const HUseListNode<HInstruction*>& use : instruction->GetUses()) {
     HInstruction* user = use.GetUser();
     if (iset_->find(user) == iset_->end()) {  // not excluded?
@@ -1709,6 +1818,7 @@
   // Try to replace outside uses with the last value.
   if (induction_range_.CanGenerateLastValue(instruction)) {
     HInstruction* replacement = induction_range_.GenerateLastValue(instruction, graph_, block);
+    // Deal with regular uses.
     const HUseList<HInstruction*>& uses = instruction->GetUses();
     for (auto it = uses.begin(), end = uses.end(); it != end;) {
       HInstruction* user = it->GetUser();
@@ -1724,6 +1834,7 @@
         induction_range_.Replace(user, instruction, replacement);  // update induction
       }
     }
+    // Deal with environment uses.
     const HUseList<HEnvironment*>& env_uses = instruction->GetEnvUses();
     for (auto it = env_uses.begin(), end = env_uses.end(); it != end;) {
       HEnvironment* user = it->GetUser();
@@ -1739,7 +1850,6 @@
         }
       }
     }
-    induction_simplication_count_++;
     return true;
   }
   return false;
diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h
index de4bd85..49be8a3 100644
--- a/compiler/optimizing/loop_optimization.h
+++ b/compiler/optimizing/loop_optimization.h
@@ -104,18 +104,33 @@
     bool lhs;              // def/use
   };
 
+  //
   // Loop setup and traversal.
+  //
+
   void LocalRun();
   void AddLoop(HLoopInformation* loop_info);
   void RemoveLoop(LoopNode* node);
-  void TraverseLoopsInnerToOuter(LoopNode* node);
 
+  // Traverses all loops inner to outer to perform simplifications and optimizations.
+  // Returns true if any loop nested inside the current loop (node) has changed.
+  bool TraverseLoopsInnerToOuter(LoopNode* node);
+
+  //
   // Optimization.
+  //
+
   void SimplifyInduction(LoopNode* node);
   void SimplifyBlocks(LoopNode* node);
-  void OptimizeInnerLoop(LoopNode* node);
 
+  // Performs optimizations specific to an inner loop (empty loop removal,
+  // unrolling, vectorization). Returns true if anything changed.
+  bool OptimizeInnerLoop(LoopNode* node);
+
+  //
   // Vectorization analysis and synthesis.
+  //
+
   bool ShouldVectorize(LoopNode* node, HBasicBlock* block, int64_t trip_count);
   void Vectorize(LoopNode* node, HBasicBlock* block, HBasicBlock* exit, int64_t trip_count);
   void GenerateNewLoop(LoopNode* node,
@@ -155,12 +170,20 @@
 
   // Vectorization heuristics.
   bool IsVectorizationProfitable(int64_t trip_count);
-  void SetPeelingCandidate(int64_t trip_count);
+  void SetPeelingCandidate(const ArrayReference* candidate, int64_t trip_count);
   uint32_t GetUnrollingFactor(HBasicBlock* block, int64_t trip_count);
 
+  //
   // Helpers.
+  //
+
   bool TrySetPhiInduction(HPhi* phi, bool restrict_uses);
-  bool TrySetSimpleLoopHeader(HBasicBlock* block);
+  bool TrySetPhiReduction(HPhi* phi);
+
+  // Detects a loop header with a single induction phi (returned in main_phi),
+  // possibly other phis for reductions, but no other side effects. Returns true
+  // on success.
+  bool TrySetSimpleLoopHeader(HBasicBlock* block, /*out*/ HPhi** main_phi);
+
   bool IsEmptyBody(HBasicBlock* block);
   bool IsOnlyUsedAfterLoop(HLoopInformation* loop_info,
                            HInstruction* instruction,
@@ -200,10 +223,12 @@
   // Contents reside in phase-local heap memory.
   ArenaSet<HInstruction*>* iset_;
 
-  // Counter that tracks how many induction cycles have been simplified. Useful
-  // to trigger incremental updates of induction variable analysis of outer loops
-  // when the induction of inner loops has changed.
-  uint32_t induction_simplication_count_;
+  // Temporary bookkeeping of reduction instructions. Mapping is two-fold:
+  // (1) reductions in the loop-body are mapped back to their phi definition,
+  // (2) phi definitions are mapped to their initial value (updated during
+  //     code generation to feed the proper values into the new chain).
+  // Contents reside in phase-local heap memory.
+  ArenaSafeMap<HInstruction*, HInstruction*>* reductions_;
 
   // Flag that tracks if any simplifications have occurred.
   bool simplified_;
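
The two-fold mapping above can be read as two directed edges per reduction cycle; a toy illustration of the shape of the map (string labels stand in for HInstruction pointers):

```cpp
#include <cstdio>
#include <map>
#include <string>

int main() {
  std::map<std::string, std::string> reductions;
  reductions["add_in_body"] = "phi";    // (1) body reduction -> its phi
  reductions["phi"] = "initial_value";  // (2) phi -> value feeding the new chain
  for (const auto& kv : reductions) {
    std::printf("%s -> %s\n", kv.first.c_str(), kv.second.c_str());
  }
}
```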
diff --git a/compiler/optimizing/loop_optimization_test.cc b/compiler/optimizing/loop_optimization_test.cc
index 5b93506..b5b03d8 100644
--- a/compiler/optimizing/loop_optimization_test.cc
+++ b/compiler/optimizing/loop_optimization_test.cc
@@ -195,4 +195,44 @@
   EXPECT_EQ("[[[[[[[[[[][][][][][][][][][]]]]]]]]]]", LoopStructure());
 }
 
+// Check that SimplifyLoop() doesn't invalidate data flow when ordering loop headers'
+// predecessors.
+TEST_F(LoopOptimizationTest, SimplifyLoop) {
+  // Can't use AddLoop, as we want a special order for the block's predecessors.
+  HBasicBlock* header = new (&allocator_) HBasicBlock(graph_);
+  HBasicBlock* body = new (&allocator_) HBasicBlock(graph_);
+  graph_->AddBlock(header);
+  graph_->AddBlock(body);
+
+  // Control flow: make a loop back edge first in the list of predecessors.
+  entry_block_->RemoveSuccessor(return_block_);
+  body->AddSuccessor(header);
+  entry_block_->AddSuccessor(header);
+  header->AddSuccessor(body);
+  header->AddSuccessor(return_block_);
+  DCHECK(header->GetSuccessors()[1] == return_block_);
+
+  // Data flow.
+  header->AddInstruction(new (&allocator_) HIf(parameter_));
+  body->AddInstruction(new (&allocator_) HGoto());
+
+  HPhi* phi = new (&allocator_) HPhi(&allocator_, 0, 0, Primitive::kPrimInt);
+  HInstruction* add = new (&allocator_) HAdd(Primitive::kPrimInt, phi, parameter_);
+  header->AddPhi(phi);
+  body->AddInstruction(add);
+
+  phi->AddInput(add);
+  phi->AddInput(parameter_);
+
+  graph_->ClearLoopInformation();
+  graph_->ClearDominanceInformation();
+  graph_->BuildDominatorTree();
+
+  // Check that, after the optimizations in BuildDominatorTree()/SimplifyCFG(), phi
+  // inputs are still mapped correctly to the block predecessors.
+  for (size_t i = 0, e = phi->InputCount(); i < e; i++) {
+    HInstruction* input = phi->InputAt(i);
+    ASSERT_TRUE(input->GetBlock()->Dominates(header->GetPredecessors()[i]));
+  }
+}
 }  // namespace art
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index ca48e08..1510eaf 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -90,7 +90,8 @@
   }
 }
 
-static void RemoveEnvironmentUses(HInstruction* instruction) {
+// Removes the records that register this instruction's environment as a user of other instructions.
+void RemoveEnvironmentUses(HInstruction* instruction) {
   for (HEnvironment* environment = instruction->GetEnvironment();
        environment != nullptr;
        environment = environment->GetParent()) {
@@ -102,6 +103,35 @@
   }
 }
 
+// Returns whether the instruction's environment still references any other instruction.
+bool HasEnvironmentUsedByOthers(HInstruction* instruction) {
+  for (HEnvironment* environment = instruction->GetEnvironment();
+       environment != nullptr;
+       environment = environment->GetParent()) {
+    for (size_t i = 0, e = environment->Size(); i < e; ++i) {
+      HInstruction* user = environment->GetInstructionAt(i);
+      if (user != nullptr) {
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
+// Resets (clears) the environment input records of the instruction itself.
+void ResetEnvironmentInputRecords(HInstruction* instruction) {
+  for (HEnvironment* environment = instruction->GetEnvironment();
+       environment != nullptr;
+       environment = environment->GetParent()) {
+    for (size_t i = 0, e = environment->Size(); i < e; ++i) {
+      DCHECK(environment->GetHolder() == instruction);
+      if (environment->GetInstructionAt(i) != nullptr) {
+        environment->SetRawEnvAt(i, nullptr);
+      }
+    }
+  }
+}
+
 static void RemoveAsUser(HInstruction* instruction) {
   instruction->RemoveAsUserOfAllInputs();
   RemoveEnvironmentUses(instruction);
@@ -328,6 +358,35 @@
   }
 }
 
+// Reorders phi inputs to match the reordering of the block's predecessors.
+static void FixPhisAfterPredecessorsReordering(HBasicBlock* block, size_t first, size_t second) {
+  for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
+    HPhi* phi = it.Current()->AsPhi();
+    HInstruction* first_instr = phi->InputAt(first);
+    HInstruction* second_instr = phi->InputAt(second);
+    phi->ReplaceInput(first_instr, second);
+    phi->ReplaceInput(second_instr, first);
+  }
+}
+
+// Make sure that the first predecessor of a loop header is the incoming block.
+void HGraph::OrderLoopHeaderPredecessors(HBasicBlock* header) {
+  DCHECK(header->IsLoopHeader());
+  HLoopInformation* info = header->GetLoopInformation();
+  if (info->IsBackEdge(*header->GetPredecessors()[0])) {
+    HBasicBlock* to_swap = header->GetPredecessors()[0];
+    for (size_t pred = 1, e = header->GetPredecessors().size(); pred < e; ++pred) {
+      HBasicBlock* predecessor = header->GetPredecessors()[pred];
+      if (!info->IsBackEdge(*predecessor)) {
+        header->predecessors_[pred] = to_swap;
+        header->predecessors_[0] = predecessor;
+        FixPhisAfterPredecessorsReordering(header, 0, pred);
+        break;
+      }
+    }
+  }
+}
+
 void HGraph::SimplifyLoop(HBasicBlock* header) {
   HLoopInformation* info = header->GetLoopInformation();
 
@@ -351,18 +410,7 @@
     pre_header->AddSuccessor(header);
   }
 
-  // Make sure the first predecessor of a loop header is the incoming block.
-  if (info->IsBackEdge(*header->GetPredecessors()[0])) {
-    HBasicBlock* to_swap = header->GetPredecessors()[0];
-    for (size_t pred = 1, e = header->GetPredecessors().size(); pred < e; ++pred) {
-      HBasicBlock* predecessor = header->GetPredecessors()[pred];
-      if (!info->IsBackEdge(*predecessor)) {
-        header->predecessors_[pred] = to_swap;
-        header->predecessors_[0] = predecessor;
-        break;
-      }
-    }
-  }
+  OrderLoopHeaderPredecessors(header);
 
   HInstruction* first_instruction = header->GetFirstInstruction();
   if (first_instruction != nullptr && first_instruction->IsSuspendCheck()) {
@@ -1168,11 +1216,14 @@
   DCHECK_EQ(0u, InputCount());
 }
 
-void HConstructorFence::RemoveConstructorFences(HInstruction* instruction) {
+size_t HConstructorFence::RemoveConstructorFences(HInstruction* instruction) {
   DCHECK(instruction->GetBlock() != nullptr);
   // Removing constructor fences only makes sense for instructions with an object return type.
   DCHECK_EQ(Primitive::kPrimNot, instruction->GetType());
 
+  // Returns how many instructions were removed, for statistics purposes.
+  size_t remove_count = 0;
+
   // Efficient implementation that simultaneously (in one pass):
   // * Scans the uses list for all constructor fences.
   // * Deletes that constructor fence from the uses list of `instruction`.
@@ -1220,6 +1271,7 @@
       // is removed.
       if (ctor_fence->InputCount() == 0u) {
         ctor_fence->GetBlock()->RemoveInstruction(ctor_fence);
+        ++remove_count;
       }
     }
   }
@@ -1233,6 +1285,8 @@
     }
     CHECK(instruction->GetBlock() != nullptr);
   }
+
+  return remove_count;
 }
 
 HInstruction* HConstructorFence::GetAssociatedAllocation() {
@@ -1697,6 +1751,10 @@
   return HasOnlyOneInstruction(*this) && GetLastInstruction()->IsGoto();
 }
 
+bool HBasicBlock::IsSingleReturn() const {
+  return HasOnlyOneInstruction(*this) && GetLastInstruction()->IsReturn();
+}
+
 bool HBasicBlock::IsSingleTryBoundary() const {
   return HasOnlyOneInstruction(*this) && GetLastInstruction()->IsTryBoundary();
 }
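
The invariant behind OrderLoopHeaderPredecessors and the phi fix-up above is that a phi's i-th input must flow in from the block's i-th predecessor, so the two lists have to be permuted together. A toy demonstration (illustration only):

```cpp
#include <cstdio>
#include <utility>
#include <vector>

int main() {
  // Index i of phi_inputs corresponds to index i of preds.
  std::vector<const char*> preds = {"back_edge", "pre_header"};
  std::vector<int> phi_inputs = {42, 0};  // 42 from back_edge, 0 from pre_header
  // Make the incoming (non-back-edge) block the first predecessor...
  std::swap(preds[0], preds[1]);
  // ...and swap the phi inputs in lockstep, as the fix-up helper does.
  std::swap(phi_inputs[0], phi_inputs[1]);
  for (size_t i = 0; i < preds.size(); ++i) {
    std::printf("input %zu = %d, from %s\n", i, phi_inputs[i], preds[i]);
  }
}
```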
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index fa29378..f60d532 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -418,6 +418,7 @@
   HBasicBlock* SplitEdge(HBasicBlock* block, HBasicBlock* successor);
 
   void SplitCriticalEdge(HBasicBlock* block, HBasicBlock* successor);
+  void OrderLoopHeaderPredecessors(HBasicBlock* header);
   void SimplifyLoop(HBasicBlock* header);
 
   int32_t GetNextInstructionId() {
@@ -958,6 +959,7 @@
   }
 
   bool IsSingleGoto() const;
+  bool IsSingleReturn() const;
   bool IsSingleTryBoundary() const;
 
   // Returns true if this block emits nothing but a jump.
@@ -6630,7 +6632,9 @@
   // This must *not* be called during/after prepare_for_register_allocation,
   // because that removes all the inputs to the fences but the fence is actually
   // still considered live.
-  static void RemoveConstructorFences(HInstruction* instruction);
+  //
+  // Returns how many HConstructorFence instructions were removed from the graph.
+  static size_t RemoveConstructorFences(HInstruction* instruction);
 
   // Check if this constructor fence is protecting
   // an HNewInstance or HNewArray that is also the immediate
@@ -6878,9 +6882,13 @@
 
 namespace art {
 
+class OptimizingCompilerStats;
+
 class HGraphVisitor : public ValueObject {
  public:
-  explicit HGraphVisitor(HGraph* graph) : graph_(graph) {}
+  explicit HGraphVisitor(HGraph* graph, OptimizingCompilerStats* stats = nullptr)
+      : stats_(stats),
+        graph_(graph) {}
   virtual ~HGraphVisitor() {}
 
   virtual void VisitInstruction(HInstruction* instruction ATTRIBUTE_UNUSED) {}
@@ -6902,6 +6910,9 @@
 
 #undef DECLARE_VISIT_INSTRUCTION
 
+ protected:
+  OptimizingCompilerStats* stats_;
+
  private:
   HGraph* const graph_;
 
@@ -6910,7 +6921,8 @@
 
 class HGraphDelegateVisitor : public HGraphVisitor {
  public:
-  explicit HGraphDelegateVisitor(HGraph* graph) : HGraphVisitor(graph) {}
+  explicit HGraphDelegateVisitor(HGraph* graph, OptimizingCompilerStats* stats = nullptr)
+      : HGraphVisitor(graph, stats) {}
   virtual ~HGraphDelegateVisitor() {}
 
   // Visit functions that delegate to the super class.
@@ -7059,6 +7071,10 @@
   return instruction;
 }
 
+void RemoveEnvironmentUses(HInstruction* instruction);
+bool HasEnvironmentUsedByOthers(HInstruction* instruction);
+void ResetEnvironmentInputRecords(HInstruction* instruction);
+
 }  // namespace art
 
 #endif  // ART_COMPILER_OPTIMIZING_NODES_H_
diff --git a/compiler/optimizing/optimization.cc b/compiler/optimizing/optimization.cc
index 3d76949..1e68ca2 100644
--- a/compiler/optimizing/optimization.cc
+++ b/compiler/optimizing/optimization.cc
@@ -17,11 +17,4 @@
 #include "optimization.h"
 
 namespace art {
-
-void HOptimization::MaybeRecordStat(MethodCompilationStat compilation_stat, size_t count) const {
-  if (stats_ != nullptr) {
-    stats_->RecordStat(compilation_stat, count);
-  }
-}
-
 }  // namespace art
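
With the member helpers deleted here and in instruction_builder.h and optimizing_compiler.cc, every call site now passes the stats pointer explicitly, and one site (the LSE constructor-fence accounting) also passes a count. That implies a null-tolerant free helper along these lines (a sketch only; the real declaration presumably lives in optimizing_compiler_stats.h):

```cpp
// Sketch of the assumed free replacement for the deleted member functions;
// the name and argument order match the call sites in this change.
inline void MaybeRecordStat(OptimizingCompilerStats* stats,
                            MethodCompilationStat compilation_stat,
                            size_t count = 1) {
  if (stats != nullptr) {  // Stats collection is optional, hence "Maybe".
    stats->RecordStat(compilation_stat, count);
  }
}
```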
diff --git a/compiler/optimizing/optimization.h b/compiler/optimizing/optimization.h
index 0819fb0..ce41a2e 100644
--- a/compiler/optimizing/optimization.h
+++ b/compiler/optimizing/optimization.h
@@ -47,8 +47,6 @@
   virtual void Run() = 0;
 
  protected:
-  void MaybeRecordStat(MethodCompilationStat compilation_stat, size_t count = 1) const;
-
   HGraph* const graph_;
   // Used to record stats about the optimization.
   OptimizingCompilerStats* const stats_;
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 76a243f..e98c97c 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -22,8 +22,6 @@
 
 #include <stdint.h>
 
-#include "android-base/strings.h"
-
 #ifdef ART_ENABLE_CODEGEN_arm64
 #include "instruction_simplifier_arm64.h"
 #endif
@@ -329,12 +327,6 @@
 
   void UnInit() const OVERRIDE;
 
-  void MaybeRecordStat(MethodCompilationStat compilation_stat) const {
-    if (compilation_stats_.get() != nullptr) {
-      compilation_stats_->RecordStat(compilation_stat);
-    }
-  }
-
   bool JitCompile(Thread* self,
                   jit::JitCodeCache* code_cache,
                   ArtMethod* method,
@@ -497,7 +489,7 @@
   } else if (opt_name == HSharpening::kSharpeningPassName) {
     return new (arena) HSharpening(graph, codegen, dex_compilation_unit, driver, handles);
   } else if (opt_name == HSelectGenerator::kSelectGeneratorPassName) {
-    return new (arena) HSelectGenerator(graph, stats);
+    return new (arena) HSelectGenerator(graph, handles, stats);
   } else if (opt_name == HInductionVarAnalysis::kInductionPassName) {
     return new (arena) HInductionVarAnalysis(graph);
   } else if (opt_name == InstructionSimplifier::kInstructionSimplifierPassName) {
@@ -512,7 +504,8 @@
   } else if (opt_name == LoadStoreElimination::kLoadStoreEliminationPassName) {
     CHECK(most_recent_side_effects != nullptr);
     CHECK(most_recent_lsa != nullptr);
-    return new (arena) LoadStoreElimination(graph, *most_recent_side_effects, *most_recent_lsa);
+    return
+        new (arena) LoadStoreElimination(graph, *most_recent_side_effects, *most_recent_lsa, stats);
   } else if (opt_name == SideEffectsAnalysis::kSideEffectsAnalysisPassName) {
     return new (arena) SideEffectsAnalysis(graph);
   } else if (opt_name == HLoopOptimization::kLoopOptimizationPassName) {
@@ -716,11 +709,12 @@
 static void AllocateRegisters(HGraph* graph,
                               CodeGenerator* codegen,
                               PassObserver* pass_observer,
-                              RegisterAllocator::Strategy strategy) {
+                              RegisterAllocator::Strategy strategy,
+                              OptimizingCompilerStats* stats) {
   {
     PassScope scope(PrepareForRegisterAllocation::kPrepareForRegisterAllocationPassName,
                     pass_observer);
-    PrepareForRegisterAllocation(graph).Run();
+    PrepareForRegisterAllocation(graph, stats).Run();
   }
   SsaLivenessAnalysis liveness(graph, codegen);
   {
@@ -764,7 +758,7 @@
   HConstantFolding* fold1 = new (arena) HConstantFolding(graph, "constant_folding");
   InstructionSimplifier* simplify1 = new (arena) InstructionSimplifier(
       graph, codegen, driver, stats);
-  HSelectGenerator* select_generator = new (arena) HSelectGenerator(graph, stats);
+  HSelectGenerator* select_generator = new (arena) HSelectGenerator(graph, handles, stats);
   HConstantFolding* fold2 = new (arena) HConstantFolding(
       graph, "constant_folding$after_inlining");
   HConstantFolding* fold3 = new (arena) HConstantFolding(graph, "constant_folding$after_bce");
@@ -778,7 +772,7 @@
   BoundsCheckElimination* bce = new (arena) BoundsCheckElimination(graph, *side_effects1, induction);
   HLoopOptimization* loop = new (arena) HLoopOptimization(graph, driver, induction);
   LoadStoreAnalysis* lsa = new (arena) LoadStoreAnalysis(graph);
-  LoadStoreElimination* lse = new (arena) LoadStoreElimination(graph, *side_effects2, *lsa);
+  LoadStoreElimination* lse = new (arena) LoadStoreElimination(graph, *side_effects2, *lsa, stats);
   HSharpening* sharpening = new (arena) HSharpening(
       graph, codegen, dex_compilation_unit, driver, handles);
   InstructionSimplifier* simplify2 = new (arena) InstructionSimplifier(
@@ -894,7 +888,8 @@
                                               ArtMethod* method,
                                               bool osr,
                                               VariableSizedHandleScope* handles) const {
-  MaybeRecordStat(MethodCompilationStat::kAttemptCompilation);
+  MaybeRecordStat(compilation_stats_.get(),
+                  MethodCompilationStat::kAttemptCompilation);
   CompilerDriver* compiler_driver = GetCompilerDriver();
   InstructionSet instruction_set = compiler_driver->GetInstructionSet();
 
@@ -904,12 +899,14 @@
 
   // Do not attempt to compile on architectures we do not support.
   if (!IsInstructionSetSupported(instruction_set)) {
-    MaybeRecordStat(MethodCompilationStat::kNotCompiledUnsupportedIsa);
+    MaybeRecordStat(compilation_stats_.get(),
+                    MethodCompilationStat::kNotCompiledUnsupportedIsa);
     return nullptr;
   }
 
   if (Compiler::IsPathologicalCase(*code_item, method_idx, dex_file)) {
-    MaybeRecordStat(MethodCompilationStat::kNotCompiledPathological);
+    MaybeRecordStat(compilation_stats_.get(),
+                    MethodCompilationStat::kNotCompiledPathological);
     return nullptr;
   }
 
@@ -919,7 +916,8 @@
   const CompilerOptions& compiler_options = compiler_driver->GetCompilerOptions();
   if ((compiler_options.GetCompilerFilter() == CompilerFilter::kSpace)
       && (code_item->insns_size_in_code_units_ > kSpaceFilterOptimizingThreshold)) {
-    MaybeRecordStat(MethodCompilationStat::kNotCompiledSpaceFilter);
+    MaybeRecordStat(compilation_stats_.get(),
+                    MethodCompilationStat::kNotCompiledSpaceFilter);
     return nullptr;
   }
 
@@ -966,7 +964,8 @@
                             compiler_driver->GetCompilerOptions(),
                             compilation_stats_.get()));
   if (codegen.get() == nullptr) {
-    MaybeRecordStat(MethodCompilationStat::kNotCompiledNoCodegen);
+    MaybeRecordStat(compilation_stats_.get(),
+                    MethodCompilationStat::kNotCompiledNoCodegen);
     return nullptr;
   }
   codegen->GetAssembler()->cfi().SetEnabled(
@@ -995,17 +994,25 @@
     GraphAnalysisResult result = builder.BuildGraph();
     if (result != kAnalysisSuccess) {
       switch (result) {
-        case kAnalysisSkipped:
-          MaybeRecordStat(MethodCompilationStat::kNotCompiledSkipped);
+        case kAnalysisSkipped: {
+          MaybeRecordStat(compilation_stats_.get(),
+                          MethodCompilationStat::kNotCompiledSkipped);
+        }
           break;
-        case kAnalysisInvalidBytecode:
-          MaybeRecordStat(MethodCompilationStat::kNotCompiledInvalidBytecode);
+        case kAnalysisInvalidBytecode: {
+          MaybeRecordStat(compilation_stats_.get(),
+                          MethodCompilationStat::kNotCompiledInvalidBytecode);
+        }
           break;
-        case kAnalysisFailThrowCatchLoop:
-          MaybeRecordStat(MethodCompilationStat::kNotCompiledThrowCatchLoop);
+        case kAnalysisFailThrowCatchLoop: {
+          MaybeRecordStat(compilation_stats_.get(),
+                          MethodCompilationStat::kNotCompiledThrowCatchLoop);
+        }
           break;
-        case kAnalysisFailAmbiguousArrayOp:
-          MaybeRecordStat(MethodCompilationStat::kNotCompiledAmbiguousArrayOp);
+        case kAnalysisFailAmbiguousArrayOp: {
+          MaybeRecordStat(compilation_stats_.get(),
+                          MethodCompilationStat::kNotCompiledAmbiguousArrayOp);
+        }
           break;
         case kAnalysisSuccess:
           UNREACHABLE();
@@ -1024,7 +1031,11 @@
 
   RegisterAllocator::Strategy regalloc_strategy =
     compiler_options.GetRegisterAllocationStrategy();
-  AllocateRegisters(graph, codegen.get(), &pass_observer, regalloc_strategy);
+  AllocateRegisters(graph,
+                    codegen.get(),
+                    &pass_observer,
+                    regalloc_strategy,
+                    compilation_stats_.get());
 
   codegen->Compile(code_allocator);
   pass_observer.DumpDisassembly();
@@ -1072,7 +1083,8 @@
                      &handles));
     }
     if (codegen.get() != nullptr) {
-      MaybeRecordStat(MethodCompilationStat::kCompiled);
+      MaybeRecordStat(compilation_stats_.get(),
+                      MethodCompilationStat::kCompiled);
       method = Emit(&arena, &code_allocator, codegen.get(), compiler_driver, code_item);
 
       if (kArenaAllocatorCountAllocations) {
@@ -1083,11 +1095,13 @@
       }
     }
   } else {
+    MethodCompilationStat method_stat;
     if (compiler_driver->GetCompilerOptions().VerifyAtRuntime()) {
-      MaybeRecordStat(MethodCompilationStat::kNotCompiledVerifyAtRuntime);
+      method_stat = MethodCompilationStat::kNotCompiledVerifyAtRuntime;
     } else {
-      MaybeRecordStat(MethodCompilationStat::kNotCompiledVerificationError);
+      method_stat = MethodCompilationStat::kNotCompiledVerificationError;
     }
+    MaybeRecordStat(compilation_stats_.get(), method_stat);
   }
 
   if (kIsDebugBuild &&
@@ -1111,12 +1125,7 @@
 
 bool IsCompilingWithCoreImage() {
   const std::string& image = Runtime::Current()->GetImageLocation();
-  // TODO: This is under-approximating...
-  if (android::base::EndsWith(image, "core.art") ||
-      android::base::EndsWith(image, "core-optimizing.art")) {
-    return true;
-  }
-  return false;
+  return CompilerDriver::IsCoreImageFilename(image);
 }
 
 bool EncodeArtMethodInInlineInfo(ArtMethod* method ATTRIBUTE_UNUSED) {
@@ -1210,18 +1219,18 @@
   uint8_t* stack_map_data = nullptr;
   uint8_t* method_info_data = nullptr;
   uint8_t* roots_data = nullptr;
-  code_cache->ReserveData(self,
-                          stack_map_size,
-                          method_info_size,
-                          number_of_roots,
-                          method,
-                          &stack_map_data,
-                          &method_info_data,
-                          &roots_data);
+  uint32_t data_size = code_cache->ReserveData(self,
+                                               stack_map_size,
+                                               method_info_size,
+                                               number_of_roots,
+                                               method,
+                                               &stack_map_data,
+                                               &method_info_data,
+                                               &roots_data);
   if (stack_map_data == nullptr || roots_data == nullptr) {
     return false;
   }
-  MaybeRecordStat(MethodCompilationStat::kCompiled);
+  MaybeRecordStat(compilation_stats_.get(), MethodCompilationStat::kCompiled);
   codegen->BuildStackMaps(MemoryRegion(stack_map_data, stack_map_size),
                           MemoryRegion(method_info_data, method_info_size),
                           *code_item);
@@ -1238,6 +1247,7 @@
       codegen->GetFpuSpillMask(),
       code_allocator.GetMemory().data(),
       code_allocator.GetSize(),
+      data_size,
       osr,
       roots,
       codegen->GetGraph()->HasShouldDeoptimizeFlag(),
diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h
index a211c54..d6da73c 100644
--- a/compiler/optimizing/optimizing_compiler_stats.h
+++ b/compiler/optimizing/optimizing_compiler_stats.h
@@ -23,6 +23,7 @@
 #include <type_traits>
 
 #include "atomic.h"
+#include "globals.h"
 
 namespace art {
 
@@ -86,6 +87,10 @@
   kNotInlinedWont,
   kNotInlinedRecursiveBudget,
   kNotInlinedProxy,
+  kConstructorFenceGeneratedNew,
+  kConstructorFenceGeneratedFinal,
+  kConstructorFenceRemovedLSE,
+  kConstructorFenceRemovedPFRA,
   kLastStat
 };
 
@@ -202,6 +207,10 @@
       case kNotInlinedWont: name = "NotInlinedWont"; break;
       case kNotInlinedRecursiveBudget: name = "NotInlinedRecursiveBudget"; break;
       case kNotInlinedProxy: name = "NotInlinedProxy"; break;
+      case kConstructorFenceGeneratedNew: name = "ConstructorFenceGeneratedNew"; break;
+      case kConstructorFenceGeneratedFinal: name = "ConstructorFenceGeneratedFinal"; break;
+      case kConstructorFenceRemovedLSE: name = "ConstructorFenceRemovedLSE"; break;
+      case kConstructorFenceRemovedPFRA: name = "ConstructorFenceRemovedPFRA"; break;
 
       case kLastStat:
         LOG(FATAL) << "invalid stat "
@@ -216,6 +225,14 @@
   DISALLOW_COPY_AND_ASSIGN(OptimizingCompilerStats);
 };
 
+inline void MaybeRecordStat(OptimizingCompilerStats* compiler_stats,
+                            MethodCompilationStat stat,
+                            uint32_t count = 1) {
+  if (compiler_stats != nullptr) {
+    compiler_stats->RecordStat(stat, count);
+  }
+}
+
 }  // namespace art
 
 #endif  // ART_COMPILER_OPTIMIZING_OPTIMIZING_COMPILER_STATS_H_
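The free-standing MaybeRecordStat above replaces the former member helper so that any pass can record statistics through a possibly-null pointer. A minimal sketch of the calling pattern, with a hypothetical pass function (kCompiled is one of the real enum values):

    // stats may be nullptr when statistics collection is disabled; the free
    // MaybeRecordStat handles that case, so callers need no null check.
    void RunPass(OptimizingCompilerStats* stats) {
      MaybeRecordStat(stats, MethodCompilationStat::kCompiled);
    }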
diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc
index aa42fd6..5de707a 100644
--- a/compiler/optimizing/prepare_for_register_allocation.cc
+++ b/compiler/optimizing/prepare_for_register_allocation.cc
@@ -17,6 +17,7 @@
 #include "prepare_for_register_allocation.h"
 
 #include "jni_internal.h"
+#include "optimizing_compiler_stats.h"
 #include "well_known_classes.h"
 
 namespace art {
@@ -190,8 +191,9 @@
       // TODO: GetAssociatedAllocation should not care about multiple inputs
       // if we are in prepare_for_register_allocation pass only.
       constructor_fence->GetBlock()->RemoveInstruction(constructor_fence);
+      MaybeRecordStat(stats_,
+                      MethodCompilationStat::kConstructorFenceRemovedPFRA);
       return;
-      // TODO: actually remove the dmb from the .S entrypoints (initialized variants only).
     }
 
     // HNewArray does not need this check because the art_quick_alloc_array does not itself
@@ -208,8 +210,8 @@
 
 void PrepareForRegisterAllocation::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
   if (invoke->IsStaticWithExplicitClinitCheck()) {
-    HLoadClass* last_input = invoke->GetInputs().back()->AsLoadClass();
-    DCHECK(last_input != nullptr)
+    HInstruction* last_input = invoke->GetInputs().back();
+    DCHECK(last_input->IsLoadClass())
         << "Last input is not HLoadClass. It is " << last_input->DebugName();
 
     // Detach the explicit class initialization check from the invoke.
diff --git a/compiler/optimizing/prepare_for_register_allocation.h b/compiler/optimizing/prepare_for_register_allocation.h
index 395d4ba..2c64f01 100644
--- a/compiler/optimizing/prepare_for_register_allocation.h
+++ b/compiler/optimizing/prepare_for_register_allocation.h
@@ -21,6 +21,8 @@
 
 namespace art {
 
+class OptimizingCompilerStats;
+
 /**
  * A simplification pass over the graph before doing register allocation.
  * For example it changes uses of null checks and bounds checks to the original
@@ -28,7 +30,9 @@
  */
 class PrepareForRegisterAllocation : public HGraphDelegateVisitor {
  public:
-  explicit PrepareForRegisterAllocation(HGraph* graph) : HGraphDelegateVisitor(graph) {}
+  explicit PrepareForRegisterAllocation(HGraph* graph,
+                                        OptimizingCompilerStats* stats = nullptr)
+      : HGraphDelegateVisitor(graph, stats) {}
 
   void Run();
 
diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc
index 561c9ea..93613a5 100644
--- a/compiler/optimizing/reference_type_propagation.cc
+++ b/compiler/optimizing/reference_type_propagation.cc
@@ -754,8 +754,23 @@
   }
 }
 
+void ReferenceTypePropagation::FixUpInstructionType(HInstruction* instruction,
+                                                    VariableSizedHandleScope* handle_scope) {
+  if (instruction->IsSelect()) {
+    ScopedObjectAccess soa(Thread::Current());
+    HandleCache handle_cache(handle_scope);
+    HSelect* select = instruction->AsSelect();
+    ReferenceTypeInfo false_rti = select->GetFalseValue()->GetReferenceTypeInfo();
+    ReferenceTypeInfo true_rti = select->GetTrueValue()->GetReferenceTypeInfo();
+    select->SetReferenceTypeInfo(MergeTypes(false_rti, true_rti, &handle_cache));
+  } else {
+    LOG(FATAL) << "Invalid instruction in FixUpInstructionType";
+  }
+}
+
 ReferenceTypeInfo ReferenceTypePropagation::MergeTypes(const ReferenceTypeInfo& a,
-                                                       const ReferenceTypeInfo& b) {
+                                                       const ReferenceTypeInfo& b,
+                                                       HandleCache* handle_cache) {
   if (!b.IsValid()) {
     return a;
   }
@@ -780,7 +795,7 @@
     is_exact = false;
   } else if (!a_is_interface && !b_is_interface) {
     result_type_handle =
-        handle_cache_.NewHandle(a_type_handle->GetCommonSuperClass(b_type_handle));
+        handle_cache->NewHandle(a_type_handle->GetCommonSuperClass(b_type_handle));
     is_exact = false;
   } else {
     // This can happen if:
@@ -790,7 +805,7 @@
     //        void foo(Interface i, boolean cond) {
     //          Object o = cond ? i : new Object();
     //        }
-    result_type_handle = handle_cache_.GetObjectClassHandle();
+    result_type_handle = handle_cache->GetObjectClassHandle();
     is_exact = false;
   }
 
@@ -916,7 +931,7 @@
     if (inputs[i]->IsNullConstant()) {
       continue;
     }
-    new_rti = MergeTypes(new_rti, inputs[i]->GetReferenceTypeInfo());
+    new_rti = MergeTypes(new_rti, inputs[i]->GetReferenceTypeInfo(), &handle_cache_);
     if (new_rti.IsValid() && new_rti.IsObjectClass()) {
       if (!new_rti.IsExact()) {
         break;
diff --git a/compiler/optimizing/reference_type_propagation.h b/compiler/optimizing/reference_type_propagation.h
index b19f473..c221282 100644
--- a/compiler/optimizing/reference_type_propagation.h
+++ b/compiler/optimizing/reference_type_propagation.h
@@ -54,6 +54,12 @@
 
   static constexpr const char* kReferenceTypePropagationPassName = "reference_type_propagation";
 
+  // Fix the reference type for an instruction whose inputs have changed.
+  // For a select instruction, the reference types of the inputs are merged
+  // and the resulting reference type is set on the select instruction.
+  static void FixUpInstructionType(HInstruction* instruction,
+                                   VariableSizedHandleScope* handle_scope);
+
  private:
   class HandleCache {
    public:
@@ -101,7 +107,9 @@
   static void UpdateArrayGet(HArrayGet* instr, HandleCache* handle_cache)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  ReferenceTypeInfo MergeTypes(const ReferenceTypeInfo& a, const ReferenceTypeInfo& b)
+  static ReferenceTypeInfo MergeTypes(const ReferenceTypeInfo& a,
+                                      const ReferenceTypeInfo& b,
+                                      HandleCache* handle_cache)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   void ValidateTypes();
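A toy model of the join rule that the three-argument MergeTypes implements, per the hunks above: two non-interface classes merge to their common superclass, while interface-typed or unrelated inputs fall back to java.lang.Object. The hierarchy and names below are illustrative, not ART code:

    #include <cassert>
    #include <string>

    // Toy hierarchy: Object <- Number <- {Integer, Long}; String is unrelated.
    std::string Merge(const std::string& a, const std::string& b) {
      if (a == b) return a;
      auto numeric = [](const std::string& t) {
        return t == "Number" || t == "Integer" || t == "Long";
      };
      if (numeric(a) && numeric(b)) return "Number";  // common superclass
      return "Object";  // interfaces or unrelated classes merge to Object
    }

    int main() {
      assert(Merge("Integer", "Long") == "Number");
      assert(Merge("Integer", "String") == "Object");
      return 0;
    }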
diff --git a/compiler/optimizing/reference_type_propagation_test.cc b/compiler/optimizing/reference_type_propagation_test.cc
index d537459..cb2af91 100644
--- a/compiler/optimizing/reference_type_propagation_test.cc
+++ b/compiler/optimizing/reference_type_propagation_test.cc
@@ -49,7 +49,7 @@
   // Relay method to merge type in reference type propagation.
   ReferenceTypeInfo MergeTypes(const ReferenceTypeInfo& a,
                                const ReferenceTypeInfo& b) REQUIRES_SHARED(Locks::mutator_lock_) {
-    return propagation_->MergeTypes(a, b);
+    return propagation_->MergeTypes(a, b, &propagation_->handle_cache_);
   }
 
   // Helper method to construct an invalid type.
@@ -163,4 +163,3 @@
 }
 
 }  // namespace art
-
diff --git a/compiler/optimizing/scheduler.cc b/compiler/optimizing/scheduler.cc
index 5ad011d..38cd51b 100644
--- a/compiler/optimizing/scheduler.cc
+++ b/compiler/optimizing/scheduler.cc
@@ -554,6 +554,14 @@
 }
 
 void HScheduler::Schedule(HGraph* graph) {
+  // We run load-store analysis (LSA) here instead of in a separate pass so
+  // that we can better control whether the analysis runs at all.
+  LoadStoreAnalysis lsa(graph);
+  if (!only_optimize_loop_blocks_ || graph->HasLoops()) {
+    lsa.Run();
+    scheduling_graph_.SetHeapLocationCollector(lsa.GetHeapLocationCollector());
+  }
+
   for (HBasicBlock* block : graph->GetReversePostOrder()) {
     if (IsSchedulable(block)) {
       Schedule(block);
@@ -566,14 +574,6 @@
 
   // Build the scheduling graph.
   scheduling_graph_.Clear();
-
-  // Only perform LSA/HeapLocation analysis on the basic block that
-  // is going to get instruction scheduled.
-  HeapLocationCollector heap_location_collector(block->GetGraph());
-  heap_location_collector.VisitBasicBlock(block);
-  heap_location_collector.BuildAliasingMatrix();
-  scheduling_graph_.SetHeapLocationCollector(heap_location_collector);
-
   for (HBackwardInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
     HInstruction* instruction = it.Current();
     CHECK_EQ(instruction->GetBlock(), block)
@@ -724,8 +724,8 @@
       instruction->IsClassTableGet() ||
       instruction->IsCurrentMethod() ||
       instruction->IsDivZeroCheck() ||
-      instruction->IsInstanceFieldGet() ||
-      instruction->IsInstanceFieldSet() ||
+      (instruction->IsInstanceFieldGet() && !instruction->AsInstanceFieldGet()->IsVolatile()) ||
+      (instruction->IsInstanceFieldSet() && !instruction->AsInstanceFieldSet()->IsVolatile()) ||
       instruction->IsInstanceOf() ||
       instruction->IsInvokeInterface() ||
       instruction->IsInvokeStaticOrDirect() ||
@@ -741,14 +741,10 @@
       instruction->IsReturn() ||
       instruction->IsReturnVoid() ||
       instruction->IsSelect() ||
-      instruction->IsStaticFieldGet() ||
-      instruction->IsStaticFieldSet() ||
+      (instruction->IsStaticFieldGet() && !instruction->AsStaticFieldGet()->IsVolatile()) ||
+      (instruction->IsStaticFieldSet() && !instruction->AsStaticFieldSet()->IsVolatile()) ||
       instruction->IsSuspendCheck() ||
-      instruction->IsTypeConversion() ||
-      instruction->IsUnresolvedInstanceFieldGet() ||
-      instruction->IsUnresolvedInstanceFieldSet() ||
-      instruction->IsUnresolvedStaticFieldGet() ||
-      instruction->IsUnresolvedStaticFieldSet();
+      instruction->IsTypeConversion();
 }
 
 bool HScheduler::IsSchedulable(const HBasicBlock* block) const {
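The volatile exclusions above matter because Java volatile accesses carry acquire/release ordering that instruction scheduling must preserve. A standalone C++ analogy of the invariant, with std::atomic release/acquire standing in for the volatile field:

    #include <atomic>

    int data = 0;
    std::atomic<bool> ready{false};  // stands in for a volatile Java field

    void Producer() {
      data = 42;                                     // plain store: reorderable
      ready.store(true, std::memory_order_release);  // must not move above data = 42
    }

    int Consumer() {
      while (!ready.load(std::memory_order_acquire)) {}  // must not move below the read of data
      return data;  // guaranteed to observe 42
    }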
diff --git a/compiler/optimizing/scheduler_arm.cc b/compiler/optimizing/scheduler_arm.cc
index ea15790..d6eb6e3 100644
--- a/compiler/optimizing/scheduler_arm.cc
+++ b/compiler/optimizing/scheduler_arm.cc
@@ -20,6 +20,7 @@
 #include "code_generator_utils.h"
 #include "common_arm.h"
 #include "mirror/array-inl.h"
+#include "mirror/string.h"
 
 namespace art {
 namespace arm {
diff --git a/compiler/optimizing/scheduler_arm64.cc b/compiler/optimizing/scheduler_arm64.cc
index f54d3f3..510619f 100644
--- a/compiler/optimizing/scheduler_arm64.cc
+++ b/compiler/optimizing/scheduler_arm64.cc
@@ -18,6 +18,7 @@
 
 #include "code_generator_utils.h"
 #include "mirror/array-inl.h"
+#include "mirror/string.h"
 
 namespace art {
 namespace arm64 {
diff --git a/compiler/optimizing/select_generator.cc b/compiler/optimizing/select_generator.cc
index 46d0d0e..e220d32 100644
--- a/compiler/optimizing/select_generator.cc
+++ b/compiler/optimizing/select_generator.cc
@@ -20,9 +20,16 @@
 
 static constexpr size_t kMaxInstructionsInBranch = 1u;
 
-// Returns true if `block` has only one predecessor, ends with a Goto and
-// contains at most `kMaxInstructionsInBranch` other movable instruction with
-// no side-effects.
+HSelectGenerator::HSelectGenerator(HGraph* graph,
+                                   VariableSizedHandleScope* handles,
+                                   OptimizingCompilerStats* stats)
+    : HOptimization(graph, kSelectGeneratorPassName, stats),
+      handle_scope_(handles) {
+}
+
+// Returns true if `block` has only one predecessor, ends with a Goto
+// or a Return, and contains at most `kMaxInstructionsInBranch` other
+// movable instructions with no side effects.
 static bool IsSimpleBlock(HBasicBlock* block) {
   if (block->GetPredecessors().size() != 1u) {
     return false;
@@ -33,7 +40,10 @@
   for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
     HInstruction* instruction = it.Current();
     if (instruction->IsControlFlow()) {
-      return instruction->IsGoto() && num_instructions <= kMaxInstructionsInBranch;
+      if (num_instructions > kMaxInstructionsInBranch) {
+        return false;
+      }
+      return instruction->IsGoto() || instruction->IsReturn();
     } else if (instruction->CanBeMoved() && !instruction->HasSideEffects()) {
       num_instructions++;
     } else {
@@ -45,8 +55,8 @@
   UNREACHABLE();
 }
 
-// Returns true if 'block1' and 'block2' are empty, merge into the same single
-// successor and the successor can only be reached from them.
+// Returns true if 'block1' and 'block2' are empty and merge into the
+// same single successor.
 static bool BlocksMergeTogether(HBasicBlock* block1, HBasicBlock* block2) {
   return block1->GetSingleSuccessor() == block2->GetSingleSuccessor();
 }
@@ -94,53 +104,73 @@
     // If the branches are not empty, move instructions in front of the If.
     // TODO(dbrazdil): This puts an instruction between If and its condition.
     //                 Implement moving of conditions to first users if possible.
-    if (!true_block->IsSingleGoto()) {
+    if (!true_block->IsSingleGoto() && !true_block->IsSingleReturn()) {
       true_block->GetFirstInstruction()->MoveBefore(if_instruction);
     }
-    if (!false_block->IsSingleGoto()) {
+    if (!false_block->IsSingleGoto() && !false_block->IsSingleReturn()) {
       false_block->GetFirstInstruction()->MoveBefore(if_instruction);
     }
-    DCHECK(true_block->IsSingleGoto());
-    DCHECK(false_block->IsSingleGoto());
+    DCHECK(true_block->IsSingleGoto() || true_block->IsSingleReturn());
+    DCHECK(false_block->IsSingleGoto() || false_block->IsSingleReturn());
 
     // Find the resulting true/false values.
     size_t predecessor_index_true = merge_block->GetPredecessorIndexOf(true_block);
     size_t predecessor_index_false = merge_block->GetPredecessorIndexOf(false_block);
     DCHECK_NE(predecessor_index_true, predecessor_index_false);
 
+    bool both_successors_return = true_block->IsSingleReturn() && false_block->IsSingleReturn();
     HPhi* phi = GetSingleChangedPhi(merge_block, predecessor_index_true, predecessor_index_false);
-    if (phi == nullptr) {
+
+    HInstruction* true_value = nullptr;
+    HInstruction* false_value = nullptr;
+    if (both_successors_return) {
+      true_value = true_block->GetFirstInstruction()->InputAt(0);
+      false_value = false_block->GetFirstInstruction()->InputAt(0);
+    } else if (phi != nullptr) {
+      true_value = phi->InputAt(predecessor_index_true);
+      false_value = phi->InputAt(predecessor_index_false);
+    } else {
       continue;
     }
-    HInstruction* true_value = phi->InputAt(predecessor_index_true);
-    HInstruction* false_value = phi->InputAt(predecessor_index_false);
+    DCHECK(both_successors_return || phi != nullptr);
 
     // Create the Select instruction and insert it in front of the If.
     HSelect* select = new (graph_->GetArena()) HSelect(if_instruction->InputAt(0),
                                                        true_value,
                                                        false_value,
                                                        if_instruction->GetDexPc());
-    if (phi->GetType() == Primitive::kPrimNot) {
+    if (both_successors_return) {
+      if (true_value->GetType() == Primitive::kPrimNot) {
+        DCHECK(false_value->GetType() == Primitive::kPrimNot);
+        ReferenceTypePropagation::FixUpInstructionType(select, handle_scope_);
+      }
+    } else if (phi->GetType() == Primitive::kPrimNot) {
       select->SetReferenceTypeInfo(phi->GetReferenceTypeInfo());
     }
     block->InsertInstructionBefore(select, if_instruction);
 
-    // Remove the true branch which removes the corresponding Phi input.
-    // If left only with the false branch, the Phi is automatically removed.
-    phi->ReplaceInput(select, predecessor_index_false);
+    // Remove the true branch, which also removes the corresponding Phi
+    // input if needed. If only the false branch is left, the Phi is
+    // removed automatically.
+    if (both_successors_return) {
+      false_block->GetFirstInstruction()->ReplaceInput(select, 0);
+    } else {
+      phi->ReplaceInput(select, predecessor_index_false);
+    }
+
     bool only_two_predecessors = (merge_block->GetPredecessors().size() == 2u);
     true_block->DisconnectAndDelete();
-    DCHECK_EQ(only_two_predecessors, phi->GetBlock() == nullptr);
 
     // Merge remaining blocks which are now connected with Goto.
     DCHECK_EQ(block->GetSingleSuccessor(), false_block);
     block->MergeWith(false_block);
-    if (only_two_predecessors) {
+    if (!both_successors_return && only_two_predecessors) {
+      DCHECK_EQ(only_two_predecessors, phi->GetBlock() == nullptr);
       DCHECK_EQ(block->GetSingleSuccessor(), merge_block);
       block->MergeWith(merge_block);
     }
 
-    MaybeRecordStat(MethodCompilationStat::kSelectGenerated);
+    MaybeRecordStat(stats_, MethodCompilationStat::kSelectGenerated);
 
     // No need to update dominance information, as we are simplifying
     // a simple diamond shape, where the join block is merged with the
diff --git a/compiler/optimizing/select_generator.h b/compiler/optimizing/select_generator.h
index c6dca58..c060146 100644
--- a/compiler/optimizing/select_generator.h
+++ b/compiler/optimizing/select_generator.h
@@ -18,7 +18,7 @@
  * This optimization recognizes the common diamond selection pattern and
  * replaces it with an instance of the HSelect instruction.
  *
- * Recognized pattern:
+ * Recognized patterns:
  *
  *          If [ Condition ]
  *            /          \
@@ -26,14 +26,30 @@
  *            \          /
  *     Phi [FalseValue, TrueValue]
  *
+ * and
+ *
+ *             If [ Condition ]
+ *               /          \
+ *     false branch        true branch
+ *     return FalseValue   return TrueValue
+ *
  * The pattern will be simplified if `true_branch` and `false_branch` each
  * contain at most one instruction without any side effects.
  *
- * Blocks are merged into one and Select replaces the If and the Phi:
+ * Blocks are merged into one and Select replaces the If and the Phi.
+ *
+ * For the first pattern it simplifies to:
+ *
  *              true branch
  *              false branch
  *              Select [FalseValue, TrueValue, Condition]
  *
+ * For the second pattern it simplifies to:
+ *
+ *              true branch
+ *              false branch
+ *              return Select [FalseValue, TrueValue, Condition]
+ *
  * Note: In order to recognize no side-effect blocks, this optimization must be
  * run after the instruction simplifier has removed redundant suspend checks.
  */
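In source terms, the second pattern corresponds to a branch whose two arms each immediately return. A hypothetical C++ analogue of the newly recognized shape:

    // Before the pass: an If whose successors are single-Return blocks.
    int Pick(bool cond, int a, int b) {
      if (cond) {
        return a;  // true branch: return TrueValue
      } else {
        return b;  // false branch: return FalseValue
      }
    }
    // After the pass (conceptually): return Select [b, a, cond].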
@@ -42,19 +58,22 @@
 #define ART_COMPILER_OPTIMIZING_SELECT_GENERATOR_H_
 
 #include "optimization.h"
+#include "reference_type_propagation.h"
 
 namespace art {
 
 class HSelectGenerator : public HOptimization {
  public:
-  HSelectGenerator(HGraph* graph, OptimizingCompilerStats* stats)
-    : HOptimization(graph, kSelectGeneratorPassName, stats) {}
+  HSelectGenerator(HGraph* graph,
+                   VariableSizedHandleScope* handles,
+                   OptimizingCompilerStats* stats);
 
   void Run() OVERRIDE;
 
   static constexpr const char* kSelectGeneratorPassName = "select_generator";
 
  private:
+  VariableSizedHandleScope* handle_scope_;
   DISALLOW_COPY_AND_ASSIGN(HSelectGenerator);
 };
 
diff --git a/compiler/utils/arm/assembler_arm_vixl.cc b/compiler/utils/arm/assembler_arm_vixl.cc
index af3b447..9df1b74 100644
--- a/compiler/utils/arm/assembler_arm_vixl.cc
+++ b/compiler/utils/arm/assembler_arm_vixl.cc
@@ -82,6 +82,22 @@
   }
 }
 
+void ArmVIXLAssembler::GenerateMarkingRegisterCheck(vixl32::Register temp, int code) {
+  // The Marking Register is only used in the Baker read barrier configuration.
+  DCHECK(kEmitCompilerReadBarrier);
+  DCHECK(kUseBakerReadBarrier);
+
+  vixl32::Label mr_is_ok;
+
+  // temp = self.tls32_.is_gc_marking
+  ___ Ldr(temp, MemOperand(tr, Thread::IsGcMarkingOffset<kArmPointerSize>().Int32Value()));
+  // Check that mr == self.tls32_.is_gc_marking.
+  ___ Cmp(mr, temp);
+  ___ B(eq, &mr_is_ok, /* far_target */ false);
+  ___ Bkpt(code);
+  ___ Bind(&mr_is_ok);
+}
+
 void ArmVIXLAssembler::LoadImmediate(vixl32::Register rd, int32_t value) {
   // TODO(VIXL): Implement this optimization in VIXL.
   if (!ShifterOperandCanAlwaysHold(value) && ShifterOperandCanAlwaysHold(~value)) {
diff --git a/compiler/utils/arm/assembler_arm_vixl.h b/compiler/utils/arm/assembler_arm_vixl.h
index 66b22ea..9c11fd3 100644
--- a/compiler/utils/arm/assembler_arm_vixl.h
+++ b/compiler/utils/arm/assembler_arm_vixl.h
@@ -178,6 +178,7 @@
   //
   // Heap poisoning.
   //
+
   // Poison a heap reference contained in `reg`.
   void PoisonHeapReference(vixl32::Register reg);
   // Unpoison a heap reference contained in `reg`.
@@ -187,6 +188,15 @@
   // Unpoison a heap reference contained in `reg` if heap poisoning is enabled.
   void MaybeUnpoisonHeapReference(vixl32::Register reg);
 
+  // Emit code checking the status of the Marking Register, and aborting
+  // the program if MR does not match the value stored in the art::Thread
+  // object.
+  //
+  // Argument `temp` is used as a temporary register to generate code.
+  // Argument `code` is used to identify the different occurrences of
+  // MaybeGenerateMarkingRegisterCheck and is passed to the BKPT instruction.
+  void GenerateMarkingRegisterCheck(vixl32::Register temp, int code = 0);
+
   void StoreToOffset(StoreOperandType type,
                      vixl32::Register reg,
                      vixl32::Register base,
diff --git a/compiler/utils/arm64/assembler_arm64.cc b/compiler/utils/arm64/assembler_arm64.cc
index 6ed0e9b..d8a48a5 100644
--- a/compiler/utils/arm64/assembler_arm64.cc
+++ b/compiler/utils/arm64/assembler_arm64.cc
@@ -158,6 +158,24 @@
   }
 }
 
+void Arm64Assembler::GenerateMarkingRegisterCheck(Register temp, int code) {
+  // The Marking Register is only used in the Baker read barrier configuration.
+  DCHECK(kEmitCompilerReadBarrier);
+  DCHECK(kUseBakerReadBarrier);
+
+  vixl::aarch64::Register mr = reg_x(MR);  // Marking Register.
+  vixl::aarch64::Register tr = reg_x(TR);  // Thread Register.
+  vixl::aarch64::Label mr_is_ok;
+
+  // temp = self.tls32_.is_gc_marking
+  ___ Ldr(temp, MemOperand(tr, Thread::IsGcMarkingOffset<kArm64PointerSize>().Int32Value()));
+  // Check that mr == self.tls32_.is_gc_marking.
+  ___ Cmp(mr.W(), temp);
+  ___ B(eq, &mr_is_ok);
+  ___ Brk(code);
+  ___ Bind(&mr_is_ok);
+}
+
 #undef ___
 
 }  // namespace arm64
diff --git a/compiler/utils/arm64/assembler_arm64.h b/compiler/utils/arm64/assembler_arm64.h
index 5b8a34e..6b28363 100644
--- a/compiler/utils/arm64/assembler_arm64.h
+++ b/compiler/utils/arm64/assembler_arm64.h
@@ -98,6 +98,15 @@
   // Unpoison a heap reference contained in `reg` if heap poisoning is enabled.
   void MaybeUnpoisonHeapReference(vixl::aarch64::Register reg);
 
+  // Emit code checking the status of the Marking Register, and aborting
+  // the program if MR does not match the value stored in the art::Thread
+  // object.
+  //
+  // Argument `temp` is used as a temporary register to generate code.
+  // Argument `code` is used to identify the different occurrences of
+  // MaybeGenerateMarkingRegisterCheck and is passed to the BRK instruction.
+  void GenerateMarkingRegisterCheck(vixl::aarch64::Register temp, int code = 0);
+
   void Bind(Label* label ATTRIBUTE_UNUSED) OVERRIDE {
     UNIMPLEMENTED(FATAL) << "Do not use Bind for ARM64";
   }
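A hedged standalone model of the invariant both the ARM and ARM64 checks enforce: the dedicated Marking Register must mirror Thread::tls32_.is_gc_marking at every check point. The types and names below are illustrative, not the ART ones:

    #include <cassert>

    struct ToyThread { bool is_gc_marking = false; };

    // Equivalent of: ldr temp, [tr, #offset]; cmp mr, temp; trap if unequal.
    void CheckMarkingRegister(int mr, const ToyThread& self) {
      assert(mr == static_cast<int>(self.is_gc_marking) && "MR out of sync");
    }

    int main() {
      ToyThread self;
      CheckMarkingRegister(0, self);  // passes while GC is not marking
      return 0;
    }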
diff --git a/compiler/utils/arm64/jni_macro_assembler_arm64.cc b/compiler/utils/arm64/jni_macro_assembler_arm64.cc
index bab84be..9732b76 100644
--- a/compiler/utils/arm64/jni_macro_assembler_arm64.cc
+++ b/compiler/utils/arm64/jni_macro_assembler_arm64.cc
@@ -662,7 +662,7 @@
   ___ Bind(Arm64JNIMacroLabel::Cast(label)->AsArm64());
 }
 
-void Arm64JNIMacroAssembler::EmitExceptionPoll(Arm64Exception *exception) {
+void Arm64JNIMacroAssembler::EmitExceptionPoll(Arm64Exception* exception) {
   UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
   temps.Exclude(reg_x(exception->scratch_.AsXRegister()));
   Register temp = temps.AcquireX();
diff --git a/compiler/utils/jni_macro_assembler.h b/compiler/utils/jni_macro_assembler.h
index 59a1a48..a8ca111 100644
--- a/compiler/utils/jni_macro_assembler.h
+++ b/compiler/utils/jni_macro_assembler.h
@@ -216,8 +216,15 @@
    */
   virtual DebugFrameOpCodeWriterForAssembler& cfi() = 0;
 
+  void SetEmitRunTimeChecksInDebugMode(bool value) {
+    emit_run_time_checks_in_debug_mode_ = value;
+  }
+
  protected:
-  explicit JNIMacroAssembler() {}
+  JNIMacroAssembler() {}
+
+  // Should run-time checks be emitted in debug mode?
+  bool emit_run_time_checks_in_debug_mode_ = false;
 };
 
 // A "Label" class used with the JNIMacroAssembler
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index b50f1af..b89af10 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -1606,6 +1606,42 @@
 }
 
 
+void X86Assembler::punpckhbw(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitUint8(0x0F);
+  EmitUint8(0x68);
+  EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86Assembler::punpckhwd(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitUint8(0x0F);
+  EmitUint8(0x69);
+  EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86Assembler::punpckhdq(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitUint8(0x0F);
+  EmitUint8(0x6A);
+  EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86Assembler::punpckhqdq(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitUint8(0x0F);
+  EmitUint8(0x6D);
+  EmitXmmRegisterOperand(dst, src);
+}
+
+
 void X86Assembler::psllw(XmmRegister reg, const Immediate& shift_count) {
   DCHECK(shift_count.is_uint8());
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
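The punpckh* family interleaves the high halves of its two operands. A standalone SSE2 illustration of punpckhbw via the matching intrinsic; _mm_unpackhi_epi8 produces the same 66 0F 68 encoding emitted above:

    #include <emmintrin.h>
    #include <cstdint>
    #include <cstdio>

    int main() {
      alignas(16) uint8_t a[16], b[16], r[16];
      for (int i = 0; i < 16; ++i) { a[i] = i; b[i] = 0x80 + i; }
      __m128i va = _mm_load_si128(reinterpret_cast<__m128i*>(a));
      __m128i vb = _mm_load_si128(reinterpret_cast<__m128i*>(b));
      // Interleave the high 8 bytes: r = {a8, b8, a9, b9, ..., a15, b15}.
      _mm_store_si128(reinterpret_cast<__m128i*>(r), _mm_unpackhi_epi8(va, vb));
      for (int i = 0; i < 16; ++i) printf("%02x ", r[i]);
      printf("\n");
      return 0;
    }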
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index 8578340..511eeb9 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -546,6 +546,11 @@
   void punpckldq(XmmRegister dst, XmmRegister src);
   void punpcklqdq(XmmRegister dst, XmmRegister src);
 
+  void punpckhbw(XmmRegister dst, XmmRegister src);
+  void punpckhwd(XmmRegister dst, XmmRegister src);
+  void punpckhdq(XmmRegister dst, XmmRegister src);
+  void punpckhqdq(XmmRegister dst, XmmRegister src);
+
   void psllw(XmmRegister reg, const Immediate& shift_count);
   void pslld(XmmRegister reg, const Immediate& shift_count);
   void psllq(XmmRegister reg, const Immediate& shift_count);
diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc
index 3e1244e..d2122db 100644
--- a/compiler/utils/x86/assembler_x86_test.cc
+++ b/compiler/utils/x86/assembler_x86_test.cc
@@ -777,6 +777,22 @@
   DriverStr(RepeatFF(&x86::X86Assembler::punpcklqdq, "punpcklqdq %{reg2}, %{reg1}"), "punpcklqdq");
 }
 
+TEST_F(AssemblerX86Test, Punpckhbw) {
+  DriverStr(RepeatFF(&x86::X86Assembler::punpckhbw, "punpckhbw %{reg2}, %{reg1}"), "punpckhbw");
+}
+
+TEST_F(AssemblerX86Test, Punpckhwd) {
+  DriverStr(RepeatFF(&x86::X86Assembler::punpckhwd, "punpckhwd %{reg2}, %{reg1}"), "punpckhwd");
+}
+
+TEST_F(AssemblerX86Test, Punpckhdq) {
+  DriverStr(RepeatFF(&x86::X86Assembler::punpckhdq, "punpckhdq %{reg2}, %{reg1}"), "punpckhdq");
+}
+
+TEST_F(AssemblerX86Test, Punpckhqdq) {
+  DriverStr(RepeatFF(&x86::X86Assembler::punpckhqdq, "punpckhqdq %{reg2}, %{reg1}"), "punpckhqdq");
+}
+
 TEST_F(AssemblerX86Test, psllw) {
   GetAssembler()->psllw(x86::XMM0, CreateImmediate(16));
   DriverStr("psllw $0x10, %xmm0\n", "psllwi");
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index ea69a1c..3bff67d 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -1835,6 +1835,46 @@
 }
 
 
+void X86_64Assembler::punpckhbw(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0x68);
+  EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+
+void X86_64Assembler::punpckhwd(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0x69);
+  EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+
+void X86_64Assembler::punpckhdq(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0x6A);
+  EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+
+void X86_64Assembler::punpckhqdq(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0x6D);
+  EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+
 void X86_64Assembler::psllw(XmmRegister reg, const Immediate& shift_count) {
   DCHECK(shift_count.is_uint8());
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
@@ -1931,6 +1971,18 @@
 }
 
 
+void X86_64Assembler::psrldq(XmmRegister reg, const Immediate& shift_count) {
+  DCHECK(shift_count.is_uint8());
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitOptionalRex(false, false, false, false, reg.NeedsRex());
+  EmitUint8(0x0F);
+  EmitUint8(0x73);
+  EmitXmmRegisterOperand(3, reg);
+  EmitUint8(shift_count.value());
+}
+
+
 void X86_64Assembler::fldl(const Address& src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0xDD);
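Unlike psrlq, which shifts each 64-bit lane independently, psrldq shifts the whole 128-bit register right by a byte count. A standalone illustration via the matching SSE2 intrinsic; _mm_srli_si128 produces the same 66 0F 73 /3 encoding added above:

    #include <emmintrin.h>
    #include <cstdint>
    #include <cstdio>

    int main() {
      alignas(16) uint8_t v[16], r[16];
      for (int i = 0; i < 16; ++i) v[i] = i + 1;
      __m128i x = _mm_load_si128(reinterpret_cast<__m128i*>(v));
      // Shift the 128-bit value right by 4 bytes; the top 4 bytes become zero.
      _mm_store_si128(reinterpret_cast<__m128i*>(r), _mm_srli_si128(x, 4));
      for (int i = 0; i < 16; ++i) printf("%02x ", r[i]);
      printf("\n");
      return 0;
    }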
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index 41450bf..3dab235 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -574,6 +574,11 @@
   void punpckldq(XmmRegister dst, XmmRegister src);
   void punpcklqdq(XmmRegister dst, XmmRegister src);
 
+  void punpckhbw(XmmRegister dst, XmmRegister src);
+  void punpckhwd(XmmRegister dst, XmmRegister src);
+  void punpckhdq(XmmRegister dst, XmmRegister src);
+  void punpckhqdq(XmmRegister dst, XmmRegister src);
+
   void psllw(XmmRegister reg, const Immediate& shift_count);
   void pslld(XmmRegister reg, const Immediate& shift_count);
   void psllq(XmmRegister reg, const Immediate& shift_count);
@@ -585,6 +590,7 @@
   void psrlw(XmmRegister reg, const Immediate& shift_count);
   void psrld(XmmRegister reg, const Immediate& shift_count);
   void psrlq(XmmRegister reg, const Immediate& shift_count);
+  void psrldq(XmmRegister reg, const Immediate& shift_count);
 
   void flds(const Address& src);
   void fstps(const Address& dst);
diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc
index ec14e7a..85afee0 100644
--- a/compiler/utils/x86_64/assembler_x86_64_test.cc
+++ b/compiler/utils/x86_64/assembler_x86_64_test.cc
@@ -1465,6 +1465,22 @@
   DriverStr(RepeatFF(&x86_64::X86_64Assembler::punpcklqdq, "punpcklqdq %{reg2}, %{reg1}"), "punpcklqdq");
 }
 
+TEST_F(AssemblerX86_64Test, Punpckhbw) {
+  DriverStr(RepeatFF(&x86_64::X86_64Assembler::punpckhbw, "punpckhbw %{reg2}, %{reg1}"), "punpckhbw");
+}
+
+TEST_F(AssemblerX86_64Test, Punpckhwd) {
+  DriverStr(RepeatFF(&x86_64::X86_64Assembler::punpckhwd, "punpckhwd %{reg2}, %{reg1}"), "punpckhwd");
+}
+
+TEST_F(AssemblerX86_64Test, Punpckhdq) {
+  DriverStr(RepeatFF(&x86_64::X86_64Assembler::punpckhdq, "punpckhdq %{reg2}, %{reg1}"), "punpckhdq");
+}
+
+TEST_F(AssemblerX86_64Test, Punpckhqdq) {
+  DriverStr(RepeatFF(&x86_64::X86_64Assembler::punpckhqdq, "punpckhqdq %{reg2}, %{reg1}"), "punpckhqdq");
+}
+
 TEST_F(AssemblerX86_64Test, Psllw) {
   GetAssembler()->psllw(x86_64::XmmRegister(x86_64::XMM0),  x86_64::Immediate(1));
   GetAssembler()->psllw(x86_64::XmmRegister(x86_64::XMM15), x86_64::Immediate(2));
@@ -1521,6 +1537,13 @@
             "psrlq $2, %xmm15\n", "pslrqi");
 }
 
+TEST_F(AssemblerX86_64Test, Psrldq) {
+  GetAssembler()->psrldq(x86_64::XmmRegister(x86_64::XMM0),  x86_64::Immediate(1));
+  GetAssembler()->psrldq(x86_64::XmmRegister(x86_64::XMM15), x86_64::Immediate(2));
+  DriverStr("psrldq $1, %xmm0\n"
+            "psrldq $2, %xmm15\n", "pslrdqi");
+}
+
 TEST_F(AssemblerX86_64Test, UcomissAddress) {
   GetAssembler()->ucomiss(x86_64::XmmRegister(x86_64::XMM0), x86_64::Address(
       x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12));
@@ -2012,7 +2035,7 @@
   x86_64::X86_64ManagedRegister method_reg = ManagedFromCpu(x86_64::RDI);
 
   size_t frame_size = 10 * kStackAlignment;
-  assembler->BuildFrame(10 * kStackAlignment, method_reg, spill_regs, entry_spills);
+  assembler->BuildFrame(frame_size, method_reg, spill_regs, entry_spills);
 
   // Construct assembly text counterpart.
   std::ostringstream str;
@@ -2048,7 +2071,7 @@
   ArrayRef<const ManagedRegister> spill_regs(raw_spill_regs);
 
   size_t frame_size = 10 * kStackAlignment;
-  assembler->RemoveFrame(10 * kStackAlignment, spill_regs);
+  assembler->RemoveFrame(frame_size, spill_regs);
 
   // Construct assembly text counterpart.
   std::ostringstream str;
diff --git a/compiler/verifier_deps_test.cc b/compiler/verifier_deps_test.cc
index 72e2a6c..6538925 100644
--- a/compiler/verifier_deps_test.cc
+++ b/compiler/verifier_deps_test.cc
@@ -87,13 +87,13 @@
     TimingLogger timings("Verify", false, false);
     // The compiler driver handles the verifier deps in the callbacks, so
     // remove what this class did for unit testing.
-    verifier_deps_.reset(nullptr);
+    if (deps == nullptr) {
+      // Create verifier deps by default if none were specified.
+      deps = new verifier::VerifierDeps(dex_files_);
+      verifier_deps_.reset(deps);
+    }
     callbacks_->SetVerifierDeps(deps);
     compiler_driver_->Verify(class_loader_, dex_files_, &timings);
-    // The compiler driver may have updated the VerifierDeps in the callback object.
-    if (callbacks_->GetVerifierDeps() != deps) {
-      verifier_deps_.reset(callbacks_->GetVerifierDeps());
-    }
     callbacks_->SetVerifierDeps(nullptr);
     // Clear entries in the verification results to avoid hitting a DCHECK that
     // we always succeed inserting a new entry after verifying.
@@ -128,6 +128,7 @@
     for (const DexFile* dex_file : dex_files_) {
       compiler_driver_->GetVerificationResults()->AddDexFile(dex_file);
     }
+    compiler_driver_->SetDexFilesForOatFile(dex_files_);
   }
 
   void LoadDexFile(ScopedObjectAccess* soa) REQUIRES_SHARED(Locks::mutator_lock_) {
@@ -228,8 +229,7 @@
         hs.NewHandle(soa.Decode<mirror::ClassLoader>(class_loader_)));
     MutableHandle<mirror::Class> cls(hs.NewHandle<mirror::Class>(nullptr));
     for (const DexFile* dex_file : dex_files_) {
-      const std::vector<dex::TypeIndex>& unverified_classes = deps.GetUnverifiedClasses(*dex_file);
-      std::set<dex::TypeIndex> set(unverified_classes.begin(), unverified_classes.end());
+      const std::set<dex::TypeIndex>& unverified_classes = deps.GetUnverifiedClasses(*dex_file);
       for (uint32_t i = 0; i < dex_file->NumClassDefs(); ++i) {
         const DexFile::ClassDef& class_def = dex_file->GetClassDef(i);
         const char* descriptor = dex_file->GetClassDescriptor(class_def);
@@ -237,7 +237,7 @@
         if (cls == nullptr) {
           CHECK(soa.Self()->IsExceptionPending());
           soa.Self()->ClearException();
-        } else if (set.find(class_def.class_idx_) == set.end()) {
+        } else if (unverified_classes.find(class_def.class_idx_) == unverified_classes.end()) {
           ASSERT_EQ(cls->GetStatus(), mirror::Class::kStatusVerified);
         } else {
           ASSERT_LT(cls->GetStatus(), mirror::Class::kStatusVerified);
@@ -1144,6 +1144,39 @@
   ASSERT_TRUE(HasUnverifiedClass("LMyClassWithNoSuperButFailures;"));
 }
 
+TEST_F(VerifierDepsTest, UnverifiedOrder) {
+  ScopedObjectAccess soa(Thread::Current());
+  jobject loader = LoadDex("VerifierDeps");
+  std::vector<const DexFile*> dex_files = GetDexFiles(loader);
+  ASSERT_GT(dex_files.size(), 0u);
+  const DexFile* dex_file = dex_files[0];
+  VerifierDeps deps1(dex_files);
+  Thread* const self = Thread::Current();
+  ASSERT_TRUE(self->GetVerifierDeps() == nullptr);
+  self->SetVerifierDeps(&deps1);
+  deps1.MaybeRecordVerificationStatus(*dex_file,
+                                      dex::TypeIndex(0),
+                                      verifier::FailureKind::kHardFailure);
+  deps1.MaybeRecordVerificationStatus(*dex_file,
+                                      dex::TypeIndex(1),
+                                      verifier::FailureKind::kHardFailure);
+  VerifierDeps deps2(dex_files);
+  self->SetVerifierDeps(nullptr);
+  self->SetVerifierDeps(&deps2);
+  deps2.MaybeRecordVerificationStatus(*dex_file,
+                                      dex::TypeIndex(1),
+                                      verifier::FailureKind::kHardFailure);
+  deps2.MaybeRecordVerificationStatus(*dex_file,
+                                      dex::TypeIndex(0),
+                                      verifier::FailureKind::kHardFailure);
+  self->SetVerifierDeps(nullptr);
+  std::vector<uint8_t> buffer1;
+  deps1.Encode(dex_files, &buffer1);
+  std::vector<uint8_t> buffer2;
+  deps2.Encode(dex_files, &buffer2);
+  EXPECT_EQ(buffer1, buffer2);
+}
+
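The determinism asserted by UnverifiedOrder follows from the move to ordered std::set storage (visible in the GetUnverifiedClasses hunk above): iteration order, and therefore the encoding, is independent of insertion order. A standalone demonstration of that property:

    #include <cassert>
    #include <set>
    #include <vector>

    int main() {
      std::set<int> a;
      a.insert(0); a.insert(1);  // recorded in order 0, 1
      std::set<int> b;
      b.insert(1); b.insert(0);  // recorded in order 1, 0
      std::vector<int> ea(a.begin(), a.end());
      std::vector<int> eb(b.begin(), b.end());
      assert(ea == eb);  // identical iteration order, hence identical encoding
      return 0;
    }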
 TEST_F(VerifierDepsTest, VerifyDeps) {
   VerifyDexFile();
 
@@ -1441,7 +1474,6 @@
         ASSERT_FALSE(verifier_deps_ == nullptr);
         ASSERT_FALSE(verifier_deps_->Equals(decoded_deps));
       } else {
-        ASSERT_TRUE(verifier_deps_ == nullptr);
         VerifyClassStatus(decoded_deps);
       }
     }
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index 3cc41a6..e3e0180 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -65,8 +65,10 @@
 #include "elf_writer_quick.h"
 #include "gc/space/image_space.h"
 #include "gc/space/space-inl.h"
+#include "gc/verification.h"
 #include "image_writer.h"
 #include "interpreter/unstarted_runtime.h"
+#include "java_vm_ext.h"
 #include "jit/profile_compilation_info.h"
 #include "leb128.h"
 #include "linker/buffered_output_stream.h"
@@ -353,6 +355,9 @@
   UsageError("");
   UsageError("  --debuggable: Produce code debuggable with Java debugger.");
   UsageError("");
+  UsageError("  --avoid-storing-invocation: Avoid storing the invocation args in the key value");
+  UsageError("      store. Used to test determinism with different args.");
+  UsageError("");
   UsageError("  --runtime-arg <argument>: used to specify various arguments for the runtime,");
   UsageError("      such as initial heap size, maximum heap size, and verbose output.");
   UsageError("      Use a separate --runtime-arg switch for each argument.");
@@ -406,17 +411,17 @@
   UsageError("");
   UsageError("  --class-loader-context=<string spec>: a string specifying the intended");
   UsageError("      runtime loading context for the compiled dex files.");
-  UsageError("      ");
+  UsageError("");
   UsageError("      It describes how the class loader chain should be built in order to ensure");
   UsageError("      classes are resolved during dex2aot as they would be resolved at runtime.");
   UsageError("      This spec will be encoded in the oat file. If at runtime the dex file is");
   UsageError("      loaded in a different context, the oat file will be rejected.");
-  UsageError("      ");
+  UsageError("");
   UsageError("      The chain is interpreted in the natural 'parent order', meaning that class");
   UsageError("      loader 'i+1' will be the parent of class loader 'i'.");
   UsageError("      The compilation sources will be appended to the classpath of the first class");
   UsageError("      loader.");
-  UsageError("      ");
+  UsageError("");
   UsageError("      E.g. if the context is 'PCL[lib1.dex];DLC[lib2.dex]' and ");
   UsageError("      --dex-file=src.dex then dex2oat will setup a PathClassLoader with classpath ");
   UsageError("      'lib1.dex:src.dex' and set its parent to a DelegateLastClassLoader with ");
@@ -426,9 +431,12 @@
   UsageError("      with --dex-file are found in the classpath. The source dex files will be");
   UsageError("      removed from any class loader's classpath possibly resulting in empty");
   UsageError("      class loaders.");
-  UsageError("      ");
+  UsageError("");
   UsageError("      Example: --class-loader-context=PCL[lib1.dex:lib2.dex];DLC[lib3.dex]");
   UsageError("");
+  UsageError("  --dirty-image-objects=<directory-path>: list of known dirty objects in the image.");
+  UsageError("      The image writer will group them together.");
+  UsageError("");
   std::cerr << "See log for usage error information\n";
   exit(EXIT_FAILURE);
 }
@@ -591,9 +599,9 @@
       compiled_methods_zip_filename_(nullptr),
       compiled_methods_filename_(nullptr),
       passes_to_run_filename_(nullptr),
+      dirty_image_objects_filename_(nullptr),
       multi_image_(false),
       is_host_(false),
-      class_loader_(nullptr),
       elf_writers_(),
       oat_writers_(),
       rodata_(),
@@ -606,6 +614,7 @@
       dump_passes_(false),
       dump_timing_(false),
       dump_slow_timing_(kIsDebugBuild),
+      avoid_storing_invocation_(false),
       swap_fd_(kInvalidFd),
       app_image_fd_(kInvalidFd),
       profile_file_fd_(kInvalidFd),
@@ -764,6 +773,11 @@
     compiler_options_->boot_image_ = !image_filenames_.empty();
     compiler_options_->app_image_ = app_image_fd_ != -1 || !app_image_file_name_.empty();
 
+    if (IsBootImage() && image_filenames_.size() == 1) {
+      const std::string& boot_image_filename = image_filenames_[0];
+      compiler_options_->core_image_ = CompilerDriver::IsCoreImageFilename(boot_image_filename);
+    }
+
     if (IsAppImage() && IsBootImage()) {
       Usage("Can't have both --image and (--app-image-fd or --app-image-file)");
     }
@@ -1128,14 +1142,16 @@
 
   void InsertCompileOptions(int argc, char** argv) {
     std::ostringstream oss;
-    for (int i = 0; i < argc; ++i) {
-      if (i > 0) {
-        oss << ' ';
+    if (!avoid_storing_invocation_) {
+      for (int i = 0; i < argc; ++i) {
+        if (i > 0) {
+          oss << ' ';
+        }
+        oss << argv[i];
       }
-      oss << argv[i];
+      key_value_store_->Put(OatHeader::kDex2OatCmdLineKey, oss.str());
+      oss.str("");  // Reset.
     }
-    key_value_store_->Put(OatHeader::kDex2OatCmdLineKey, oss.str());
-    oss.str("");  // Reset.
     oss << kRuntimeISA;
     key_value_store_->Put(OatHeader::kDex2OatHostKey, oss.str());
     key_value_store_->Put(
@@ -1266,6 +1282,8 @@
         dump_passes_ = true;
       } else if (option == "--dump-stats") {
         dump_stats_ = true;
+      } else if (option == "--avoid-storing-invocation") {
+        avoid_storing_invocation_ = true;
       } else if (option.starts_with("--swap-file=")) {
         swap_file_name_ = option.substr(strlen("--swap-file=")).data();
       } else if (option.starts_with("--swap-fd=")) {
@@ -1303,9 +1321,11 @@
       } else if (option.starts_with("--class-loader-context=")) {
         class_loader_context_ = ClassLoaderContext::Create(
             option.substr(strlen("--class-loader-context=")).data());
-        if (class_loader_context_== nullptr) {
+        if (class_loader_context_ == nullptr) {
           Usage("Option --class-loader-context has an incorrect format: %s", option.data());
         }
+      } else if (option.starts_with("--dirty-image-objects=")) {
+        dirty_image_objects_filename_ = option.substr(strlen("--dirty-image-objects=")).data();
       } else if (!compiler_options_->ParseCompilerOption(option, Usage)) {
         Usage("Unknown argument %s", option.data());
       }
@@ -1484,14 +1504,6 @@
     }
   }
 
-  void Shutdown() {
-    ScopedObjectAccess soa(Thread::Current());
-    for (jobject dex_cache : dex_caches_) {
-      soa.Env()->DeleteLocalRef(dex_cache);
-    }
-    dex_caches_.clear();
-  }
-
   void LoadClassProfileDescriptors() {
     if (profile_compilation_info_ != nullptr && IsImage()) {
       Runtime* runtime = Runtime::Current();
@@ -1515,7 +1527,8 @@
   dex2oat::ReturnCode Setup() {
     TimingLogger::ScopedTiming t("dex2oat Setup", timings_);
 
-    if (!PrepareImageClasses() || !PrepareCompiledClasses() || !PrepareCompiledMethods()) {
+    if (!PrepareImageClasses() || !PrepareCompiledClasses() || !PrepareCompiledMethods() ||
+        !PrepareDirtyObjects()) {
       return dex2oat::ReturnCode::kOther;
     }
 
@@ -1576,20 +1589,12 @@
       }
 
       // Open dex files for class path.
+
       if (class_loader_context_ == nullptr) {
-        // TODO(calin): Temporary workaround while we transition to use
-        // --class-loader-context instead of --runtime-arg -cp
-        if (runtime_->GetClassPathString().empty()) {
-          class_loader_context_ = std::unique_ptr<ClassLoaderContext>(
-              new ClassLoaderContext());
-        } else {
-          std::string spec = runtime_->GetClassPathString() == OatFile::kSpecialSharedLibrary
-              ? OatFile::kSpecialSharedLibrary
-              : "PCL[" + runtime_->GetClassPathString() + "]";
-          class_loader_context_ = ClassLoaderContext::Create(spec);
-        }
+        // If no context was specified use the default one (which is an empty PathClassLoader).
+        class_loader_context_ = std::unique_ptr<ClassLoaderContext>(ClassLoaderContext::Default());
       }
-      CHECK(class_loader_context_ != nullptr);
+
       DCHECK_EQ(oat_writers_.size(), 1u);
 
       // Note: Ideally we would reject context where the source dex files are also
@@ -1660,6 +1665,8 @@
 
     // If we need to downgrade the compiler-filter for size reasons.
     if (!IsBootImage() && IsVeryLarge(dex_files_)) {
+      // Do the downgrade early, before the filter is read below when creating
+      // the verification callbacks.
       if (!CompilerFilter::IsAsGoodAs(kLargeAppFilter, compiler_options_->GetCompilerFilter())) {
         LOG(INFO) << "Very large app, downgrading to verify.";
         // Note: this change won't be reflected in the key-value store, as that had to be
@@ -1712,13 +1719,11 @@
     Thread* self = Thread::Current();
     WellKnownClasses::Init(self->GetJniEnv());
 
-    ClassLinker* const class_linker = Runtime::Current()->GetClassLinker();
     if (!IsBootImage()) {
       constexpr bool kSaveDexInput = false;
       if (kSaveDexInput) {
         SaveDexInput();
       }
-      class_loader_ = class_loader_context_->CreateClassLoader(dex_files_);
     }
 
     // Ensure opened dex files are writable for dex-to-dex transformations.
@@ -1729,24 +1734,12 @@
       }
     }
 
-    // Ensure that the dex caches stay live since we don't want class unloading
-    // to occur during compilation.
-    for (const auto& dex_file : dex_files_) {
-      ScopedObjectAccess soa(self);
-      dex_caches_.push_back(soa.AddLocalReference<jobject>(
-          class_linker->RegisterDexFile(*dex_file,
-                                        soa.Decode<mirror::ClassLoader>(class_loader_).Ptr())));
-      if (dex_caches_.back() == nullptr) {
-        soa.Self()->AssertPendingException();
-        soa.Self()->ClearException();
-        PLOG(ERROR) << "Failed to register dex file.";
-        return dex2oat::ReturnCode::kOther;
-      }
-      // Pre-register dex files so that we can access verification results without locks during
-      // compilation and verification.
-      if (verification_results_ != nullptr) {
-        // Verification results are only required for modes that have any compilation. Avoid
-        // adding the dex files if possible to prevent allocating large arrays.
+    // Verification results are only required for modes that have any compilation. Avoid
+    // adding the dex files if possible to prevent allocating large arrays.
+    if (verification_results_ != nullptr) {
+      for (const auto& dex_file : dex_files_) {
+        // Pre-register dex files so that we can access verification results without locks during
+        // compilation and verification.
         verification_results_->AddDexFile(dex_file);
       }
     }
@@ -1759,13 +1752,52 @@
     return IsImage() && oat_fd_ != kInvalidFd;
   }
 
-  // Create and invoke the compiler driver. This will compile all the dex files.
-  void Compile() {
+  // Does not return the class loader since this path is not meant to be used for image compilation.
+  void CompileDexFilesIndividually() {
+    CHECK(!IsImage()) << "Not supported with image";
+    for (const DexFile* dex_file : dex_files_) {
+      std::vector<const DexFile*> dex_files(1u, dex_file);
+      VLOG(compiler) << "Compiling " << dex_file->GetLocation();
+      jobject class_loader = CompileDexFiles(dex_files);
+      CHECK(class_loader != nullptr);
+      ScopedObjectAccess soa(Thread::Current());
+      // Unload class loader to free RAM.
+      jweak weak_class_loader = soa.Env()->vm->AddWeakGlobalRef(
+          soa.Self(),
+          soa.Decode<mirror::ClassLoader>(class_loader));
+      soa.Env()->vm->DeleteGlobalRef(soa.Self(), class_loader);
+      runtime_->GetHeap()->CollectGarbage(/*clear_soft_references*/ true);
+      ObjPtr<mirror::ClassLoader> decoded_weak = soa.Decode<mirror::ClassLoader>(weak_class_loader);
+      if (decoded_weak != nullptr) {
+        LOG(FATAL) << "Failed to unload class loader, path from root set: "
+                   << runtime_->GetHeap()->GetVerification()->FirstPathFromRootSet(decoded_weak);
+      }
+      VLOG(compiler) << "Unloaded classloader";
+    }
+  }
+
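The unload check in CompileDexFilesIndividually is the classic weak-global-reference pattern. A sketch of the same idea in standard JNI terms; the diff uses ART-internal equivalents, and env plus the loader reference here are assumed to be provided by the caller:

    // Given: JNIEnv* env and a global reference 'loader' we are about to drop.
    jweak weak = env->NewWeakGlobalRef(loader);  // does not keep the loader alive
    env->DeleteGlobalRef(loader);                // drop the strong reference
    // ... trigger a garbage collection ...
    if (!env->IsSameObject(weak, nullptr)) {
      // Something still roots the class loader; it failed to unload.
    }
    env->DeleteWeakGlobalRef(weak);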
+  bool ShouldCompileDexFilesIndividually() const {
+    // Compile individually if we are:
+    // 1. not building an image,
+    // 2. not verifying a vdex file,
+    // 3. using multidex,
+    // 4. not doing any AOT compilation.
+    // This means extract, no-vdex verify, and quicken will use the individual
+    // compilation mode (to reduce the RAM used by the compiler).
+    return !IsImage() &&
+        !update_input_vdex_ &&
+        dex_files_.size() > 1 &&
+        !CompilerFilter::IsAotCompilationEnabled(compiler_options_->GetCompilerFilter());
+  }
+
+  // Create the compiler driver and invoke it to compile all the dex files.
+  jobject Compile() {
+    ClassLinker* const class_linker = Runtime::Current()->GetClassLinker();
+
     TimingLogger::ScopedTiming t("dex2oat Compile", timings_);
     compiler_phases_timings_.reset(new CumulativeLogger("compilation times"));
 
     // Find the dex files we should not inline from.
-
     std::vector<std::string> no_inline_filters;
     Split(no_inline_from_string_, ',', &no_inline_filters);
 
@@ -1776,7 +1808,6 @@
     }
 
     if (!no_inline_filters.empty()) {
-      ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
       std::vector<const DexFile*> class_path_files;
       if (!IsBootImage()) {
         // The class loader context is used only for apps.
@@ -1832,6 +1863,16 @@
                                      profile_compilation_info_.get()));
     driver_->SetDexFilesForOatFile(dex_files_);
 
+    const bool compile_individually = ShouldCompileDexFilesIndividually();
+    if (compile_individually) {
+      // Set the compiler driver in the callbacks so that we can avoid re-verification. This not
+      // only helps performance but also prevents reverifying quickened bytecodes. Attempting
+      // to verify quickened bytecode causes verification failures.
+      // Only set the driver in the callbacks if we are doing separate compilation, since there
+      // is a bit of overhead when checking whether a class was previously verified.
+      callbacks_->SetDoesClassUnloading(true, driver_.get());
+    }
+
     // Setup vdex for compilation.
     if (!DoEagerUnquickeningOfVdex() && input_vdex_file_ != nullptr) {
       callbacks_->SetVerifierDeps(
@@ -1842,8 +1883,46 @@
       // experimentation.
       TimingLogger::ScopedTiming time_unquicken("Unquicken", timings_);
       VdexFile::Unquicken(dex_files_, input_vdex_file_->GetQuickeningInfo());
+    } else {
+      // Create the main VerifierDeps here, instead of in the compiler, since we want to aggregate
+      // the results for all the dex files, not just the results for the current dex file.
+      callbacks_->SetVerifierDeps(new verifier::VerifierDeps(dex_files_));
     }
-    driver_->CompileAll(class_loader_, dex_files_, timings_);
+    // Invoke the compilation.
+    if (compile_individually) {
+      CompileDexFilesIndividually();
+      // Return a null class loader since we have already released it.
+      return nullptr;
+    }
+    return CompileDexFiles(dex_files_);
+  }
+
+  // Create the class loader, use it to compile, and return it.
+  jobject CompileDexFiles(const std::vector<const DexFile*>& dex_files) {
+    ClassLinker* const class_linker = Runtime::Current()->GetClassLinker();
+
+    jobject class_loader = nullptr;
+    if (!IsBootImage()) {
+      class_loader = class_loader_context_->CreateClassLoader(dex_files_);
+    }
+
+    // Register dex caches and key them to the class loader so that they only unload when the
+    // class loader unloads.
+    for (const auto& dex_file : dex_files) {
+      ScopedObjectAccess soa(Thread::Current());
+      // Registering the dex cache adds a strong root in the class loader that prevents the dex
+      // cache from being unloaded early.
+      ObjPtr<mirror::DexCache> dex_cache = class_linker->RegisterDexFile(
+          *dex_file,
+          soa.Decode<mirror::ClassLoader>(class_loader));
+      if (dex_cache == nullptr) {
+        soa.Self()->AssertPendingException();
+        LOG(FATAL) << "Failed to register dex file " << dex_file->GetLocation() << " "
+                   << soa.Self()->GetException()->Dump();
+      }
+    }
+    driver_->CompileAll(class_loader, dex_files, timings_);
+    return class_loader;
   }
 
   // Notes on the interleaving of creating the images and oat files to
@@ -1947,7 +2026,8 @@
                                           IsAppImage(),
                                           image_storage_mode_,
                                           oat_filenames_,
-                                          dex_file_oat_index_map_));
+                                          dex_file_oat_index_map_,
+                                          dirty_image_objects_.get()));
 
       // We need to prepare method offsets in the image address space for direct method patching.
       TimingLogger::ScopedTiming t2("dex2oat Prepare image address space", timings_);
@@ -2373,6 +2453,22 @@
     return true;
   }
 
+  bool PrepareDirtyObjects() {
+    if (dirty_image_objects_filename_ != nullptr) {
+      dirty_image_objects_.reset(ReadCommentedInputFromFile<std::unordered_set<std::string>>(
+          dirty_image_objects_filename_,
+          nullptr));
+      if (dirty_image_objects_ == nullptr) {
+        LOG(ERROR) << "Failed to create list of dirty objects from '"
+            << dirty_image_objects_filename_ << "'";
+        return false;
+      }
+    } else {
+      dirty_image_objects_.reset(nullptr);
+    }
+    return true;
+  }
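
PrepareDirtyObjects() reads the --dirty-image-objects argument with the same commented-input helper used for the image-classes and compiled-methods lists: one entry per line, with '#' introducing a comment. A hypothetical input file (assuming the entries use the same descriptor format as the other class lists):

    # Objects observed to be dirtied at runtime, e.g. collected via imgdiag.
    Ljava/lang/System;
    Landroid/util/Log;
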
+
   void PruneNonExistentDexFiles() {
     DCHECK_EQ(dex_filenames_.size(), dex_locations_.size());
     size_t kept = 0u;
@@ -2790,9 +2886,11 @@
   const char* compiled_methods_zip_filename_;
   const char* compiled_methods_filename_;
   const char* passes_to_run_filename_;
+  const char* dirty_image_objects_filename_;
   std::unique_ptr<std::unordered_set<std::string>> image_classes_;
   std::unique_ptr<std::unordered_set<std::string>> compiled_classes_;
   std::unique_ptr<std::unordered_set<std::string>> compiled_methods_;
+  std::unique_ptr<std::unordered_set<std::string>> dirty_image_objects_;
   std::unique_ptr<std::vector<std::string>> passes_to_run_;
   bool multi_image_;
   bool is_host_;
@@ -2800,8 +2898,6 @@
   // Dex files we are compiling, does not include the class path dex files.
   std::vector<const DexFile*> dex_files_;
   std::string no_inline_from_string_;
-  std::vector<jobject> dex_caches_;
-  jobject class_loader_;
 
   std::vector<std::unique_ptr<ElfWriter>> elf_writers_;
   std::vector<std::unique_ptr<OatWriter>> oat_writers_;
@@ -2820,6 +2916,7 @@
   bool dump_passes_;
   bool dump_timing_;
   bool dump_slow_timing_;
+  bool avoid_storing_invocation_;
   std::string swap_file_name_;
   int swap_fd_;
   size_t min_dex_files_for_swap_ = kDefaultMinDexFilesForSwap;
@@ -2870,9 +2967,25 @@
 #endif
 }
 
+class ScopedGlobalRef {
+ public:
+  explicit ScopedGlobalRef(jobject obj) : obj_(obj) {}
+  ~ScopedGlobalRef() {
+    if (obj_ != nullptr) {
+      ScopedObjectAccess soa(Thread::Current());
+      soa.Env()->vm->DeleteGlobalRef(soa.Self(), obj_);
+    }
+  }
+
+ private:
+  jobject obj_;
+};
+
 static dex2oat::ReturnCode CompileImage(Dex2Oat& dex2oat) {
   dex2oat.LoadClassProfileDescriptors();
-  dex2oat.Compile();
+  // Keep the class loader that was used for compilation live for the rest of the compilation
+  // process.
+  ScopedGlobalRef class_loader(dex2oat.Compile());
 
   if (!dex2oat.WriteOutputFiles()) {
     dex2oat.EraseOutputFiles();
@@ -2920,7 +3033,9 @@
 }
 
 static dex2oat::ReturnCode CompileApp(Dex2Oat& dex2oat) {
-  dex2oat.Compile();
+  // Keep the class loader that was used for compilation live for the rest of the compilation
+  // process.
+  ScopedGlobalRef class_loader(dex2oat.Compile());
 
   if (!dex2oat.WriteOutputFiles()) {
     dex2oat.EraseOutputFiles();
@@ -3014,7 +3129,6 @@
     result = CompileApp(*dex2oat);
   }
 
-  dex2oat->Shutdown();
   return result;
 }
 }  // namespace art
diff --git a/dex2oat/dex2oat_image_test.cc b/dex2oat/dex2oat_image_test.cc
index 95fb16d..46c5f58 100644
--- a/dex2oat/dex2oat_image_test.cc
+++ b/dex2oat/dex2oat_image_test.cc
@@ -340,6 +340,15 @@
     // EXPECT_GE(profile_sizes.oat_size / kRatio, compiled_methods_sizes.oat_size);
     EXPECT_GE(profile_sizes.vdex_size / kRatio, compiled_methods_sizes.vdex_size);
   }
+  // Test dirty image objects.
+  {
+    ScratchFile classes;
+    GenerateClasses(classes.GetFile(), /*frequency*/ 1u);
+    image_classes_sizes = CompileImageAndGetSizes(
+        {"--dirty-image-objects=" + classes.GetFilename()});
+    classes.Close();
+    std::cout << "Dirty image object sizes " << image_classes_sizes << std::endl;
+  }
 }
 
 }  // namespace art
diff --git a/dex2oat/dex2oat_test.cc b/dex2oat/dex2oat_test.cc
index 32877a8..0aa766c 100644
--- a/dex2oat/dex2oat_test.cc
+++ b/dex2oat/dex2oat_test.cc
@@ -41,6 +41,7 @@
 namespace art {
 
 static constexpr size_t kMaxMethodIds = 65535;
+static constexpr bool kDebugArgs = false;
 
 using android::base::StringPrintf;
 
@@ -55,7 +56,7 @@
   }
 
  protected:
-  int GenerateOdexForTestWithStatus(const std::string& dex_location,
+  int GenerateOdexForTestWithStatus(const std::vector<std::string>& dex_locations,
                                     const std::string& odex_location,
                                     CompilerFilter::Filter filter,
                                     std::string* error_msg,
@@ -63,7 +64,10 @@
                                     bool use_fd = false) {
     std::unique_ptr<File> oat_file;
     std::vector<std::string> args;
-    args.push_back("--dex-file=" + dex_location);
+    // Add dex file args.
+    for (const std::string& dex_location : dex_locations) {
+      args.push_back("--dex-file=" + dex_location);
+    }
     if (use_fd) {
       oat_file.reset(OS::CreateEmptyFile(odex_location.c_str()));
       CHECK(oat_file != nullptr) << odex_location;
@@ -93,7 +97,7 @@
                            bool use_fd = false,
                            std::function<void(const OatFile&)> check_oat = [](const OatFile&) {}) {
     std::string error_msg;
-    int status = GenerateOdexForTestWithStatus(dex_location,
+    int status = GenerateOdexForTestWithStatus({dex_location},
                                                odex_location,
                                                filter,
                                                &error_msg,
@@ -134,7 +138,7 @@
     }
   }
 
-  // Check the input compiler filter against the generated oat file's filter. Mayb be overridden
+  // Check the input compiler filter against the generated oat file's filter. May be overridden
   // in subclasses when equality is not expected.
   virtual void CheckFilter(CompilerFilter::Filter expected, CompilerFilter::Filter actual) {
     EXPECT_EQ(expected, actual);
@@ -153,14 +157,7 @@
 
     std::vector<std::string> argv;
     argv.push_back(runtime->GetCompilerExecutable());
-    argv.push_back("--runtime-arg");
-    argv.push_back("-classpath");
-    argv.push_back("--runtime-arg");
-    std::string class_path = runtime->GetClassPathString();
-    if (class_path == "") {
-      class_path = OatFile::kSpecialSharedLibrary;
-    }
-    argv.push_back(class_path);
+
     if (runtime->IsJavaDebuggable()) {
       argv.push_back("--debuggable");
     }
@@ -194,6 +191,14 @@
     CHECK(android_root != nullptr);
     argv.push_back("--android-root=" + std::string(android_root));
 
+    if (kDebugArgs) {
+      std::string all_args;
+      for (const std::string& arg : argv) {
+        all_args += arg + " ";
+      }
+      LOG(ERROR) << all_args;
+    }
+
     int link[2];
 
     if (pipe(link) == -1) {
@@ -945,6 +950,7 @@
 }
 
 TEST_F(Dex2oatWatchdogTest, TestWatchdogTrigger) {
+  TEST_DISABLED_FOR_MEMORY_TOOL_VALGRIND();  // b/63052624
   // Check with ten milliseconds.
   RunTest(false, { "--watchdog-timeout=10" });
 }
@@ -958,7 +964,7 @@
     Copy(GetTestDexFileName(), dex_location);
 
     std::string error_msg;
-    return GenerateOdexForTestWithStatus(dex_location,
+    return GenerateOdexForTestWithStatus({dex_location},
                                          odex_location,
                                          CompilerFilter::kSpeed,
                                          &error_msg,
@@ -1114,4 +1120,232 @@
   RunTest(context.c_str(), expected_classpath_key.c_str(), true);
 }
 
+class Dex2oatDeterminism : public Dex2oatTest {};
+
+TEST_F(Dex2oatDeterminism, UnloadCompile) {
+  if (!kUseReadBarrier &&
+      gc::kCollectorTypeDefault != gc::kCollectorTypeCMS &&
+      gc::kCollectorTypeDefault != gc::kCollectorTypeMS) {
+    LOG(INFO) << "Test requires determinism support.";
+    return;
+  }
+  Runtime* const runtime = Runtime::Current();
+  std::string out_dir = GetScratchDir();
+  const std::string base_oat_name = out_dir + "/base.oat";
+  const std::string base_vdex_name = out_dir + "/base.vdex";
+  const std::string unload_oat_name = out_dir + "/unload.oat";
+  const std::string unload_vdex_name = out_dir + "/unload.vdex";
+  const std::string no_unload_oat_name = out_dir + "/nounload.oat";
+  const std::string no_unload_vdex_name = out_dir + "/nounload.vdex";
+  const std::string app_image_name = out_dir + "/unload.art";
+  std::string error_msg;
+  const std::vector<gc::space::ImageSpace*>& spaces = runtime->GetHeap()->GetBootImageSpaces();
+  ASSERT_GT(spaces.size(), 0u);
+  const std::string image_location = spaces[0]->GetImageLocation();
+  // Without passing in an app image, dex2oat will unload class loaders in between compilations.
+  const int res = GenerateOdexForTestWithStatus(
+      GetLibCoreDexFileNames(),
+      base_oat_name,
+      CompilerFilter::Filter::kQuicken,
+      &error_msg,
+      {"--force-determinism", "--avoid-storing-invocation"});
+  EXPECT_EQ(res, 0);
+  Copy(base_oat_name, unload_oat_name);
+  Copy(base_vdex_name, unload_vdex_name);
+  std::unique_ptr<File> unload_oat(OS::OpenFileForReading(unload_oat_name.c_str()));
+  std::unique_ptr<File> unload_vdex(OS::OpenFileForReading(unload_vdex_name.c_str()));
+  ASSERT_TRUE(unload_oat != nullptr);
+  ASSERT_TRUE(unload_vdex != nullptr);
+  EXPECT_GT(unload_oat->GetLength(), 0u);
+  EXPECT_GT(unload_vdex->GetLength(), 0u);
+  // Regenerate with an app image to disable the dex2oat unloading and verify that the output is
+  // the same.
+  const int res2 = GenerateOdexForTestWithStatus(
+      GetLibCoreDexFileNames(),
+      base_oat_name,
+      CompilerFilter::Filter::kQuicken,
+      &error_msg,
+      {"--force-determinism", "--avoid-storing-invocation", "--app-image-file=" + app_image_name});
+  EXPECT_EQ(res2, 0);
+  Copy(base_oat_name, no_unload_oat_name);
+  Copy(base_vdex_name, no_unload_vdex_name);
+  std::unique_ptr<File> no_unload_oat(OS::OpenFileForReading(no_unload_oat_name.c_str()));
+  std::unique_ptr<File> no_unload_vdex(OS::OpenFileForReading(no_unload_vdex_name.c_str()));
+  ASSERT_TRUE(no_unload_oat != nullptr);
+  ASSERT_TRUE(no_unload_vdex != nullptr);
+  EXPECT_GT(no_unload_oat->GetLength(), 0u);
+  EXPECT_GT(no_unload_vdex->GetLength(), 0u);
+  // Verify that both of the files are the same (odex and vdex).
+  EXPECT_EQ(unload_oat->GetLength(), no_unload_oat->GetLength());
+  EXPECT_EQ(unload_vdex->GetLength(), no_unload_vdex->GetLength());
+  EXPECT_EQ(unload_oat->Compare(no_unload_oat.get()), 0)
+      << unload_oat_name << " " << no_unload_oat_name;
+  EXPECT_EQ(unload_vdex->Compare(no_unload_vdex.get()), 0)
+      << unload_vdex_name << " " << no_unload_vdex_name;
+  // App image file.
+  std::unique_ptr<File> app_image_file(OS::OpenFileForReading(app_image_name.c_str()));
+  ASSERT_TRUE(app_image_file != nullptr);
+  EXPECT_GT(app_image_file->GetLength(), 0u);
+}
+
+// Test that dexlayout section info is correctly written to the oat file for profile-based
+// compilation.
+TEST_F(Dex2oatTest, LayoutSections) {
+  using Hotness = ProfileCompilationInfo::MethodHotness;
+  std::unique_ptr<const DexFile> dex(OpenTestDexFile("ManyMethods"));
+  ScratchFile profile_file;
+  // We can only lay out methods that have code items, so figure out which ones have this
+  // property first.
+  std::vector<uint16_t> methods;
+  {
+    const DexFile::TypeId* type_id = dex->FindTypeId("LManyMethods;");
+    dex::TypeIndex type_idx = dex->GetIndexForTypeId(*type_id);
+    const DexFile::ClassDef* class_def = dex->FindClassDef(type_idx);
+    ClassDataItemIterator it(*dex, dex->GetClassData(*class_def));
+    it.SkipAllFields();
+    std::set<size_t> code_item_offsets;
+    for (; it.HasNextDirectMethod() || it.HasNextVirtualMethod(); it.Next()) {
+      const uint16_t method_idx = it.GetMemberIndex();
+      const size_t code_item_offset = it.GetMethodCodeItemOffset();
+      if (code_item_offsets.insert(code_item_offset).second) {
+        // Unique code item, add the method index.
+        methods.push_back(method_idx);
+      }
+    }
+    DCHECK(!it.HasNext());
+  }
+  ASSERT_GE(methods.size(), 8u);
+  std::vector<uint16_t> hot_methods = {methods[1], methods[3], methods[5]};
+  std::vector<uint16_t> startup_methods = {methods[1], methods[2], methods[7]};
+  std::vector<uint16_t> post_methods = {methods[0], methods[2], methods[6]};
+  // Here, we build the profile from the method lists.
+  ProfileCompilationInfo info;
+  info.AddMethodsForDex(
+      static_cast<Hotness::Flag>(Hotness::kFlagHot | Hotness::kFlagStartup),
+      dex.get(),
+      hot_methods.begin(),
+      hot_methods.end());
+  info.AddMethodsForDex(
+      Hotness::kFlagStartup,
+      dex.get(),
+      startup_methods.begin(),
+      startup_methods.end());
+  info.AddMethodsForDex(
+      Hotness::kFlagPostStartup,
+      dex.get(),
+      post_methods.begin(),
+      post_methods.end());
+  for (uint16_t id : hot_methods) {
+    EXPECT_TRUE(info.GetMethodHotness(MethodReference(dex.get(), id)).IsHot());
+    EXPECT_TRUE(info.GetMethodHotness(MethodReference(dex.get(), id)).IsStartup());
+  }
+  for (uint16_t id : startup_methods) {
+    EXPECT_TRUE(info.GetMethodHotness(MethodReference(dex.get(), id)).IsStartup());
+  }
+  for (uint16_t id : post_methods) {
+    EXPECT_TRUE(info.GetMethodHotness(MethodReference(dex.get(), id)).IsPostStartup());
+  }
+  // Save the profile since we want to use it with dex2oat to produce an oat file.
+  ASSERT_TRUE(info.Save(profile_file.GetFd()));
+  // Generate a profile based odex.
+  const std::string dir = GetScratchDir();
+  const std::string oat_filename = dir + "/base.oat";
+  const std::string vdex_filename = dir + "/base.vdex";
+  std::string error_msg;
+  const int res = GenerateOdexForTestWithStatus(
+      {dex->GetLocation()},
+      oat_filename,
+      CompilerFilter::Filter::kQuicken,
+      &error_msg,
+      {"--profile-file=" + profile_file.GetFilename()});
+  EXPECT_EQ(res, 0);
+
+  // Open our generated oat file.
+  std::unique_ptr<OatFile> odex_file(OatFile::Open(oat_filename.c_str(),
+                                                   oat_filename.c_str(),
+                                                   nullptr,
+                                                   nullptr,
+                                                   false,
+                                                   /*low_4gb*/false,
+                                                   dex->GetLocation().c_str(),
+                                                   &error_msg));
+  ASSERT_TRUE(odex_file != nullptr);
+  std::vector<const OatDexFile*> oat_dex_files = odex_file->GetOatDexFiles();
+  ASSERT_EQ(oat_dex_files.size(), 1u);
+  // Check that the code sections match what we expect.
+  for (const OatDexFile* oat_dex : oat_dex_files) {
+    const DexLayoutSections* const sections = oat_dex->GetDexLayoutSections();
+    // Test logging of the sections.
+    ASSERT_TRUE(sections != nullptr);
+    LOG(INFO) << *sections;
+
+    // Load the sections into temporary variables for convenience.
+    const DexLayoutSection& code_section =
+        sections->sections_[static_cast<size_t>(DexLayoutSections::SectionType::kSectionTypeCode)];
+    const DexLayoutSection::Subsection& section_hot_code =
+        code_section.parts_[static_cast<size_t>(LayoutType::kLayoutTypeHot)];
+    const DexLayoutSection::Subsection& section_sometimes_used =
+        code_section.parts_[static_cast<size_t>(LayoutType::kLayoutTypeSometimesUsed)];
+    const DexLayoutSection::Subsection& section_startup_only =
+        code_section.parts_[static_cast<size_t>(LayoutType::kLayoutTypeStartupOnly)];
+    const DexLayoutSection::Subsection& section_unused =
+        code_section.parts_[static_cast<size_t>(LayoutType::kLayoutTypeUnused)];
+
+    // All the sections should be non-empty.
+    EXPECT_GT(section_hot_code.size_, 0u);
+    EXPECT_GT(section_sometimes_used.size_, 0u);
+    EXPECT_GT(section_startup_only.size_, 0u);
+    EXPECT_GT(section_unused.size_, 0u);
+
+    // Open the dex file since we need to peek at the code items to verify the layout matches what
+    // we expect.
+    std::unique_ptr<const DexFile> dex_file(oat_dex->OpenDexFile(&error_msg));
+    ASSERT_TRUE(dex_file != nullptr) << error_msg;
+    const DexFile::TypeId* type_id = dex_file->FindTypeId("LManyMethods;");
+    ASSERT_TRUE(type_id != nullptr);
+    dex::TypeIndex type_idx = dex_file->GetIndexForTypeId(*type_id);
+    const DexFile::ClassDef* class_def = dex_file->FindClassDef(type_idx);
+    ASSERT_TRUE(class_def != nullptr);
+
+    // Count how many code items are in each category; there should be at least one per category.
+    size_t hot_count = 0;
+    size_t post_startup_count = 0;
+    size_t startup_count = 0;
+    size_t unused_count = 0;
+    // Visit all of the methods of the main class and cross-reference the method indices to their
+    // corresponding code item offsets to verify the layout.
+    ClassDataItemIterator it(*dex_file, dex_file->GetClassData(*class_def));
+    it.SkipAllFields();
+    for (; it.HasNextDirectMethod() || it.HasNextVirtualMethod(); it.Next()) {
+      const size_t method_idx = it.GetMemberIndex();
+      const size_t code_item_offset = it.GetMethodCodeItemOffset();
+      const bool is_hot = ContainsElement(hot_methods, method_idx);
+      const bool is_startup = ContainsElement(startup_methods, method_idx);
+      const bool is_post_startup = ContainsElement(post_methods, method_idx);
+      if (is_hot) {
+        // Hot has the highest precedence; check that the hot methods are in the hot section.
+        EXPECT_LT(code_item_offset - section_hot_code.offset_, section_hot_code.size_);
+        ++hot_count;
+      } else if (is_post_startup) {
+        // Post-startup methods go in the sometimes-used section.
+        EXPECT_LT(code_item_offset - section_sometimes_used.offset_, section_sometimes_used.size_);
+        ++post_startup_count;
+      } else if (is_startup) {
+        // Startup at this point means neither hot nor post startup, so these must be startup-only.
+        EXPECT_LT(code_item_offset - section_startup_only.offset_, section_startup_only.size_);
+        ++startup_count;
+      } else {
+        // If no flags are set, the method should be unused.
+        EXPECT_LT(code_item_offset - section_unused.offset_, section_unused.size_);
+        ++unused_count;
+      }
+    }
+    DCHECK(!it.HasNext());
+    EXPECT_GT(hot_count, 0u);
+    EXPECT_GT(post_startup_count, 0u);
+    EXPECT_GT(startup_count, 0u);
+    EXPECT_GT(unused_count, 0u);
+  }
+}
+
 }  // namespace art
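
The in-section assertions in LayoutSections rely on a standard unsigned-arithmetic idiom: with size_t operands, a single comparison of the form (code_item_offset - offset_ < size_) checks both bounds at once, because an offset below the section start wraps around to a huge value. As a standalone sketch:

    #include <cstddef>

    // Returns whether x lies in [lo, lo + len). For unsigned types one
    // subtraction-and-compare covers both bounds: if x < lo, the difference
    // wraps to a value that cannot be smaller than len.
    bool InHalfOpenRange(size_t x, size_t lo, size_t len) {
      return x - lo < len;
    }
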
diff --git a/dexlayout/dexlayout.cc b/dexlayout/dexlayout.cc
index fd92d77..401a3ee 100644
--- a/dexlayout/dexlayout.cc
+++ b/dexlayout/dexlayout.cc
@@ -34,6 +34,7 @@
 #include "android-base/stringprintf.h"
 
 #include "dex_file-inl.h"
+#include "dex_file_layout.h"
 #include "dex_file_verifier.h"
 #include "dex_instruction-inl.h"
 #include "dex_ir_builder.h"
@@ -1680,21 +1681,13 @@
     }
   }
 
-  enum CodeItemState {
-    kCodeItemStateExecStartupOnly = 0,
-    kCodeItemStateHot,
-    kCodeItemStateClinit,
-    kCodeItemStateExec,
-    kCodeItemStateNotExecuted,
-    kCodeItemStateSize,
-  };
-
   static constexpr InvokeType invoke_types[] = {
     kDirect,
     kVirtual
   };
 
-  std::unordered_set<dex_ir::CodeItem*> code_items[kCodeItemStateSize];
+  const size_t num_layout_types = static_cast<size_t>(LayoutType::kLayoutTypeCount);
+  std::unordered_set<dex_ir::CodeItem*> code_items[num_layout_types];
   for (InvokeType invoke_type : invoke_types) {
     for (std::unique_ptr<dex_ir::ClassDef>& class_def : header_->GetCollections().ClassDefs()) {
       const bool is_profile_class =
@@ -1719,20 +1712,20 @@
         const bool is_startup_clinit = is_profile_class && is_clinit;
         using Hotness = ProfileCompilationInfo::MethodHotness;
         Hotness hotness = info_->GetMethodHotness(MethodReference(dex_file, method_id->GetIndex()));
-        CodeItemState state = kCodeItemStateNotExecuted;
+        LayoutType state = LayoutType::kLayoutTypeUnused;
         if (hotness.IsHot()) {
           // Hot code is compiled, maybe one day it won't be accessed. So lay it out together for
           // now.
-          state = kCodeItemStateHot;
+          state = LayoutType::kLayoutTypeHot;
         } else if (is_startup_clinit || hotness.GetFlags() == Hotness::kFlagStartup) {
           // Startup clinit or a method that only has the startup flag.
-          state = kCodeItemStateExecStartupOnly;
+          state = LayoutType::kLayoutTypeStartupOnly;
         } else if (is_clinit) {
-          state = kCodeItemStateClinit;
+          state = LayoutType::kLayoutTypeUsedOnce;
         } else if (hotness.IsInProfile()) {
-          state = kCodeItemStateExec;
+          state = LayoutType::kLayoutTypeSometimesUsed;
         }
-        code_items[state].insert(code_item);
+        code_items[static_cast<size_t>(state)].insert(code_item);
       }
     }
   }
@@ -1741,10 +1734,11 @@
   int32_t total_diff = 0;
   // The relative placement has no effect on correctness; it is used to ensure
   // the layout is deterministic
-  for (std::unordered_set<dex_ir::CodeItem*>& code_items_set : code_items) {
+  for (size_t index = 0; index < num_layout_types; ++index) {
+    const std::unordered_set<dex_ir::CodeItem*>& code_items_set = code_items[index];
     // diff is reset for each class of code items.
     int32_t diff = 0;
-    uint32_t start_offset = code_item_offset;
+    const uint32_t start_offset = code_item_offset;
     for (dex_ir::ClassData* data : new_class_data_order) {
       data->SetOffset(data->GetOffset() + diff);
       for (InvokeType invoke_type : invoke_types) {
@@ -1763,9 +1757,13 @@
         }
       }
     }
-    for (size_t i = 0; i < kCodeItemStateSize; ++i) {
+    DexLayoutSection& code_section = dex_sections_.sections_[static_cast<size_t>(
+        DexLayoutSections::SectionType::kSectionTypeCode)];
+    code_section.parts_[index].offset_ = start_offset;
+    code_section.parts_[index].size_ = code_item_offset - start_offset;
+    for (size_t i = 0; i < num_layout_types; ++i) {
       VLOG(dex) << "Code item layout bucket " << i << " count=" << code_items[i].size()
-                << " bytes=" << code_item_offset - start_offset;
+                << " bytes=" << code_section.parts_[i].size_;
     }
     total_diff += diff;
   }
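
The hunks above replace dexlayout's private CodeItemState enum with the shared LayoutType and record each bucket's offset and size into dex_sections_ so they can be published with the oat file. The precedence of the classification is: hot first, then startup-only (including startup clinits), then used-once (other clinits), then sometimes-used (anything else in the profile), then unused. A condensed sketch of that decision, with names shortened for illustration:

    enum class LayoutType { kHot, kStartupOnly, kUsedOnce, kSometimesUsed, kUnused };

    // Mirrors the classification order above; the first matching bucket wins.
    LayoutType Classify(bool hot, bool startup_clinit, bool startup_only,
                        bool clinit, bool in_profile) {
      if (hot) return LayoutType::kHot;  // Compiled code, laid out together.
      if (startup_clinit || startup_only) return LayoutType::kStartupOnly;
      if (clinit) return LayoutType::kUsedOnce;
      if (in_profile) return LayoutType::kSometimesUsed;
      return LayoutType::kUnused;
    }
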
diff --git a/dexlayout/dexlayout.h b/dexlayout/dexlayout.h
index ed011d6..9f6e8a4 100644
--- a/dexlayout/dexlayout.h
+++ b/dexlayout/dexlayout.h
@@ -26,6 +26,7 @@
 #include <stdint.h>
 #include <stdio.h>
 
+#include "dex_file_layout.h"
 #include "dex_ir.h"
 #include "mem_map.h"
 
@@ -84,6 +85,10 @@
 
   MemMap* GetAndReleaseMemMap() { return mem_map_.release(); }
 
+  const DexLayoutSections& GetSections() const {
+    return dex_sections_;
+  }
+
  private:
   void DumpAnnotationSetItem(dex_ir::AnnotationSetItem* set_item);
   void DumpBytecodes(uint32_t idx, const dex_ir::CodeItem* code, uint32_t code_offset);
@@ -129,6 +134,7 @@
   FILE* out_file_;
   dex_ir::Header* header_;
   std::unique_ptr<MemMap> mem_map_;
+  DexLayoutSections dex_sections_;
 
   DISALLOW_COPY_AND_ASSIGN(DexLayout);
 };
diff --git a/disassembler/disassembler_x86.cc b/disassembler/disassembler_x86.cc
index 4824f70..bbc8e37 100644
--- a/disassembler/disassembler_x86.cc
+++ b/disassembler/disassembler_x86.cc
@@ -792,6 +792,7 @@
         src_reg_file = dst_reg_file = SSE;
         break;
       case 0x60: case 0x61: case 0x62: case 0x6C:
+      case 0x68: case 0x69: case 0x6A: case 0x6D:
         if (prefix[2] == 0x66) {
           src_reg_file = dst_reg_file = SSE;
           prefix[2] = 0;  // Clear prefix now. It has served its purpose as part of the opcode.
@@ -803,6 +804,10 @@
           case 0x61: opcode1 = "punpcklwd"; break;
           case 0x62: opcode1 = "punpckldq"; break;
           case 0x6c: opcode1 = "punpcklqdq"; break;
+          case 0x68: opcode1 = "punpckhbw"; break;
+          case 0x69: opcode1 = "punpckhwd"; break;
+          case 0x6A: opcode1 = "punpckhdq"; break;
+          case 0x6D: opcode1 = "punpckhqdq"; break;
         }
         load = true;
         has_modrm = true;
diff --git a/imgdiag/imgdiag.cc b/imgdiag/imgdiag.cc
index fb8e894..9ffc414 100644
--- a/imgdiag/imgdiag.cc
+++ b/imgdiag/imgdiag.cc
@@ -333,9 +333,11 @@
                         std::vector<uint8_t>* remote_contents,
                         std::vector<uint8_t>* zygote_contents,
                         const backtrace_map_t& boot_map,
-                        const ImageHeader& image_header) :
-    RegionCommon<mirror::Object>(os, remote_contents, zygote_contents, boot_map, image_header),
-    os_(*os) { }
+                        const ImageHeader& image_header,
+                        bool dump_dirty_objects)
+      : RegionCommon<mirror::Object>(os, remote_contents, zygote_contents, boot_map, image_header),
+        os_(*os),
+        dump_dirty_objects_(dump_dirty_objects) { }
 
   void CheckEntrySanity(const uint8_t* current) const
       REQUIRES_SHARED(Locks::mutator_lock_) {
@@ -396,7 +398,10 @@
     class_data_[klass].AddDirtyObject(entry, entry_remote);
   }
 
-  void DiffEntryContents(mirror::Object* entry, uint8_t* remote_bytes, const uint8_t* base_ptr)
+  void DiffEntryContents(mirror::Object* entry,
+                         uint8_t* remote_bytes,
+                         const uint8_t* base_ptr,
+                         bool log_dirty_objects)
       REQUIRES_SHARED(Locks::mutator_lock_) {
     const char* tabs = "    ";
     // Attempt to find fields for all dirty bytes.
@@ -453,6 +458,9 @@
       }
     }
     if (!dirty_static_fields.empty()) {
+      if (dump_dirty_objects_ && log_dirty_objects) {
+        dirty_objects_.insert(entry);
+      }
       os_ << tabs << "Dirty static fields " << dirty_static_fields.size() << "\n";
       for (ArtField* field : dirty_static_fields) {
         os_ << tabs << ArtField::PrettyField(field)
@@ -463,6 +471,14 @@
     os_ << "\n";
   }
 
+  void DumpDirtyObjects() REQUIRES_SHARED(Locks::mutator_lock_) {
+    for (mirror::Object* obj : dirty_objects_) {
+      if (obj->IsClass()) {
+        os_ << "Private dirty object: " << obj->AsClass()->PrettyDescriptor() << "\n";
+      }
+    }
+  }
+
   void DumpDirtyEntries() REQUIRES_SHARED(Locks::mutator_lock_) {
     // vector of pairs (size_t count, Class*)
     auto dirty_object_class_values =
@@ -592,6 +608,8 @@
   };
 
   std::ostream& os_;
+  bool dump_dirty_objects_;
+  std::unordered_set<mirror::Object*> dirty_objects_;
   std::map<mirror::Class*, ClassData> class_data_;
 
   DISALLOW_COPY_AND_ASSIGN(RegionSpecializedBase);
@@ -720,9 +738,15 @@
              std::vector<uint8_t>* remote_contents,
              std::vector<uint8_t>* zygote_contents,
              const backtrace_map_t& boot_map,
-             const ImageHeader& image_header) :
-    RegionSpecializedBase<T>(os, remote_contents, zygote_contents, boot_map, image_header),
-    os_(*os) {
+             const ImageHeader& image_header,
+             bool dump_dirty_objects)
+      : RegionSpecializedBase<T>(os,
+                                 remote_contents,
+                                 zygote_contents,
+                                 boot_map,
+                                 image_header,
+                                 dump_dirty_objects),
+        os_(*os) {
     CHECK(remote_contents != nullptr);
     CHECK(zygote_contents != nullptr);
   }
@@ -773,7 +797,8 @@
     DiffDirtyEntries(ProcessType::kRemote,
                      begin_image_ptr,
                      RegionCommon<T>::remote_contents_,
-                     base_ptr);
+                     base_ptr,
+                     /*log_dirty_objects*/true);
     // Print shared dirty after since it's less important.
     if (RegionCommon<T>::GetZygoteDirtyEntryCount() != 0) {
       // We only reach this point if both pids were specified.  Furthermore,
@@ -784,8 +809,10 @@
       DiffDirtyEntries(ProcessType::kZygote,
                        begin_image_ptr,
                        RegionCommon<T>::zygote_contents_,
-                       begin_image_ptr);
+                       begin_image_ptr,
+                       /*log_dirty_objects*/false);
     }
+    RegionSpecializedBase<T>::DumpDirtyObjects();
     RegionSpecializedBase<T>::DumpDirtyEntries();
     RegionSpecializedBase<T>::DumpFalseDirtyEntries();
     RegionSpecializedBase<T>::DumpCleanEntries();
@@ -797,7 +824,8 @@
   void DiffDirtyEntries(ProcessType process_type,
                         const uint8_t* begin_image_ptr,
                         std::vector<uint8_t>* contents,
-                        const uint8_t* base_ptr)
+                        const uint8_t* base_ptr,
+                        bool log_dirty_objects)
       REQUIRES_SHARED(Locks::mutator_lock_) {
     os_ << RegionCommon<T>::dirty_entries_.size() << "\n";
     const std::set<T*>& entries =
@@ -808,7 +836,10 @@
       uint8_t* entry_bytes = reinterpret_cast<uint8_t*>(entry);
       ptrdiff_t offset = entry_bytes - begin_image_ptr;
       uint8_t* remote_bytes = &(*contents)[offset];
-      RegionSpecializedBase<T>::DiffEntryContents(entry, remote_bytes, &base_ptr[offset]);
+      RegionSpecializedBase<T>::DiffEntryContents(entry,
+                                                  remote_bytes,
+                                                  &base_ptr[offset],
+                                                  log_dirty_objects);
     }
   }
 
@@ -872,12 +903,14 @@
                          const ImageHeader& image_header,
                          const std::string& image_location,
                          pid_t image_diff_pid,
-                         pid_t zygote_diff_pid)
+                         pid_t zygote_diff_pid,
+                         bool dump_dirty_objects)
       : os_(os),
         image_header_(image_header),
         image_location_(image_location),
         image_diff_pid_(image_diff_pid),
         zygote_diff_pid_(zygote_diff_pid),
+        dump_dirty_objects_(dump_dirty_objects),
         zygote_pid_only_(false) {}
 
   bool Init() {
@@ -1075,6 +1108,8 @@
       }
     }
 
+    std::vector<size_t> private_dirty_pages_for_section(ImageHeader::kSectionCount, 0u);
+
     // Iterate through one byte at a time.
     ptrdiff_t page_off_begin = image_header_.GetImageBegin() - image_begin;
     for (uintptr_t begin = boot_map_.start; begin != boot_map_.end; ++begin) {
@@ -1127,6 +1162,12 @@
 
         if (is_dirty && is_private) {
           mapping_data->private_dirty_pages++;
+          for (size_t i = 0; i < ImageHeader::kSectionCount; ++i) {
+            const ImageHeader::ImageSections section = static_cast<ImageHeader::ImageSections>(i);
+            if (image_header_.GetImageSection(section).Contains(offset)) {
+              ++private_dirty_pages_for_section[i];
+            }
+          }
         }
       }
     }
@@ -1138,7 +1179,19 @@
        << mapping_data->dirty_pages << " pages are dirty;\n  "
        << mapping_data->false_dirty_pages << " pages are false dirty;\n  "
        << mapping_data->private_pages << " pages are private;\n  "
-       << mapping_data->private_dirty_pages << " pages are Private_Dirty\n  ";
+       << mapping_data->private_dirty_pages << " pages are Private_Dirty\n  "
+       << "\n";
+
+    size_t total_private_dirty_pages = std::accumulate(private_dirty_pages_for_section.begin(),
+                                                       private_dirty_pages_for_section.end(),
+                                                       0u);
+    os << "Image sections (total private dirty pages " << total_private_dirty_pages << ")\n";
+    for (size_t i = 0; i < ImageHeader::kSectionCount; ++i) {
+      const ImageHeader::ImageSections section = static_cast<ImageHeader::ImageSections>(i);
+      os << section << " " << image_header_.GetImageSection(section)
+         << " private dirty pages=" << private_dirty_pages_for_section[i] << "\n";
+    }
+    os << "\n";
 
     return true;
   }
@@ -1187,7 +1240,8 @@
                                                   &remote_contents_,
                                                   &zygote_contents_,
                                                   boot_map_,
-                                                  image_header_);
+                                                  image_header_,
+                                                  dump_dirty_objects_);
 
     RemoteProcesses remotes;
     if (zygote_pid_only_) {
@@ -1344,6 +1398,7 @@
   const std::string image_location_;
   pid_t image_diff_pid_;  // Dump image diff against boot.art if pid is non-negative
   pid_t zygote_diff_pid_;  // Dump image diff against zygote boot.art if pid is non-negative
+  bool dump_dirty_objects_;  // Whether to additionally dump objects that are dirty.
   bool zygote_pid_only_;  // The user only specified a pid for the zygote.
 
   // BacktraceMap used for finding the memory mapping of the image file.
@@ -1371,7 +1426,8 @@
 static int DumpImage(Runtime* runtime,
                      std::ostream* os,
                      pid_t image_diff_pid,
-                     pid_t zygote_diff_pid) {
+                     pid_t zygote_diff_pid,
+                     bool dump_dirty_objects) {
   ScopedObjectAccess soa(Thread::Current());
   gc::Heap* heap = runtime->GetHeap();
   std::vector<gc::space::ImageSpace*> image_spaces = heap->GetBootImageSpaces();
@@ -1387,7 +1443,8 @@
                                   image_header,
                                   image_space->GetImageLocation(),
                                   image_diff_pid,
-                                  zygote_diff_pid);
+                                  zygote_diff_pid,
+                                  dump_dirty_objects);
     if (!img_diag_dumper.Init()) {
       return EXIT_FAILURE;
     }
@@ -1425,6 +1482,8 @@
         *error_msg = "Zygote diff pid out of range";
         return kParseError;
       }
+    } else if (option == "--dump-dirty-objects") {
+      dump_dirty_objects_ = true;
     } else {
       return kParseUnknownArgument;
     }
@@ -1477,6 +1536,7 @@
         "  --zygote-diff-pid=<pid>: provide the PID of the zygote whose boot.art you want to diff "
         "against.\n"
         "      Example: --zygote-diff-pid=$(pid zygote)\n"
+        "  --dump-dirty-objects: additionally output dirty objects of interest.\n"
         "\n";
 
     return usage;
@@ -1485,6 +1545,7 @@
  public:
   pid_t image_diff_pid_ = -1;
   pid_t zygote_diff_pid_ = -1;
+  bool dump_dirty_objects_ = false;
 };
 
 struct ImgDiagMain : public CmdlineMain<ImgDiagArgs> {
@@ -1494,7 +1555,8 @@
     return DumpImage(runtime,
                      args_->os_,
                      args_->image_diff_pid_,
-                     args_->zygote_diff_pid_) == EXIT_SUCCESS;
+                     args_->zygote_diff_pid_,
+                     args_->dump_dirty_objects_) == EXIT_SUCCESS;
   }
 };
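
With the plumbing above, the new flag composes with the existing diff options. A hypothetical invocation, following the pid conventions already shown in the usage text:

    imgdiag --image-diff-pid=$(pid system_server) --zygote-diff-pid=$(pid zygote) --dump-dirty-objects

The extra output is the "Private dirty object:" list of class descriptors, which pairs naturally with dex2oat's new --dirty-image-objects flag.
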
 
diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc
index 99168c9..6f833c6 100644
--- a/oatdump/oatdump.cc
+++ b/oatdump/oatdump.cc
@@ -89,6 +89,8 @@
   "kSaveRefsOnlyMethod",
   "kSaveRefsAndArgsMethod",
   "kSaveEverythingMethod",
+  "kSaveEverythingMethodForClinit",
+  "kSaveEverythingMethodForSuspendCheck",
 };
 
 const char* image_roots_descriptions_[] = {
@@ -498,6 +500,13 @@
       os << "Dex file data for " << dex_file->GetLocation() << "\n";
       data.Dump(os);
       os << "\n";
+      const DexLayoutSections* const layout_sections = oat_dex_file->GetDexLayoutSections();
+      if (layout_sections != nullptr) {
+        os << "Layout data\n";
+        os << *layout_sections;
+        os << "\n";
+      }
+
       cumulative.Add(data);
     }
     os << "Cumulative dex file data\n";
diff --git a/runtime/openjdkjvmti/Android.bp b/openjdkjvmti/Android.bp
similarity index 97%
rename from runtime/openjdkjvmti/Android.bp
rename to openjdkjvmti/Android.bp
index aec1bd0..b6b1b56 100644
--- a/runtime/openjdkjvmti/Android.bp
+++ b/openjdkjvmti/Android.bp
@@ -48,7 +48,6 @@
            "ti_threadgroup.cc",
            "ti_timers.cc",
            "transform.cc"],
-    include_dirs: ["art/runtime"],
     header_libs: ["libopenjdkjvmti_headers"],
     shared_libs: [
         "libbase",
diff --git a/runtime/openjdkjvmti/MODULE_LICENSE_GPL_WITH_CLASSPATH_EXCEPTION b/openjdkjvmti/MODULE_LICENSE_GPL_WITH_CLASSPATH_EXCEPTION
similarity index 100%
rename from runtime/openjdkjvmti/MODULE_LICENSE_GPL_WITH_CLASSPATH_EXCEPTION
rename to openjdkjvmti/MODULE_LICENSE_GPL_WITH_CLASSPATH_EXCEPTION
diff --git a/runtime/openjdkjvmti/NOTICE b/openjdkjvmti/NOTICE
similarity index 100%
rename from runtime/openjdkjvmti/NOTICE
rename to openjdkjvmti/NOTICE
diff --git a/runtime/openjdkjvmti/OpenjdkJvmTi.cc b/openjdkjvmti/OpenjdkJvmTi.cc
similarity index 100%
rename from runtime/openjdkjvmti/OpenjdkJvmTi.cc
rename to openjdkjvmti/OpenjdkJvmTi.cc
diff --git a/runtime/openjdkjvmti/README.md b/openjdkjvmti/README.md
similarity index 100%
rename from runtime/openjdkjvmti/README.md
rename to openjdkjvmti/README.md
diff --git a/runtime/openjdkjvmti/art_jvmti.h b/openjdkjvmti/art_jvmti.h
similarity index 97%
rename from runtime/openjdkjvmti/art_jvmti.h
rename to openjdkjvmti/art_jvmti.h
index a2259c7..12f4cab 100644
--- a/runtime/openjdkjvmti/art_jvmti.h
+++ b/openjdkjvmti/art_jvmti.h
@@ -29,8 +29,8 @@
  * questions.
  */
 
-#ifndef ART_RUNTIME_OPENJDKJVMTI_ART_JVMTI_H_
-#define ART_RUNTIME_OPENJDKJVMTI_ART_JVMTI_H_
+#ifndef ART_OPENJDKJVMTI_ART_JVMTI_H_
+#define ART_OPENJDKJVMTI_ART_JVMTI_H_
 
 #include <memory>
 #include <type_traits>
@@ -232,7 +232,7 @@
     .can_get_line_numbers                            = 1,
     .can_get_source_debug_extension                  = 1,
     .can_access_local_variables                      = 1,
-    .can_maintain_original_method_order              = 0,
+    .can_maintain_original_method_order              = 1,
     .can_generate_single_step_events                 = 1,
     .can_generate_exception_events                   = 0,
     .can_generate_frame_pop_events                   = 0,
@@ -262,4 +262,4 @@
 
 }  // namespace openjdkjvmti
 
-#endif  // ART_RUNTIME_OPENJDKJVMTI_ART_JVMTI_H_
+#endif  // ART_OPENJDKJVMTI_ART_JVMTI_H_
diff --git a/runtime/openjdkjvmti/events-inl.h b/openjdkjvmti/events-inl.h
similarity index 98%
rename from runtime/openjdkjvmti/events-inl.h
rename to openjdkjvmti/events-inl.h
index 91e4055..32dba3e 100644
--- a/runtime/openjdkjvmti/events-inl.h
+++ b/openjdkjvmti/events-inl.h
@@ -14,8 +14,8 @@
  * limitations under the License.
  */
 
-#ifndef ART_RUNTIME_OPENJDKJVMTI_EVENTS_INL_H_
-#define ART_RUNTIME_OPENJDKJVMTI_EVENTS_INL_H_
+#ifndef ART_OPENJDKJVMTI_EVENTS_INL_H_
+#define ART_OPENJDKJVMTI_EVENTS_INL_H_
 
 #include <array>
 
@@ -437,4 +437,4 @@
 
 }  // namespace openjdkjvmti
 
-#endif  // ART_RUNTIME_OPENJDKJVMTI_EVENTS_INL_H_
+#endif  // ART_OPENJDKJVMTI_EVENTS_INL_H_
diff --git a/runtime/openjdkjvmti/events.cc b/openjdkjvmti/events.cc
similarity index 100%
rename from runtime/openjdkjvmti/events.cc
rename to openjdkjvmti/events.cc
diff --git a/runtime/openjdkjvmti/events.h b/openjdkjvmti/events.h
similarity index 98%
rename from runtime/openjdkjvmti/events.h
rename to openjdkjvmti/events.h
index 617519e..3d05fa1 100644
--- a/runtime/openjdkjvmti/events.h
+++ b/openjdkjvmti/events.h
@@ -14,8 +14,8 @@
  * limitations under the License.
  */
 
-#ifndef ART_RUNTIME_OPENJDKJVMTI_EVENTS_H_
-#define ART_RUNTIME_OPENJDKJVMTI_EVENTS_H_
+#ifndef ART_OPENJDKJVMTI_EVENTS_H_
+#define ART_OPENJDKJVMTI_EVENTS_H_
 
 #include <bitset>
 #include <vector>
@@ -227,4 +227,4 @@
 
 }  // namespace openjdkjvmti
 
-#endif  // ART_RUNTIME_OPENJDKJVMTI_EVENTS_H_
+#endif  // ART_OPENJDKJVMTI_EVENTS_H_
diff --git a/runtime/openjdkjvmti/fixed_up_dex_file.cc b/openjdkjvmti/fixed_up_dex_file.cc
similarity index 100%
rename from runtime/openjdkjvmti/fixed_up_dex_file.cc
rename to openjdkjvmti/fixed_up_dex_file.cc
diff --git a/runtime/openjdkjvmti/fixed_up_dex_file.h b/openjdkjvmti/fixed_up_dex_file.h
similarity index 94%
rename from runtime/openjdkjvmti/fixed_up_dex_file.h
rename to openjdkjvmti/fixed_up_dex_file.h
index a96ee12..4cb39cf 100644
--- a/runtime/openjdkjvmti/fixed_up_dex_file.h
+++ b/openjdkjvmti/fixed_up_dex_file.h
@@ -29,8 +29,8 @@
  * questions.
  */
 
-#ifndef ART_RUNTIME_OPENJDKJVMTI_FIXED_UP_DEX_FILE_H_
-#define ART_RUNTIME_OPENJDKJVMTI_FIXED_UP_DEX_FILE_H_
+#ifndef ART_OPENJDKJVMTI_FIXED_UP_DEX_FILE_H_
+#define ART_OPENJDKJVMTI_FIXED_UP_DEX_FILE_H_
 
 #include <memory>
 #include <vector>
@@ -80,4 +80,4 @@
 
 }  // namespace openjdkjvmti
 
-#endif  // ART_RUNTIME_OPENJDKJVMTI_FIXED_UP_DEX_FILE_H_
+#endif  // ART_OPENJDKJVMTI_FIXED_UP_DEX_FILE_H_
diff --git a/runtime/openjdkjvmti/include/jvmti.h b/openjdkjvmti/include/jvmti.h
similarity index 100%
rename from runtime/openjdkjvmti/include/jvmti.h
rename to openjdkjvmti/include/jvmti.h
diff --git a/runtime/openjdkjvmti/jvmti_allocator.h b/openjdkjvmti/jvmti_allocator.h
similarity index 96%
rename from runtime/openjdkjvmti/jvmti_allocator.h
rename to openjdkjvmti/jvmti_allocator.h
index 44b1cb1..e29e034 100644
--- a/runtime/openjdkjvmti/jvmti_allocator.h
+++ b/openjdkjvmti/jvmti_allocator.h
@@ -29,8 +29,8 @@
  * questions.
  */
 
-#ifndef ART_RUNTIME_OPENJDKJVMTI_JVMTI_ALLOCATOR_H_
-#define ART_RUNTIME_OPENJDKJVMTI_JVMTI_ALLOCATOR_H_
+#ifndef ART_OPENJDKJVMTI_JVMTI_ALLOCATOR_H_
+#define ART_OPENJDKJVMTI_JVMTI_ALLOCATOR_H_
 
 #include "base/logging.h"
 #include "base/macros.h"
@@ -171,4 +171,4 @@
 
 }  // namespace openjdkjvmti
 
-#endif  // ART_RUNTIME_OPENJDKJVMTI_JVMTI_ALLOCATOR_H_
+#endif  // ART_OPENJDKJVMTI_JVMTI_ALLOCATOR_H_
diff --git a/runtime/openjdkjvmti/jvmti_weak_table-inl.h b/openjdkjvmti/jvmti_weak_table-inl.h
similarity index 98%
rename from runtime/openjdkjvmti/jvmti_weak_table-inl.h
rename to openjdkjvmti/jvmti_weak_table-inl.h
index a640acb..1c82255 100644
--- a/runtime/openjdkjvmti/jvmti_weak_table-inl.h
+++ b/openjdkjvmti/jvmti_weak_table-inl.h
@@ -29,8 +29,8 @@
  * questions.
  */
 
-#ifndef ART_RUNTIME_OPENJDKJVMTI_JVMTI_WEAK_TABLE_INL_H_
-#define ART_RUNTIME_OPENJDKJVMTI_JVMTI_WEAK_TABLE_INL_H_
+#ifndef ART_OPENJDKJVMTI_JVMTI_WEAK_TABLE_INL_H_
+#define ART_OPENJDKJVMTI_JVMTI_WEAK_TABLE_INL_H_
 
 #include "jvmti_weak_table.h"
 
@@ -403,4 +403,4 @@
 
 }  // namespace openjdkjvmti
 
-#endif  // ART_RUNTIME_OPENJDKJVMTI_JVMTI_WEAK_TABLE_INL_H_
+#endif  // ART_OPENJDKJVMTI_JVMTI_WEAK_TABLE_INL_H_
diff --git a/runtime/openjdkjvmti/jvmti_weak_table.h b/openjdkjvmti/jvmti_weak_table.h
similarity index 97%
rename from runtime/openjdkjvmti/jvmti_weak_table.h
rename to openjdkjvmti/jvmti_weak_table.h
index a5175a4..5a821c9 100644
--- a/runtime/openjdkjvmti/jvmti_weak_table.h
+++ b/openjdkjvmti/jvmti_weak_table.h
@@ -29,8 +29,8 @@
  * questions.
  */
 
-#ifndef ART_RUNTIME_OPENJDKJVMTI_JVMTI_WEAK_TABLE_H_
-#define ART_RUNTIME_OPENJDKJVMTI_JVMTI_WEAK_TABLE_H_
+#ifndef ART_OPENJDKJVMTI_JVMTI_WEAK_TABLE_H_
+#define ART_OPENJDKJVMTI_JVMTI_WEAK_TABLE_H_
 
 #include <unordered_map>
 
@@ -224,4 +224,4 @@
 
 }  // namespace openjdkjvmti
 
-#endif  // ART_RUNTIME_OPENJDKJVMTI_JVMTI_WEAK_TABLE_H_
+#endif  // ART_OPENJDKJVMTI_JVMTI_WEAK_TABLE_H_
diff --git a/runtime/openjdkjvmti/object_tagging.cc b/openjdkjvmti/object_tagging.cc
similarity index 100%
rename from runtime/openjdkjvmti/object_tagging.cc
rename to openjdkjvmti/object_tagging.cc
diff --git a/runtime/openjdkjvmti/object_tagging.h b/openjdkjvmti/object_tagging.h
similarity index 94%
rename from runtime/openjdkjvmti/object_tagging.h
rename to openjdkjvmti/object_tagging.h
index ca84e44..b474845 100644
--- a/runtime/openjdkjvmti/object_tagging.h
+++ b/openjdkjvmti/object_tagging.h
@@ -29,8 +29,8 @@
  * questions.
  */
 
-#ifndef ART_RUNTIME_OPENJDKJVMTI_OBJECT_TAGGING_H_
-#define ART_RUNTIME_OPENJDKJVMTI_OBJECT_TAGGING_H_
+#ifndef ART_OPENJDKJVMTI_OBJECT_TAGGING_H_
+#define ART_OPENJDKJVMTI_OBJECT_TAGGING_H_
 
 #include <unordered_map>
 
@@ -83,4 +83,4 @@
 
 }  // namespace openjdkjvmti
 
-#endif  // ART_RUNTIME_OPENJDKJVMTI_OBJECT_TAGGING_H_
+#endif  // ART_OPENJDKJVMTI_OBJECT_TAGGING_H_
diff --git a/runtime/openjdkjvmti/ti_allocator.cc b/openjdkjvmti/ti_allocator.cc
similarity index 100%
rename from runtime/openjdkjvmti/ti_allocator.cc
rename to openjdkjvmti/ti_allocator.cc
diff --git a/runtime/openjdkjvmti/ti_allocator.h b/openjdkjvmti/ti_allocator.h
similarity index 93%
rename from runtime/openjdkjvmti/ti_allocator.h
rename to openjdkjvmti/ti_allocator.h
index 35575c3..776cc5e 100644
--- a/runtime/openjdkjvmti/ti_allocator.h
+++ b/openjdkjvmti/ti_allocator.h
@@ -29,8 +29,8 @@
  * questions.
  */
 
-#ifndef ART_RUNTIME_OPENJDKJVMTI_TI_ALLOCATOR_H_
-#define ART_RUNTIME_OPENJDKJVMTI_TI_ALLOCATOR_H_
+#ifndef ART_OPENJDKJVMTI_TI_ALLOCATOR_H_
+#define ART_OPENJDKJVMTI_TI_ALLOCATOR_H_
 
 #include "jni.h"
 #include "jvmti.h"
@@ -61,5 +61,5 @@
 
 }  // namespace openjdkjvmti
 
-#endif  // ART_RUNTIME_OPENJDKJVMTI_TI_ALLOCATOR_H_
+#endif  // ART_OPENJDKJVMTI_TI_ALLOCATOR_H_
 
diff --git a/runtime/openjdkjvmti/ti_breakpoint.cc b/openjdkjvmti/ti_breakpoint.cc
similarity index 100%
rename from runtime/openjdkjvmti/ti_breakpoint.cc
rename to openjdkjvmti/ti_breakpoint.cc
diff --git a/runtime/openjdkjvmti/ti_breakpoint.h b/openjdkjvmti/ti_breakpoint.h
similarity index 94%
rename from runtime/openjdkjvmti/ti_breakpoint.h
rename to openjdkjvmti/ti_breakpoint.h
index c3dbef7..9b08b42 100644
--- a/runtime/openjdkjvmti/ti_breakpoint.h
+++ b/openjdkjvmti/ti_breakpoint.h
@@ -29,8 +29,8 @@
  * questions.
  */
 
-#ifndef ART_RUNTIME_OPENJDKJVMTI_TI_BREAKPOINT_H_
-#define ART_RUNTIME_OPENJDKJVMTI_TI_BREAKPOINT_H_
+#ifndef ART_OPENJDKJVMTI_TI_BREAKPOINT_H_
+#define ART_OPENJDKJVMTI_TI_BREAKPOINT_H_
 
 #include "jni.h"
 #include "jvmti.h"
@@ -91,4 +91,4 @@
 };
 
 }  // namespace std
-#endif  // ART_RUNTIME_OPENJDKJVMTI_TI_BREAKPOINT_H_
+#endif  // ART_OPENJDKJVMTI_TI_BREAKPOINT_H_
diff --git a/runtime/openjdkjvmti/ti_class.cc b/openjdkjvmti/ti_class.cc
similarity index 100%
rename from runtime/openjdkjvmti/ti_class.cc
rename to openjdkjvmti/ti_class.cc
diff --git a/runtime/openjdkjvmti/ti_class.h b/openjdkjvmti/ti_class.h
similarity index 96%
rename from runtime/openjdkjvmti/ti_class.h
rename to openjdkjvmti/ti_class.h
index 7bb6b3e..dd99e36 100644
--- a/runtime/openjdkjvmti/ti_class.h
+++ b/openjdkjvmti/ti_class.h
@@ -29,8 +29,8 @@
  * questions.
  */
 
-#ifndef ART_RUNTIME_OPENJDKJVMTI_TI_CLASS_H_
-#define ART_RUNTIME_OPENJDKJVMTI_TI_CLASS_H_
+#ifndef ART_OPENJDKJVMTI_TI_CLASS_H_
+#define ART_OPENJDKJVMTI_TI_CLASS_H_
 
 #include "jni.h"
 #include "jvmti.h"
@@ -92,4 +92,4 @@
 
 }  // namespace openjdkjvmti
 
-#endif  // ART_RUNTIME_OPENJDKJVMTI_TI_CLASS_H_
+#endif  // ART_OPENJDKJVMTI_TI_CLASS_H_
diff --git a/runtime/openjdkjvmti/ti_class_definition.cc b/openjdkjvmti/ti_class_definition.cc
similarity index 100%
rename from runtime/openjdkjvmti/ti_class_definition.cc
rename to openjdkjvmti/ti_class_definition.cc
diff --git a/runtime/openjdkjvmti/ti_class_definition.h b/openjdkjvmti/ti_class_definition.h
similarity index 95%
rename from runtime/openjdkjvmti/ti_class_definition.h
rename to openjdkjvmti/ti_class_definition.h
index 2c268dd..accc456 100644
--- a/runtime/openjdkjvmti/ti_class_definition.h
+++ b/openjdkjvmti/ti_class_definition.h
@@ -29,8 +29,8 @@
  * questions.
  */
 
-#ifndef ART_RUNTIME_OPENJDKJVMTI_TI_CLASS_DEFINITION_H_
-#define ART_RUNTIME_OPENJDKJVMTI_TI_CLASS_DEFINITION_H_
+#ifndef ART_OPENJDKJVMTI_TI_CLASS_DEFINITION_H_
+#define ART_OPENJDKJVMTI_TI_CLASS_DEFINITION_H_
 
 #include "art_jvmti.h"
 
@@ -128,4 +128,4 @@
 
 }  // namespace openjdkjvmti
 
-#endif  // ART_RUNTIME_OPENJDKJVMTI_TI_CLASS_DEFINITION_H_
+#endif  // ART_OPENJDKJVMTI_TI_CLASS_DEFINITION_H_
diff --git a/runtime/openjdkjvmti/ti_class_loader.cc b/openjdkjvmti/ti_class_loader.cc
similarity index 100%
rename from runtime/openjdkjvmti/ti_class_loader.cc
rename to openjdkjvmti/ti_class_loader.cc
diff --git a/runtime/openjdkjvmti/ti_class_loader.h b/openjdkjvmti/ti_class_loader.h
similarity index 95%
rename from runtime/openjdkjvmti/ti_class_loader.h
rename to openjdkjvmti/ti_class_loader.h
index af66c5f..767e258 100644
--- a/runtime/openjdkjvmti/ti_class_loader.h
+++ b/openjdkjvmti/ti_class_loader.h
@@ -29,8 +29,8 @@
  * questions.
  */
 
-#ifndef ART_RUNTIME_OPENJDKJVMTI_TI_CLASS_LOADER_H_
-#define ART_RUNTIME_OPENJDKJVMTI_TI_CLASS_LOADER_H_
+#ifndef ART_OPENJDKJVMTI_TI_CLASS_LOADER_H_
+#define ART_OPENJDKJVMTI_TI_CLASS_LOADER_H_
 
 #include <string>
 
@@ -96,4 +96,4 @@
 };
 
 }  // namespace openjdkjvmti
-#endif  // ART_RUNTIME_OPENJDKJVMTI_TI_CLASS_LOADER_H_
+#endif  // ART_OPENJDKJVMTI_TI_CLASS_LOADER_H_
diff --git a/runtime/openjdkjvmti/ti_dump.cc b/openjdkjvmti/ti_dump.cc
similarity index 100%
rename from runtime/openjdkjvmti/ti_dump.cc
rename to openjdkjvmti/ti_dump.cc
diff --git a/runtime/openjdkjvmti/ti_dump.h b/openjdkjvmti/ti_dump.h
similarity index 92%
rename from runtime/openjdkjvmti/ti_dump.h
rename to openjdkjvmti/ti_dump.h
index 67cb239..323bf56 100644
--- a/runtime/openjdkjvmti/ti_dump.h
+++ b/openjdkjvmti/ti_dump.h
@@ -29,8 +29,8 @@
  * questions.
  */
 
-#ifndef ART_RUNTIME_OPENJDKJVMTI_TI_DUMP_H_
-#define ART_RUNTIME_OPENJDKJVMTI_TI_DUMP_H_
+#ifndef ART_OPENJDKJVMTI_TI_DUMP_H_
+#define ART_OPENJDKJVMTI_TI_DUMP_H_
 
 #include "jni.h"
 #include "jvmti.h"
@@ -47,4 +47,4 @@
 
 }  // namespace openjdkjvmti
 
-#endif  // ART_RUNTIME_OPENJDKJVMTI_TI_DUMP_H_
+#endif  // ART_OPENJDKJVMTI_TI_DUMP_H_
diff --git a/runtime/openjdkjvmti/ti_field.cc b/openjdkjvmti/ti_field.cc
similarity index 100%
rename from runtime/openjdkjvmti/ti_field.cc
rename to openjdkjvmti/ti_field.cc
diff --git a/runtime/openjdkjvmti/ti_field.h b/openjdkjvmti/ti_field.h
similarity index 95%
rename from runtime/openjdkjvmti/ti_field.h
rename to openjdkjvmti/ti_field.h
index 880949e..8a229ed 100644
--- a/runtime/openjdkjvmti/ti_field.h
+++ b/openjdkjvmti/ti_field.h
@@ -29,8 +29,8 @@
  * questions.
  */
 
-#ifndef ART_RUNTIME_OPENJDKJVMTI_TI_FIELD_H_
-#define ART_RUNTIME_OPENJDKJVMTI_TI_FIELD_H_
+#ifndef ART_OPENJDKJVMTI_TI_FIELD_H_
+#define ART_OPENJDKJVMTI_TI_FIELD_H_
 
 #include "jni.h"
 #include "jvmti.h"
@@ -69,4 +69,4 @@
 
 }  // namespace openjdkjvmti
 
-#endif  // ART_RUNTIME_OPENJDKJVMTI_TI_FIELD_H_
+#endif  // ART_OPENJDKJVMTI_TI_FIELD_H_
diff --git a/runtime/openjdkjvmti/ti_heap.cc b/openjdkjvmti/ti_heap.cc
similarity index 100%
rename from runtime/openjdkjvmti/ti_heap.cc
rename to openjdkjvmti/ti_heap.cc
diff --git a/runtime/openjdkjvmti/ti_heap.h b/openjdkjvmti/ti_heap.h
similarity index 94%
rename from runtime/openjdkjvmti/ti_heap.h
rename to openjdkjvmti/ti_heap.h
index 0c973db..62761b5 100644
--- a/runtime/openjdkjvmti/ti_heap.h
+++ b/openjdkjvmti/ti_heap.h
@@ -14,8 +14,8 @@
  * limitations under the License.
  */
 
-#ifndef ART_RUNTIME_OPENJDKJVMTI_TI_HEAP_H_
-#define ART_RUNTIME_OPENJDKJVMTI_TI_HEAP_H_
+#ifndef ART_OPENJDKJVMTI_TI_HEAP_H_
+#define ART_OPENJDKJVMTI_TI_HEAP_H_
 
 #include "jvmti.h"
 
@@ -70,4 +70,4 @@
 
 }  // namespace openjdkjvmti
 
-#endif  // ART_RUNTIME_OPENJDKJVMTI_TI_HEAP_H_
+#endif  // ART_OPENJDKJVMTI_TI_HEAP_H_
diff --git a/runtime/openjdkjvmti/ti_jni.cc b/openjdkjvmti/ti_jni.cc
similarity index 100%
rename from runtime/openjdkjvmti/ti_jni.cc
rename to openjdkjvmti/ti_jni.cc
diff --git a/runtime/openjdkjvmti/ti_jni.h b/openjdkjvmti/ti_jni.h
similarity index 94%
rename from runtime/openjdkjvmti/ti_jni.h
rename to openjdkjvmti/ti_jni.h
index 906aab0..590fd54 100644
--- a/runtime/openjdkjvmti/ti_jni.h
+++ b/openjdkjvmti/ti_jni.h
@@ -29,8 +29,8 @@
  * questions.
  */
 
-#ifndef ART_RUNTIME_OPENJDKJVMTI_TI_JNI_H_
-#define ART_RUNTIME_OPENJDKJVMTI_TI_JNI_H_
+#ifndef ART_OPENJDKJVMTI_TI_JNI_H_
+#define ART_OPENJDKJVMTI_TI_JNI_H_
 
 #include "jni.h"
 #include "jvmti.h"
@@ -55,4 +55,4 @@
 
 }  // namespace openjdkjvmti
 
-#endif  // ART_RUNTIME_OPENJDKJVMTI_TI_JNI_H_
+#endif  // ART_OPENJDKJVMTI_TI_JNI_H_
diff --git a/runtime/openjdkjvmti/ti_method.cc b/openjdkjvmti/ti_method.cc
similarity index 99%
rename from runtime/openjdkjvmti/ti_method.cc
rename to openjdkjvmti/ti_method.cc
index ed5645a..8f72714 100644
--- a/runtime/openjdkjvmti/ti_method.cc
+++ b/openjdkjvmti/ti_method.cc
@@ -782,6 +782,7 @@
   }
   art::Thread* self = art::Thread::Current();
   art::ScopedObjectAccess soa(self);
+  art::MutexLock mu(self, *art::Locks::thread_list_lock_);
   art::Thread* target = ThreadUtil::GetNativeThread(thread, soa);
   if (target == nullptr && thread == nullptr) {
     return ERR(INVALID_THREAD);
@@ -790,7 +791,6 @@
     return ERR(THREAD_NOT_ALIVE);
   }
   GetLocalVariableClosure c(self, depth, slot, type, val);
-  art::MutexLock mu(self, *art::Locks::thread_list_lock_);
   if (!target->RequestSynchronousCheckpoint(&c)) {
     return ERR(THREAD_NOT_ALIVE);
   } else {
@@ -909,6 +909,7 @@
   }
   art::Thread* self = art::Thread::Current();
   art::ScopedObjectAccess soa(self);
+  art::MutexLock mu(self, *art::Locks::thread_list_lock_);
   art::Thread* target = ThreadUtil::GetNativeThread(thread, soa);
   if (target == nullptr && thread == nullptr) {
     return ERR(INVALID_THREAD);
@@ -917,7 +918,6 @@
     return ERR(THREAD_NOT_ALIVE);
   }
   SetLocalVariableClosure c(self, depth, slot, type, val);
-  art::MutexLock mu(self, *art::Locks::thread_list_lock_);
   if (!target->RequestSynchronousCheckpoint(&c)) {
     return ERR(THREAD_NOT_ALIVE);
   } else {
@@ -974,6 +974,7 @@
   }
   art::Thread* self = art::Thread::Current();
   art::ScopedObjectAccess soa(self);
+  art::MutexLock mu(self, *art::Locks::thread_list_lock_);
   art::Thread* target = ThreadUtil::GetNativeThread(thread, soa);
   if (target == nullptr && thread == nullptr) {
     return ERR(INVALID_THREAD);
@@ -982,7 +983,6 @@
     return ERR(THREAD_NOT_ALIVE);
   }
   GetLocalInstanceClosure c(self, depth, data);
-  art::MutexLock mu(self, *art::Locks::thread_list_lock_);
   if (!target->RequestSynchronousCheckpoint(&c)) {
     return ERR(THREAD_NOT_ALIVE);
   } else {
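
A condensed sketch of the locking pattern the three ti_method.cc hunks above converge on (the GetLocalVariable, SetLocalVariable and GetLocalInstance paths, judging by the closure names): thread_list_lock_ is now taken before the jthread is resolved and stays held across the checkpoint, so the resolved art::Thread* cannot exit in between. The wrapper name RunCheckpointLocked and the simplified error handling are assumptions, not ART source:

static jvmtiError RunCheckpointLocked(jthread thread, art::Closure* closure) {
  art::Thread* self = art::Thread::Current();
  art::ScopedObjectAccess soa(self);
  // Lock the thread list *before* resolving the target thread...
  art::MutexLock mu(self, *art::Locks::thread_list_lock_);
  art::Thread* target = ThreadUtil::GetNativeThread(thread, soa);
  if (target == nullptr && thread == nullptr) {
    return ERR(INVALID_THREAD);
  }
  if (target == nullptr) {
    return ERR(THREAD_NOT_ALIVE);
  }
  // ...and keep it held, so `target` cannot be freed before the
  // synchronous checkpoint has been requested.
  return target->RequestSynchronousCheckpoint(closure) ? ERR(NONE)
                                                       : ERR(THREAD_NOT_ALIVE);
}
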
diff --git a/runtime/openjdkjvmti/ti_method.h b/openjdkjvmti/ti_method.h
similarity index 96%
rename from runtime/openjdkjvmti/ti_method.h
rename to openjdkjvmti/ti_method.h
index aabaedb..e3578a4 100644
--- a/runtime/openjdkjvmti/ti_method.h
+++ b/openjdkjvmti/ti_method.h
@@ -29,8 +29,8 @@
  * questions.
  */
 
-#ifndef ART_RUNTIME_OPENJDKJVMTI_TI_METHOD_H_
-#define ART_RUNTIME_OPENJDKJVMTI_TI_METHOD_H_
+#ifndef ART_OPENJDKJVMTI_TI_METHOD_H_
+#define ART_OPENJDKJVMTI_TI_METHOD_H_
 
 #include "jni.h"
 #include "jvmti.h"
@@ -111,4 +111,4 @@
 
 }  // namespace openjdkjvmti
 
-#endif  // ART_RUNTIME_OPENJDKJVMTI_TI_METHOD_H_
+#endif  // ART_OPENJDKJVMTI_TI_METHOD_H_
diff --git a/runtime/openjdkjvmti/ti_monitor.cc b/openjdkjvmti/ti_monitor.cc
similarity index 100%
rename from runtime/openjdkjvmti/ti_monitor.cc
rename to openjdkjvmti/ti_monitor.cc
diff --git a/runtime/openjdkjvmti/ti_monitor.h b/openjdkjvmti/ti_monitor.h
similarity index 93%
rename from runtime/openjdkjvmti/ti_monitor.h
rename to openjdkjvmti/ti_monitor.h
index 96ccb0d..add089c 100644
--- a/runtime/openjdkjvmti/ti_monitor.h
+++ b/openjdkjvmti/ti_monitor.h
@@ -29,8 +29,8 @@
  * questions.
  */
 
-#ifndef ART_RUNTIME_OPENJDKJVMTI_TI_MONITOR_H_
-#define ART_RUNTIME_OPENJDKJVMTI_TI_MONITOR_H_
+#ifndef ART_OPENJDKJVMTI_TI_MONITOR_H_
+#define ART_OPENJDKJVMTI_TI_MONITOR_H_
 
 #include "jni.h"
 #include "jvmti.h"
@@ -56,4 +56,4 @@
 
 }  // namespace openjdkjvmti
 
-#endif  // ART_RUNTIME_OPENJDKJVMTI_TI_MONITOR_H_
+#endif  // ART_OPENJDKJVMTI_TI_MONITOR_H_
diff --git a/runtime/openjdkjvmti/ti_object.cc b/openjdkjvmti/ti_object.cc
similarity index 100%
rename from runtime/openjdkjvmti/ti_object.cc
rename to openjdkjvmti/ti_object.cc
diff --git a/runtime/openjdkjvmti/ti_object.h b/openjdkjvmti/ti_object.h
similarity index 92%
rename from runtime/openjdkjvmti/ti_object.h
rename to openjdkjvmti/ti_object.h
index 09eee61..fa3bd0f 100644
--- a/runtime/openjdkjvmti/ti_object.h
+++ b/openjdkjvmti/ti_object.h
@@ -29,8 +29,8 @@
  * questions.
  */
 
-#ifndef ART_RUNTIME_OPENJDKJVMTI_TI_OBJECT_H_
-#define ART_RUNTIME_OPENJDKJVMTI_TI_OBJECT_H_
+#ifndef ART_OPENJDKJVMTI_TI_OBJECT_H_
+#define ART_OPENJDKJVMTI_TI_OBJECT_H_
 
 #include "jni.h"
 #include "jvmti.h"
@@ -46,4 +46,4 @@
 
 }  // namespace openjdkjvmti
 
-#endif  // ART_RUNTIME_OPENJDKJVMTI_TI_OBJECT_H_
+#endif  // ART_OPENJDKJVMTI_TI_OBJECT_H_
diff --git a/runtime/openjdkjvmti/ti_phase.cc b/openjdkjvmti/ti_phase.cc
similarity index 100%
rename from runtime/openjdkjvmti/ti_phase.cc
rename to openjdkjvmti/ti_phase.cc
diff --git a/runtime/openjdkjvmti/ti_phase.h b/openjdkjvmti/ti_phase.h
similarity index 93%
rename from runtime/openjdkjvmti/ti_phase.h
rename to openjdkjvmti/ti_phase.h
index a2c0d11..d4ed86b 100644
--- a/runtime/openjdkjvmti/ti_phase.h
+++ b/openjdkjvmti/ti_phase.h
@@ -29,8 +29,8 @@
  * questions.
  */
 
-#ifndef ART_RUNTIME_OPENJDKJVMTI_TI_PHASE_H_
-#define ART_RUNTIME_OPENJDKJVMTI_TI_PHASE_H_
+#ifndef ART_OPENJDKJVMTI_TI_PHASE_H_
+#define ART_OPENJDKJVMTI_TI_PHASE_H_
 
 #include "jni.h"
 #include "jvmti.h"
@@ -66,4 +66,4 @@
 
 }  // namespace openjdkjvmti
 
-#endif  // ART_RUNTIME_OPENJDKJVMTI_TI_PHASE_H_
+#endif  // ART_OPENJDKJVMTI_TI_PHASE_H_
diff --git a/runtime/openjdkjvmti/ti_properties.cc b/openjdkjvmti/ti_properties.cc
similarity index 100%
rename from runtime/openjdkjvmti/ti_properties.cc
rename to openjdkjvmti/ti_properties.cc
diff --git a/runtime/openjdkjvmti/ti_properties.h b/openjdkjvmti/ti_properties.h
similarity index 92%
rename from runtime/openjdkjvmti/ti_properties.h
rename to openjdkjvmti/ti_properties.h
index 7073481..187b85d 100644
--- a/runtime/openjdkjvmti/ti_properties.h
+++ b/openjdkjvmti/ti_properties.h
@@ -29,8 +29,8 @@
  * questions.
  */
 
-#ifndef ART_RUNTIME_OPENJDKJVMTI_TI_PROPERTIES_H_
-#define ART_RUNTIME_OPENJDKJVMTI_TI_PROPERTIES_H_
+#ifndef ART_OPENJDKJVMTI_TI_PROPERTIES_H_
+#define ART_OPENJDKJVMTI_TI_PROPERTIES_H_
 
 #include "jni.h"
 #include "jvmti.h"
@@ -48,4 +48,4 @@
 
 }  // namespace openjdkjvmti
 
-#endif  // ART_RUNTIME_OPENJDKJVMTI_TI_PROPERTIES_H_
+#endif  // ART_OPENJDKJVMTI_TI_PROPERTIES_H_
diff --git a/runtime/openjdkjvmti/ti_redefine.cc b/openjdkjvmti/ti_redefine.cc
similarity index 100%
rename from runtime/openjdkjvmti/ti_redefine.cc
rename to openjdkjvmti/ti_redefine.cc
diff --git a/runtime/openjdkjvmti/ti_redefine.h b/openjdkjvmti/ti_redefine.h
similarity index 98%
rename from runtime/openjdkjvmti/ti_redefine.h
rename to openjdkjvmti/ti_redefine.h
index 03b4bf2..984f922 100644
--- a/runtime/openjdkjvmti/ti_redefine.h
+++ b/openjdkjvmti/ti_redefine.h
@@ -29,8 +29,8 @@
  * questions.
  */
 
-#ifndef ART_RUNTIME_OPENJDKJVMTI_TI_REDEFINE_H_
-#define ART_RUNTIME_OPENJDKJVMTI_TI_REDEFINE_H_
+#ifndef ART_OPENJDKJVMTI_TI_REDEFINE_H_
+#define ART_OPENJDKJVMTI_TI_REDEFINE_H_
 
 #include <string>
 
@@ -265,4 +265,4 @@
 
 }  // namespace openjdkjvmti
 
-#endif  // ART_RUNTIME_OPENJDKJVMTI_TI_REDEFINE_H_
+#endif  // ART_OPENJDKJVMTI_TI_REDEFINE_H_
diff --git a/runtime/openjdkjvmti/ti_search.cc b/openjdkjvmti/ti_search.cc
similarity index 100%
rename from runtime/openjdkjvmti/ti_search.cc
rename to openjdkjvmti/ti_search.cc
diff --git a/runtime/openjdkjvmti/ti_search.h b/openjdkjvmti/ti_search.h
similarity index 92%
rename from runtime/openjdkjvmti/ti_search.h
rename to openjdkjvmti/ti_search.h
index cd7b4be..81a28cc 100644
--- a/runtime/openjdkjvmti/ti_search.h
+++ b/openjdkjvmti/ti_search.h
@@ -29,8 +29,8 @@
  * questions.
  */
 
-#ifndef ART_RUNTIME_OPENJDKJVMTI_TI_SEARCH_H_
-#define ART_RUNTIME_OPENJDKJVMTI_TI_SEARCH_H_
+#ifndef ART_OPENJDKJVMTI_TI_SEARCH_H_
+#define ART_OPENJDKJVMTI_TI_SEARCH_H_
 
 #include <vector>
 
@@ -50,4 +50,4 @@
 
 }  // namespace openjdkjvmti
 
-#endif  // ART_RUNTIME_OPENJDKJVMTI_TI_SEARCH_H_
+#endif  // ART_OPENJDKJVMTI_TI_SEARCH_H_
diff --git a/runtime/openjdkjvmti/ti_stack.cc b/openjdkjvmti/ti_stack.cc
similarity index 100%
rename from runtime/openjdkjvmti/ti_stack.cc
rename to openjdkjvmti/ti_stack.cc
diff --git a/runtime/openjdkjvmti/ti_stack.h b/openjdkjvmti/ti_stack.h
similarity index 95%
rename from runtime/openjdkjvmti/ti_stack.h
rename to openjdkjvmti/ti_stack.h
index 6a593cf..2e96b82 100644
--- a/runtime/openjdkjvmti/ti_stack.h
+++ b/openjdkjvmti/ti_stack.h
@@ -29,8 +29,8 @@
  * questions.
  */
 
-#ifndef ART_RUNTIME_OPENJDKJVMTI_TI_STACK_H_
-#define ART_RUNTIME_OPENJDKJVMTI_TI_STACK_H_
+#ifndef ART_OPENJDKJVMTI_TI_STACK_H_
+#define ART_OPENJDKJVMTI_TI_STACK_H_
 
 #include "jni.h"
 #include "jvmti.h"
@@ -71,4 +71,4 @@
 
 }  // namespace openjdkjvmti
 
-#endif  // ART_RUNTIME_OPENJDKJVMTI_TI_STACK_H_
+#endif  // ART_OPENJDKJVMTI_TI_STACK_H_
diff --git a/runtime/openjdkjvmti/ti_thread.cc b/openjdkjvmti/ti_thread.cc
similarity index 81%
rename from runtime/openjdkjvmti/ti_thread.cc
rename to openjdkjvmti/ti_thread.cc
index 7d42879..6fa73f8 100644
--- a/runtime/openjdkjvmti/ti_thread.cc
+++ b/openjdkjvmti/ti_thread.cc
@@ -159,17 +159,6 @@
   return ERR(NONE);
 }
 
-static art::Thread* GetNativeThreadLocked(jthread thread,
-                                          const art::ScopedObjectAccessAlreadyRunnable& soa)
-    REQUIRES_SHARED(art::Locks::mutator_lock_)
-    REQUIRES(art::Locks::thread_list_lock_) {
-  if (thread == nullptr) {
-    return art::Thread::Current();
-  }
-
-  return art::Thread::FromManagedThread(soa, thread);
-}
-
 // Get the native thread. The spec says a null object denotes the current thread.
 art::Thread* ThreadUtil::GetNativeThread(jthread thread,
                                          const art::ScopedObjectAccessAlreadyRunnable& soa) {
@@ -177,7 +166,6 @@
     return art::Thread::Current();
   }
 
-  art::MutexLock mu(soa.Self(), *art::Locks::thread_list_lock_);
   return art::Thread::FromManagedThread(soa, thread);
 }
 
@@ -189,18 +177,20 @@
     return JVMTI_ERROR_WRONG_PHASE;
   }
 
-  art::ScopedObjectAccess soa(art::Thread::Current());
+  art::Thread* self = art::Thread::Current();
+  art::ScopedObjectAccess soa(self);
+  art::MutexLock mu(self, *art::Locks::thread_list_lock_);
 
-  art::Thread* self = GetNativeThread(thread, soa);
-  if (self == nullptr && thread == nullptr) {
+  art::Thread* target = GetNativeThread(thread, soa);
+  if (target == nullptr && thread == nullptr) {
     return ERR(INVALID_THREAD);
   }
 
   JvmtiUniquePtr<char[]> name_uptr;
-  if (self != nullptr) {
+  if (target != nullptr) {
     // Have a native thread object, this thread is alive.
     std::string name;
-    self->GetThreadName(name);
+    target->GetThreadName(name);
     jvmtiError name_result;
     name_uptr = CopyString(env, name.c_str(), &name_result);
     if (name_uptr == nullptr) {
@@ -208,11 +198,11 @@
     }
     info_ptr->name = name_uptr.get();
 
-    info_ptr->priority = self->GetNativePriority();
+    info_ptr->priority = target->GetNativePriority();
 
-    info_ptr->is_daemon = self->IsDaemon();
+    info_ptr->is_daemon = target->IsDaemon();
 
-    art::ObjPtr<art::mirror::Object> peer = self->GetPeerFromOtherThread();
+    art::ObjPtr<art::mirror::Object> peer = target->GetPeerFromOtherThread();
 
     // ThreadGroup.
     if (peer != nullptr) {
@@ -309,9 +299,8 @@
 static InternalThreadState GetNativeThreadState(jthread thread,
                                                 const art::ScopedObjectAccessAlreadyRunnable& soa)
     REQUIRES_SHARED(art::Locks::mutator_lock_)
-    REQUIRES(art::Locks::user_code_suspension_lock_) {
+    REQUIRES(art::Locks::thread_list_lock_, art::Locks::user_code_suspension_lock_) {
   art::Thread* self = nullptr;
-  art::MutexLock tll_mu(soa.Self(), *art::Locks::thread_list_lock_);
   if (thread == nullptr) {
     self = art::Thread::Current();
   } else {
@@ -455,43 +444,46 @@
       }
     }
     art::ScopedObjectAccess soa(self);
+    art::MutexLock tll_mu(self, *art::Locks::thread_list_lock_);
     state = GetNativeThreadState(thread, soa);
-    break;
+    if (state.art_state == art::ThreadState::kStarting) {
+      break;
+    }
+    DCHECK(state.native_thread != nullptr);
+
+    // Translate internal thread state to JVMTI and Java state.
+    jint jvmti_state = GetJvmtiThreadStateFromInternal(state);
+
+    // Java state is derived from nativeGetState.
+    // TODO: Our implementation assigns "runnable" to suspended. As such, we will have a slightly
+    //       different mask if a thread got suspended due to user code. However, this is for
+    //       consistency with the Java view.
+    jint java_state = GetJavaStateFromInternal(state);
+
+    *thread_state_ptr = jvmti_state | java_state;
+
+    return ERR(NONE);
   } while (true);
 
-  if (state.art_state == art::ThreadState::kStarting) {
-    if (thread == nullptr) {
-      // No native thread, and no Java thread? We must be starting up. Report as wrong phase.
-      return ERR(WRONG_PHASE);
-    }
+  DCHECK_EQ(state.art_state, art::ThreadState::kStarting);
 
-    art::ScopedObjectAccess soa(self);
-
-    // Need to read the Java "started" field to know whether this is starting or terminated.
-    art::ObjPtr<art::mirror::Object> peer = soa.Decode<art::mirror::Object>(thread);
-    art::ObjPtr<art::mirror::Class> klass = peer->GetClass();
-    art::ArtField* started_field = klass->FindDeclaredInstanceField("started", "Z");
-    CHECK(started_field != nullptr);
-    bool started = started_field->GetBoolean(peer) != 0;
-    constexpr jint kStartedState = JVMTI_JAVA_LANG_THREAD_STATE_NEW;
-    constexpr jint kTerminatedState = JVMTI_THREAD_STATE_TERMINATED |
-                                      JVMTI_JAVA_LANG_THREAD_STATE_TERMINATED;
-    *thread_state_ptr = started ? kTerminatedState : kStartedState;
-    return ERR(NONE);
+  if (thread == nullptr) {
+    // No native thread, and no Java thread? We must be starting up. Report as wrong phase.
+    return ERR(WRONG_PHASE);
   }
-  DCHECK(state.native_thread != nullptr);
 
-  // Translate internal thread state to JVMTI and Java state.
-  jint jvmti_state = GetJvmtiThreadStateFromInternal(state);
+  art::ScopedObjectAccess soa(self);
 
-  // Java state is derived from nativeGetState.
-  // TODO: Our implementation assigns "runnable" to suspended. As such, we will have slightly
-  //       different mask if a thread got suspended due to user-code. However, this is for
-  //       consistency with the Java view.
-  jint java_state = GetJavaStateFromInternal(state);
-
-  *thread_state_ptr = jvmti_state | java_state;
-
+  // Need to read the Java "started" field to know whether this is starting or terminated.
+  art::ObjPtr<art::mirror::Object> peer = soa.Decode<art::mirror::Object>(thread);
+  art::ObjPtr<art::mirror::Class> klass = peer->GetClass();
+  art::ArtField* started_field = klass->FindDeclaredInstanceField("started", "Z");
+  CHECK(started_field != nullptr);
+  bool started = started_field->GetBoolean(peer) != 0;
+  constexpr jint kStartedState = JVMTI_JAVA_LANG_THREAD_STATE_NEW;
+  constexpr jint kTerminatedState = JVMTI_THREAD_STATE_TERMINATED |
+                                    JVMTI_JAVA_LANG_THREAD_STATE_TERMINATED;
+  *thread_state_ptr = started ? kTerminatedState : kStartedState;
   return ERR(NONE);
 }
 
@@ -570,7 +562,7 @@
   art::Thread* self = art::Thread::Current();
   art::ScopedObjectAccess soa(self);
   art::MutexLock mu(self, *art::Locks::thread_list_lock_);
-  art::Thread* target = GetNativeThreadLocked(thread, soa);
+  art::Thread* target = GetNativeThread(thread, soa);
   if (target == nullptr && thread == nullptr) {
     return ERR(INVALID_THREAD);
   }
@@ -599,7 +591,7 @@
   art::Thread* self = art::Thread::Current();
   art::ScopedObjectAccess soa(self);
   art::MutexLock mu(self, *art::Locks::thread_list_lock_);
-  art::Thread* target = GetNativeThreadLocked(thread, soa);
+  art::Thread* target = GetNativeThread(thread, soa);
   if (target == nullptr && thread == nullptr) {
     return ERR(INVALID_THREAD);
   }
@@ -699,8 +691,7 @@
 }
 
 jvmtiError ThreadUtil::SuspendOther(art::Thread* self,
-                                    jthread target_jthread,
-                                    const art::Thread* target) {
+                                    jthread target_jthread) {
   // Loop since we need to bail out and try again if we would end up getting suspended while holding
   // the user_code_suspension_lock_ due to a SuspendReason::kForUserCode. In this situation we
   // release the lock, wait to get resumed and try again.
@@ -713,33 +704,43 @@
     SuspendCheck(self);
     art::MutexLock mu(self, *art::Locks::user_code_suspension_lock_);
     {
-      art::MutexLock thread_list_mu(self, *art::Locks::thread_suspend_count_lock_);
+      art::MutexLock thread_suspend_count_mu(self, *art::Locks::thread_suspend_count_lock_);
       // Make sure we won't be suspended in the middle of holding the thread_suspend_count_lock_ by
       // a user-code suspension. We retry and do another SuspendCheck to clear this.
       if (self->GetUserCodeSuspendCount() != 0) {
         continue;
-      } else if (target->GetUserCodeSuspendCount() != 0) {
-        return ERR(THREAD_SUSPENDED);
       }
+      // We are not going to be suspended by user code from now on.
     }
-    bool timeout = true;
-    while (timeout) {
+    {
+      art::ScopedObjectAccess soa(self);
+      art::MutexLock thread_list_mu(self, *art::Locks::thread_list_lock_);
+      art::Thread* target = GetNativeThread(target_jthread, soa);
       art::ThreadState state = target->GetState();
       if (state == art::ThreadState::kTerminated || state == art::ThreadState::kStarting) {
         return ERR(THREAD_NOT_ALIVE);
-      }
-      art::Thread* ret_target = art::Runtime::Current()->GetThreadList()->SuspendThreadByPeer(
-          target_jthread,
-          /* request_suspension */ true,
-          art::SuspendReason::kForUserCode,
-          &timeout);
-      if (ret_target == nullptr && !timeout) {
-        // TODO It would be good to get more information about why exactly the thread failed to
-        // suspend.
-        return ERR(INTERNAL);
+      } else {
+        art::MutexLock thread_suspend_count_mu(self, *art::Locks::thread_suspend_count_lock_);
+        if (target->GetUserCodeSuspendCount() != 0) {
+          return ERR(THREAD_SUSPENDED);
+        }
       }
     }
-    return OK;
+    bool timeout = true;
+    art::Thread* ret_target = art::Runtime::Current()->GetThreadList()->SuspendThreadByPeer(
+        target_jthread,
+        /* request_suspension */ true,
+        art::SuspendReason::kForUserCode,
+        &timeout);
+    if (ret_target == nullptr && !timeout) {
+      // TODO It would be good to get more information about why exactly the thread failed to
+      // suspend.
+      return ERR(INTERNAL);
+    } else if (!timeout) {
+      // We didn't time out and got a result.
+      return OK;
+    }
+    // We timed out. Just go around and try again.
   } while (true);
   UNREACHABLE();
 }
@@ -768,18 +769,21 @@
 
 jvmtiError ThreadUtil::SuspendThread(jvmtiEnv* env ATTRIBUTE_UNUSED, jthread thread) {
   art::Thread* self = art::Thread::Current();
-  art::Thread* target;
+  bool target_is_self = false;
   {
     art::ScopedObjectAccess soa(self);
-    target = GetNativeThread(thread, soa);
+    art::MutexLock mu(self, *art::Locks::thread_list_lock_);
+    art::Thread* target = GetNativeThread(thread, soa);
+    if (target == nullptr) {
+      return ERR(INVALID_THREAD);
+    } else if (target == self) {
+      target_is_self = true;
+    }
   }
-  if (target == nullptr) {
-    return ERR(INVALID_THREAD);
-  }
-  if (target == self) {
+  if (target_is_self) {
     return SuspendSelf(self);
   } else {
-    return SuspendOther(self, thread, target);
+    return SuspendOther(self, thread);
   }
 }
 
@@ -790,41 +794,56 @@
   }
   art::Thread* self = art::Thread::Current();
   art::Thread* target;
-  {
-    // NB This does a SuspendCheck (during thread state change) so we need to make sure we don't
-    // have the 'suspend_lock' locked here.
-    art::ScopedObjectAccess soa(self);
-    target = GetNativeThread(thread, soa);
-  }
-  if (target == nullptr) {
-    return ERR(INVALID_THREAD);
-  } else if (target == self) {
-    // We would have paused until we aren't suspended anymore due to the ScopedObjectAccess so we
-    // can just return THREAD_NOT_SUSPENDED. Unfortunately we cannot do any real DCHECKs about
-    // current state since it's all concurrent.
-    return ERR(THREAD_NOT_SUSPENDED);
-  }
-  // Now that we know we aren't getting suspended ourself (since we have a mutator lock) we lock the
-  // suspend_lock to start suspending.
-  art::MutexLock mu(self, *art::Locks::user_code_suspension_lock_);
-  {
-    // The JVMTI spec requires us to return THREAD_NOT_SUSPENDED if it is alive but we really cannot
-    // tell why resume failed.
-    art::MutexLock thread_list_mu(self, *art::Locks::thread_suspend_count_lock_);
-    if (target->GetUserCodeSuspendCount() == 0) {
-      return ERR(THREAD_NOT_SUSPENDED);
+  // Retry until we know we won't get suspended by user code while resuming something.
+  do {
+    SuspendCheck(self);
+    art::MutexLock ucsl_mu(self, *art::Locks::user_code_suspension_lock_);
+    {
+      art::MutexLock tscl_mu(self, *art::Locks::thread_suspend_count_lock_);
+      // Make sure we won't be suspended in the middle of holding the thread_suspend_count_lock_ by
+      // a user-code suspension. We retry and do another SuspendCheck to clear this.
+      if (self->GetUserCodeSuspendCount() != 0) {
+        continue;
+      }
     }
-  }
-  if (target->GetState() == art::ThreadState::kTerminated) {
-    return ERR(THREAD_NOT_ALIVE);
-  }
-  DCHECK(target != self);
-  if (!art::Runtime::Current()->GetThreadList()->Resume(target, art::SuspendReason::kForUserCode)) {
-    // TODO Give a better error.
-    // This is most likely THREAD_NOT_SUSPENDED but we cannot really be sure.
-    return ERR(INTERNAL);
-  }
-  return OK;
+    // From now on we know we cannot get suspended by user-code.
+    {
+      // NB This does a SuspendCheck (during thread state change) so we need to make sure we don't
+      // have the 'suspend_lock' locked here.
+      art::ScopedObjectAccess soa(self);
+      art::MutexLock tll_mu(self, *art::Locks::thread_list_lock_);
+      target = GetNativeThread(thread, soa);
+      if (target == nullptr) {
+        return ERR(INVALID_THREAD);
+      } else if (target == self) {
+        // We would have paused until we aren't suspended anymore due to the ScopedObjectAccess so
+        // we can just return THREAD_NOT_SUSPENDED. Unfortunately we cannot do any real DCHECKs
+        // about current state since it's all concurrent.
+        return ERR(THREAD_NOT_SUSPENDED);
+      } else if (target->GetState() == art::ThreadState::kTerminated) {
+        return ERR(THREAD_NOT_ALIVE);
+      }
+      // The JVMTI spec requires us to return THREAD_NOT_SUSPENDED if it is alive but we really
+      // cannot tell why resume failed.
+      {
+        art::MutexLock thread_suspend_count_mu(self, *art::Locks::thread_suspend_count_lock_);
+        if (target->GetUserCodeSuspendCount() == 0) {
+          return ERR(THREAD_NOT_SUSPENDED);
+        }
+      }
+    }
+    // It is okay that we don't hold the thread_list_lock_ here since we know that the thread
+    // cannot die while it is held suspended by a SuspendReason::kForUserCode suspend.
+    DCHECK(target != self);
+    if (!art::Runtime::Current()->GetThreadList()->Resume(target,
+                                                          art::SuspendReason::kForUserCode)) {
+      // TODO Give a better error.
+      // This is most likely THREAD_NOT_SUSPENDED but we cannot really be sure.
+      return ERR(INTERNAL);
+    } else {
+      return OK;
+    }
+  } while (true);
 }
 
 // Suspends all the threads in the list at the same time. Getting this behavior is a little tricky
@@ -850,6 +869,7 @@
   for (jint i = 0; i < request_count; i++) {
     {
       art::ScopedObjectAccess soa(self);
+      art::MutexLock mu(self, *art::Locks::thread_list_lock_);
       if (threads[i] == nullptr || GetNativeThread(threads[i], soa) == self) {
         current_thread_indexes.push_back(i);
         continue;
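
SuspendOther and ResumeThread above now share one retry shape: check for a pending suspension, take user_code_suspension_lock_, and start over if user code managed to suspend the caller in the window before the lock was held. A condensed sketch of that loop, with DoSuspendOrResume standing in for the real body (it is a placeholder, not an ART function):

do {
  SuspendCheck(self);  // Block here if a suspension is already pending.
  art::MutexLock ucsl_mu(self, *art::Locks::user_code_suspension_lock_);
  {
    art::MutexLock tscl_mu(self, *art::Locks::thread_suspend_count_lock_);
    // A user-code suspension raced in between SuspendCheck and taking the
    // lock: drop the locks and go around so SuspendCheck can clear it.
    if (self->GetUserCodeSuspendCount() != 0) {
      continue;
    }
  }
  // From here on, user code cannot suspend us while we hold the lock.
  return DoSuspendOrResume(self);
} while (true);
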
diff --git a/runtime/openjdkjvmti/ti_thread.h b/openjdkjvmti/ti_thread.h
similarity index 92%
rename from runtime/openjdkjvmti/ti_thread.h
rename to openjdkjvmti/ti_thread.h
index 083bf8d..03c49d7 100644
--- a/runtime/openjdkjvmti/ti_thread.h
+++ b/openjdkjvmti/ti_thread.h
@@ -29,8 +29,8 @@
  * questions.
  */
 
-#ifndef ART_RUNTIME_OPENJDKJVMTI_TI_THREAD_H_
-#define ART_RUNTIME_OPENJDKJVMTI_TI_THREAD_H_
+#ifndef ART_OPENJDKJVMTI_TI_THREAD_H_
+#define ART_OPENJDKJVMTI_TI_THREAD_H_
 
 #include "jni.h"
 #include "jvmti.h"
@@ -90,7 +90,8 @@
 
   static art::Thread* GetNativeThread(jthread thread,
                                       const art::ScopedObjectAccessAlreadyRunnable& soa)
-      REQUIRES_SHARED(art::Locks::mutator_lock_);
+      REQUIRES_SHARED(art::Locks::mutator_lock_)
+      REQUIRES(art::Locks::thread_list_lock_);
 
  private:
   // We need to make sure only one thread tries to suspend threads at a time so we can get the
@@ -104,9 +105,7 @@
   // cause the thread to wake up if the thread is suspended for the debugger or gc or something.
   static jvmtiError SuspendSelf(art::Thread* self)
       REQUIRES(!art::Locks::mutator_lock_, !art::Locks::user_code_suspension_lock_);
-  static jvmtiError SuspendOther(art::Thread* self,
-                                 jthread target_jthread,
-                                 const art::Thread* target)
+  static jvmtiError SuspendOther(art::Thread* self, jthread target_jthread)
       REQUIRES(!art::Locks::mutator_lock_, !art::Locks::user_code_suspension_lock_);
 
   static art::ArtField* context_class_loader_;
@@ -114,4 +113,4 @@
 
 }  // namespace openjdkjvmti
 
-#endif  // ART_RUNTIME_OPENJDKJVMTI_TI_THREAD_H_
+#endif  // ART_OPENJDKJVMTI_TI_THREAD_H_
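
The new REQUIRES(art::Locks::thread_list_lock_) annotation on GetNativeThread is what lets Clang's -Wthread-safety analysis enforce the locking discipline introduced throughout this change: a caller that has not acquired thread_list_lock_ now produces a compile-time warning. A generic illustration of the mechanism, using raw Clang attributes instead of ART's REQUIRES/Mutex wrappers (all names below are generic, not ART's):

class __attribute__((capability("mutex"))) ListLock {
 public:
  void Acquire() __attribute__((acquire_capability()));
  void Release() __attribute__((release_capability()));
};
ListLock thread_list_lock;

void LookupThread() __attribute__((requires_capability(thread_list_lock)));

void Caller() {
  thread_list_lock.Acquire();
  LookupThread();            // OK: lock held.
  thread_list_lock.Release();
  // LookupThread();         // Would warn: requires thread_list_lock.
}
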
diff --git a/runtime/openjdkjvmti/ti_threadgroup.cc b/openjdkjvmti/ti_threadgroup.cc
similarity index 100%
rename from runtime/openjdkjvmti/ti_threadgroup.cc
rename to openjdkjvmti/ti_threadgroup.cc
diff --git a/runtime/openjdkjvmti/ti_threadgroup.h b/openjdkjvmti/ti_threadgroup.h
similarity index 93%
rename from runtime/openjdkjvmti/ti_threadgroup.h
rename to openjdkjvmti/ti_threadgroup.h
index c3a0ff5..4911566 100644
--- a/runtime/openjdkjvmti/ti_threadgroup.h
+++ b/openjdkjvmti/ti_threadgroup.h
@@ -29,8 +29,8 @@
  * questions.
  */
 
-#ifndef ART_RUNTIME_OPENJDKJVMTI_TI_THREADGROUP_H_
-#define ART_RUNTIME_OPENJDKJVMTI_TI_THREADGROUP_H_
+#ifndef ART_OPENJDKJVMTI_TI_THREADGROUP_H_
+#define ART_OPENJDKJVMTI_TI_THREADGROUP_H_
 
 #include "jni.h"
 #include "jvmti.h"
@@ -57,4 +57,4 @@
 
 }  // namespace openjdkjvmti
 
-#endif  // ART_RUNTIME_OPENJDKJVMTI_TI_THREADGROUP_H_
+#endif  // ART_OPENJDKJVMTI_TI_THREADGROUP_H_
diff --git a/runtime/openjdkjvmti/ti_timers.cc b/openjdkjvmti/ti_timers.cc
similarity index 100%
rename from runtime/openjdkjvmti/ti_timers.cc
rename to openjdkjvmti/ti_timers.cc
diff --git a/runtime/openjdkjvmti/ti_timers.h b/openjdkjvmti/ti_timers.h
similarity index 92%
rename from runtime/openjdkjvmti/ti_timers.h
rename to openjdkjvmti/ti_timers.h
index 6300678..892205a 100644
--- a/runtime/openjdkjvmti/ti_timers.h
+++ b/openjdkjvmti/ti_timers.h
@@ -29,8 +29,8 @@
  * questions.
  */
 
-#ifndef ART_RUNTIME_OPENJDKJVMTI_TI_TIMERS_H_
-#define ART_RUNTIME_OPENJDKJVMTI_TI_TIMERS_H_
+#ifndef ART_OPENJDKJVMTI_TI_TIMERS_H_
+#define ART_OPENJDKJVMTI_TI_TIMERS_H_
 
 #include "jni.h"
 #include "jvmti.h"
@@ -48,4 +48,4 @@
 
 }  // namespace openjdkjvmti
 
-#endif  // ART_RUNTIME_OPENJDKJVMTI_TI_TIMERS_H_
+#endif  // ART_OPENJDKJVMTI_TI_TIMERS_H_
diff --git a/runtime/openjdkjvmti/transform.cc b/openjdkjvmti/transform.cc
similarity index 100%
rename from runtime/openjdkjvmti/transform.cc
rename to openjdkjvmti/transform.cc
diff --git a/runtime/openjdkjvmti/transform.h b/openjdkjvmti/transform.h
similarity index 94%
rename from runtime/openjdkjvmti/transform.h
rename to openjdkjvmti/transform.h
index ed24068..6bbe60a 100644
--- a/runtime/openjdkjvmti/transform.h
+++ b/openjdkjvmti/transform.h
@@ -29,8 +29,8 @@
  * questions.
  */
 
-#ifndef ART_RUNTIME_OPENJDKJVMTI_TRANSFORM_H_
-#define ART_RUNTIME_OPENJDKJVMTI_TRANSFORM_H_
+#ifndef ART_OPENJDKJVMTI_TRANSFORM_H_
+#define ART_OPENJDKJVMTI_TRANSFORM_H_
 
 #include <string>
 
@@ -65,5 +65,5 @@
 
 }  // namespace openjdkjvmti
 
-#endif  // ART_RUNTIME_OPENJDKJVMTI_TRANSFORM_H_
+#endif  // ART_OPENJDKJVMTI_TRANSFORM_H_
 
diff --git a/runtime/Android.bp b/runtime/Android.bp
index 952f8bf..0f5a1a8 100644
--- a/runtime/Android.bp
+++ b/runtime/Android.bp
@@ -55,8 +55,9 @@
         "compiler_filter.cc",
         "debugger.cc",
         "dex_file.cc",
-        "dex_file_tracking_registrar.cc",
         "dex_file_annotations.cc",
+        "dex_file_layout.cc",
+        "dex_file_tracking_registrar.cc",
         "dex_file_verifier.cc",
         "dex_instruction.cc",
         "dex_to_dex_decompiler.cc",
@@ -439,6 +440,7 @@
         "debugger.h",
         "base/unix_file/fd_file.h",
         "dex_file.h",
+        "dex_file_layout.h",
         "dex_instruction.h",
         "dex_instruction_utils.h",
         "gc_root.h",
@@ -625,7 +627,3 @@
         "libvixld-arm64",
     ],
 }
-
-subdirs = [
-    "openjdkjvmti",
-]
diff --git a/runtime/aot_class_linker.cc b/runtime/aot_class_linker.cc
index 871604b..9396565 100644
--- a/runtime/aot_class_linker.cc
+++ b/runtime/aot_class_linker.cc
@@ -16,10 +16,12 @@
 
 #include "aot_class_linker.h"
 
+#include "class_reference.h"
+#include "compiler_callbacks.h"
 #include "handle_scope-inl.h"
-#include "mirror/class.h"
-#include "mirror/object-inl.h"
+#include "mirror/class-inl.h"
 #include "runtime.h"
+#include "verifier/verifier_enums.h"
 
 namespace art {
 
@@ -31,18 +33,29 @@
 bool AotClassLinker::InitializeClass(Thread* self, Handle<mirror::Class> klass,
                                   bool can_init_statics, bool can_init_parents) {
   Runtime* const runtime = Runtime::Current();
+  bool strict_mode = runtime->IsActiveStrictTransactionMode();
 
   DCHECK(klass != nullptr);
   if (klass->IsInitialized() || klass->IsInitializing()) {
     return ClassLinker::InitializeClass(self, klass, can_init_statics, can_init_parents);
   }
 
-  if (runtime->IsActiveStrictTransactionMode()) {
+  // Don't initialize klass if its superclass is not initialized, because the superclass might
+  // abort the transaction and be rolled back after klass's changes are committed.
+  if (strict_mode && !klass->IsInterface() && klass->HasSuperClass()) {
+    if (klass->GetSuperClass()->GetStatus() == mirror::Class::kStatusInitializing) {
+      runtime->AbortTransactionAndThrowAbortError(self, "Can't resolve "
+          + klass->PrettyTypeOf() + " because its superclass is not initialized.");
+      return false;
+    }
+  }
+
+  if (strict_mode) {
     runtime->EnterTransactionMode(true, klass.Get()->AsClass());
   }
   bool success = ClassLinker::InitializeClass(self, klass, can_init_statics, can_init_parents);
 
-  if (runtime->IsActiveStrictTransactionMode()) {
+  if (strict_mode) {
     if (success) {
       // Exit Transaction if success.
       runtime->ExitTransactionMode();
@@ -55,4 +68,18 @@
   }
   return success;
 }
+
+verifier::FailureKind AotClassLinker::PerformClassVerification(Thread* self,
+                                                               Handle<mirror::Class> klass,
+                                                               verifier::HardFailLogMode log_level,
+                                                               std::string* error_msg) {
+  Runtime* const runtime = Runtime::Current();
+  CompilerCallbacks* callbacks = runtime->GetCompilerCallbacks();
+  if (callbacks->CanAssumeVerified(ClassReference(&klass->GetDexFile(),
+                                                  klass->GetDexClassDefIndex()))) {
+    return verifier::FailureKind::kNoFailure;
+  }
+  return ClassLinker::PerformClassVerification(self, klass, log_level, error_msg);
+}
+
 }  // namespace art
diff --git a/runtime/aot_class_linker.h b/runtime/aot_class_linker.h
index 11bea86..927b533 100644
--- a/runtime/aot_class_linker.h
+++ b/runtime/aot_class_linker.h
@@ -27,6 +27,16 @@
   explicit AotClassLinker(InternTable *intern_table);
   ~AotClassLinker();
 
+ protected:
+  // Overridden version of PerformClassVerification that allows skipping verification if the
+  // class was previously verified but has since been unloaded.
+  verifier::FailureKind PerformClassVerification(Thread* self,
+                                                 Handle<mirror::Class> klass,
+                                                 verifier::HardFailLogMode log_level,
+                                                 std::string* error_msg)
+      OVERRIDE
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
   bool InitializeClass(Thread *self,
                        Handle<mirror::Class> klass,
                        bool can_run_clinit,
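
CanAssumeVerified gives the AOT class linker a way to ask the compiler driver whether a class already passed verification, e.g. before it was unloaded. A hypothetical cache that could sit behind such a callback; the container and helper names are assumptions, not dex2oat's implementation:

#include <set>
#include "class_reference.h"  // for ClassReference

// Hypothetical verification cache; assumes ClassReference is ordered
// (it wraps a (const DexFile*, class-def index) pair).
static std::set<ClassReference> g_verified_classes;

bool CanAssumeVerified(ClassReference ref) {
  return g_verified_classes.count(ref) != 0;  // Hit => skip re-verification.
}

void RecordVerified(ClassReference ref) {
  g_verified_classes.insert(ref);  // Record a clean verification pass.
}
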
diff --git a/runtime/arch/arch_test.cc b/runtime/arch/arch_test.cc
index bfac8c4..1ba4070 100644
--- a/runtime/arch/arch_test.cc
+++ b/runtime/arch/arch_test.cc
@@ -88,6 +88,11 @@
 #undef FRAME_SIZE_SAVE_REFS_ONLY
 static constexpr size_t kFrameSizeSaveRefsAndArgs = FRAME_SIZE_SAVE_REFS_AND_ARGS;
 #undef FRAME_SIZE_SAVE_REFS_AND_ARGS
+static constexpr size_t kFrameSizeSaveEverythingForClinit = FRAME_SIZE_SAVE_EVERYTHING_FOR_CLINIT;
+#undef FRAME_SIZE_SAVE_EVERYTHING_FOR_CLINIT
+static constexpr size_t kFrameSizeSaveEverythingForSuspendCheck =
+    FRAME_SIZE_SAVE_EVERYTHING_FOR_SUSPEND_CHECK;
+#undef FRAME_SIZE_SAVE_EVERYTHING_FOR_SUSPEND_CHECK
 static constexpr size_t kFrameSizeSaveEverything = FRAME_SIZE_SAVE_EVERYTHING;
 #undef FRAME_SIZE_SAVE_EVERYTHING
 #undef BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_ENTRYPOINT_OFFSET
@@ -109,6 +114,11 @@
 #undef FRAME_SIZE_SAVE_REFS_ONLY
 static constexpr size_t kFrameSizeSaveRefsAndArgs = FRAME_SIZE_SAVE_REFS_AND_ARGS;
 #undef FRAME_SIZE_SAVE_REFS_AND_ARGS
+static constexpr size_t kFrameSizeSaveEverythingForClinit = FRAME_SIZE_SAVE_EVERYTHING_FOR_CLINIT;
+#undef FRAME_SIZE_SAVE_EVERYTHING_FOR_CLINIT
+static constexpr size_t kFrameSizeSaveEverythingForSuspendCheck =
+    FRAME_SIZE_SAVE_EVERYTHING_FOR_SUSPEND_CHECK;
+#undef FRAME_SIZE_SAVE_EVERYTHING_FOR_SUSPEND_CHECK
 static constexpr size_t kFrameSizeSaveEverything = FRAME_SIZE_SAVE_EVERYTHING;
 #undef FRAME_SIZE_SAVE_EVERYTHING
 #undef BAKER_MARK_INTROSPECTION_ARRAY_SWITCH_OFFSET
@@ -126,6 +136,11 @@
 #undef FRAME_SIZE_SAVE_REFS_ONLY
 static constexpr size_t kFrameSizeSaveRefsAndArgs = FRAME_SIZE_SAVE_REFS_AND_ARGS;
 #undef FRAME_SIZE_SAVE_REFS_AND_ARGS
+static constexpr size_t kFrameSizeSaveEverythingForClinit = FRAME_SIZE_SAVE_EVERYTHING_FOR_CLINIT;
+#undef FRAME_SIZE_SAVE_EVERYTHING_FOR_CLINIT
+static constexpr size_t kFrameSizeSaveEverythingForSuspendCheck =
+    FRAME_SIZE_SAVE_EVERYTHING_FOR_SUSPEND_CHECK;
+#undef FRAME_SIZE_SAVE_EVERYTHING_FOR_SUSPEND_CHECK
 static constexpr size_t kFrameSizeSaveEverything = FRAME_SIZE_SAVE_EVERYTHING;
 #undef FRAME_SIZE_SAVE_EVERYTHING
 #undef BAKER_MARK_INTROSPECTION_REGISTER_COUNT
@@ -142,6 +157,11 @@
 #undef FRAME_SIZE_SAVE_REFS_ONLY
 static constexpr size_t kFrameSizeSaveRefsAndArgs = FRAME_SIZE_SAVE_REFS_AND_ARGS;
 #undef FRAME_SIZE_SAVE_REFS_AND_ARGS
+static constexpr size_t kFrameSizeSaveEverythingForClinit = FRAME_SIZE_SAVE_EVERYTHING_FOR_CLINIT;
+#undef FRAME_SIZE_SAVE_EVERYTHING_FOR_CLINIT
+static constexpr size_t kFrameSizeSaveEverythingForSuspendCheck =
+    FRAME_SIZE_SAVE_EVERYTHING_FOR_SUSPEND_CHECK;
+#undef FRAME_SIZE_SAVE_EVERYTHING_FOR_SUSPEND_CHECK
 static constexpr size_t kFrameSizeSaveEverything = FRAME_SIZE_SAVE_EVERYTHING;
 #undef FRAME_SIZE_SAVE_EVERYTHING
 #undef BAKER_MARK_INTROSPECTION_REGISTER_COUNT
@@ -158,6 +178,11 @@
 #undef FRAME_SIZE_SAVE_REFS_ONLY
 static constexpr size_t kFrameSizeSaveRefsAndArgs = FRAME_SIZE_SAVE_REFS_AND_ARGS;
 #undef FRAME_SIZE_SAVE_REFS_AND_ARGS
+static constexpr size_t kFrameSizeSaveEverythingForClinit = FRAME_SIZE_SAVE_EVERYTHING_FOR_CLINIT;
+#undef FRAME_SIZE_SAVE_EVERYTHING_FOR_CLINIT
+static constexpr size_t kFrameSizeSaveEverythingForSuspendCheck =
+    FRAME_SIZE_SAVE_EVERYTHING_FOR_SUSPEND_CHECK;
+#undef FRAME_SIZE_SAVE_EVERYTHING_FOR_SUSPEND_CHECK
 static constexpr size_t kFrameSizeSaveEverything = FRAME_SIZE_SAVE_EVERYTHING;
 #undef FRAME_SIZE_SAVE_EVERYTHING
 }  // namespace x86
@@ -170,25 +195,36 @@
 #undef FRAME_SIZE_SAVE_REFS_ONLY
 static constexpr size_t kFrameSizeSaveRefsAndArgs = FRAME_SIZE_SAVE_REFS_AND_ARGS;
 #undef FRAME_SIZE_SAVE_REFS_AND_ARGS
+static constexpr size_t kFrameSizeSaveEverythingForClinit = FRAME_SIZE_SAVE_EVERYTHING_FOR_CLINIT;
+#undef FRAME_SIZE_SAVE_EVERYTHING_FOR_CLINIT
+static constexpr size_t kFrameSizeSaveEverythingForSuspendCheck =
+    FRAME_SIZE_SAVE_EVERYTHING_FOR_SUSPEND_CHECK;
+#undef FRAME_SIZE_SAVE_EVERYTHING_FOR_SUSPEND_CHECK
 static constexpr size_t kFrameSizeSaveEverything = FRAME_SIZE_SAVE_EVERYTHING;
 #undef FRAME_SIZE_SAVE_EVERYTHING
 }  // namespace x86_64
 
 // Check architecture specific constants are sound.
-#define TEST_ARCH(Arch, arch)                             \
-  TEST_F(ArchTest, Arch) {                                \
-    CheckFrameSize(InstructionSet::k##Arch,               \
-                   CalleeSaveType::kSaveAllCalleeSaves,   \
-                   arch::kFrameSizeSaveAllCalleeSaves);   \
-    CheckFrameSize(InstructionSet::k##Arch,               \
-                   CalleeSaveType::kSaveRefsOnly,         \
-                   arch::kFrameSizeSaveRefsOnly);         \
-    CheckFrameSize(InstructionSet::k##Arch,               \
-                   CalleeSaveType::kSaveRefsAndArgs,      \
-                   arch::kFrameSizeSaveRefsAndArgs);      \
-    CheckFrameSize(InstructionSet::k##Arch,               \
-                   CalleeSaveType::kSaveEverything,       \
-                   arch::kFrameSizeSaveEverything);       \
+#define TEST_ARCH(Arch, arch)                                       \
+  TEST_F(ArchTest, Arch) {                                          \
+    CheckFrameSize(InstructionSet::k##Arch,                         \
+                   CalleeSaveType::kSaveAllCalleeSaves,             \
+                   arch::kFrameSizeSaveAllCalleeSaves);             \
+    CheckFrameSize(InstructionSet::k##Arch,                         \
+                   CalleeSaveType::kSaveRefsOnly,                   \
+                   arch::kFrameSizeSaveRefsOnly);                   \
+    CheckFrameSize(InstructionSet::k##Arch,                         \
+                   CalleeSaveType::kSaveRefsAndArgs,                \
+                   arch::kFrameSizeSaveRefsAndArgs);                \
+    CheckFrameSize(InstructionSet::k##Arch,                         \
+                   CalleeSaveType::kSaveEverything,                 \
+                   arch::kFrameSizeSaveEverything);                 \
+    CheckFrameSize(InstructionSet::k##Arch,                         \
+                   CalleeSaveType::kSaveEverythingForClinit,        \
+                   arch::kFrameSizeSaveEverythingForClinit);        \
+    CheckFrameSize(InstructionSet::k##Arch,                         \
+                   CalleeSaveType::kSaveEverythingForSuspendCheck,  \
+                   arch::kFrameSizeSaveEverythingForSuspendCheck);  \
   }
 TEST_ARCH(Arm, arm)
 TEST_ARCH(Arm64, arm64)
diff --git a/runtime/arch/arm/asm_support_arm.h b/runtime/arch/arm/asm_support_arm.h
index 8f2fd6e..3d85872 100644
--- a/runtime/arch/arm/asm_support_arm.h
+++ b/runtime/arch/arm/asm_support_arm.h
@@ -23,6 +23,8 @@
 #define FRAME_SIZE_SAVE_REFS_ONLY 32
 #define FRAME_SIZE_SAVE_REFS_AND_ARGS 112
 #define FRAME_SIZE_SAVE_EVERYTHING 192
+#define FRAME_SIZE_SAVE_EVERYTHING_FOR_CLINIT FRAME_SIZE_SAVE_EVERYTHING
+#define FRAME_SIZE_SAVE_EVERYTHING_FOR_SUSPEND_CHECK FRAME_SIZE_SAVE_EVERYTHING
 
 // The offset from the art_quick_read_barrier_mark_introspection (used for field
 // loads with 32-bit LDR) to the entrypoint for field loads with 16-bit LDR,
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index 375768e..cf2bfee 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -182,7 +182,7 @@
      * Runtime::CreateCalleeSaveMethod(kSaveEverything)
      * when core registers are already saved.
      */
-.macro SETUP_SAVE_EVERYTHING_FRAME_CORE_REGS_SAVED rTemp
+.macro SETUP_SAVE_EVERYTHING_FRAME_CORE_REGS_SAVED rTemp, runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET
                                         @ 14 words of callee saves and args already saved.
     vpush {d0-d15}                      @ 32 words, 2 for each of the 16 saved doubles.
     .cfi_adjust_cfa_offset 128
@@ -190,7 +190,7 @@
     .cfi_adjust_cfa_offset 8
     RUNTIME_CURRENT1 \rTemp             @ Load Runtime::Current into rTemp.
     @ Load kSaveEverything Method* into rTemp.
-    ldr \rTemp, [\rTemp, #RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET]
+    ldr \rTemp, [\rTemp, #\runtime_method_offset]
     str \rTemp, [sp, #0]                @ Place Method* at bottom of stack.
     str sp, [r9, #THREAD_TOP_QUICK_FRAME_OFFSET]  @ Place sp in Thread::Current()->top_quick_frame.
 
@@ -204,7 +204,7 @@
      * Macro that sets up the callee save frame to conform with
      * Runtime::CreateCalleeSaveMethod(kSaveEverything)
      */
-.macro SETUP_SAVE_EVERYTHING_FRAME rTemp
+.macro SETUP_SAVE_EVERYTHING_FRAME rTemp, runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET
     push {r0-r12, lr}                   @ 14 words of callee saves and args.
     .cfi_adjust_cfa_offset 56
     .cfi_rel_offset r0, 0
@@ -221,7 +221,7 @@
     .cfi_rel_offset r11, 44
     .cfi_rel_offset ip, 48
     .cfi_rel_offset lr, 52
-    SETUP_SAVE_EVERYTHING_FRAME_CORE_REGS_SAVED \rTemp
+    SETUP_SAVE_EVERYTHING_FRAME_CORE_REGS_SAVED \rTemp, \runtime_method_offset
 .endm
 
 .macro RESTORE_SAVE_EVERYTHING_FRAME
@@ -1004,10 +1004,10 @@
 .endm
 
 // Macro for string and type resolution and initialization.
-.macro ONE_ARG_SAVE_EVERYTHING_DOWNCALL name, entrypoint
+.macro ONE_ARG_SAVE_EVERYTHING_DOWNCALL name, entrypoint, runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET
     .extern \entrypoint
 ENTRY \name
-    SETUP_SAVE_EVERYTHING_FRAME r1    @ save everything in case of GC
+    SETUP_SAVE_EVERYTHING_FRAME r1, \runtime_method_offset    @ save everything in case of GC
     mov    r1, r9                     @ pass Thread::Current
     bl     \entrypoint                @ (uint32_t index, Thread*)
     cbz    r0, 1f                     @ If result is null, deliver the OOME.
@@ -1021,8 +1021,12 @@
 END \name
 .endm
 
-ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode
-ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_initialize_type, artInitializeTypeFromCode
+.macro ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT name, entrypoint
+    ONE_ARG_SAVE_EVERYTHING_DOWNCALL \name, \entrypoint, RUNTIME_SAVE_EVERYTHING_FOR_CLINIT_METHOD_OFFSET
+.endm
+
+ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT art_quick_initialize_static_storage, artInitializeStaticStorageFromCode
+ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT art_quick_initialize_type, artInitializeTypeFromCode
 ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_initialize_type_and_verify_access, artInitializeTypeAndVerifyAccessFromCode
 ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_string, artResolveStringFromCode
 
@@ -1537,7 +1541,7 @@
 1:
     mov    rSUSPEND, #SUSPEND_CHECK_INTERVAL    @ reset rSUSPEND to SUSPEND_CHECK_INTERVAL
 #endif
-    SETUP_SAVE_EVERYTHING_FRAME r0              @ save everything for GC stack crawl
+    SETUP_SAVE_EVERYTHING_FRAME r0, RUNTIME_SAVE_EVERYTHING_FOR_SUSPEND_CHECK_METHOD_OFFSET @ save everything for GC stack crawl
     mov    r0, rSELF
     bl     artTestSuspendFromCode               @ (Thread*)
     RESTORE_SAVE_EVERYTHING_FRAME
diff --git a/runtime/arch/arm/quick_method_frame_info_arm.h b/runtime/arch/arm/quick_method_frame_info_arm.h
index 39061f0..5c5b81b 100644
--- a/runtime/arch/arm/quick_method_frame_info_arm.h
+++ b/runtime/arch/arm/quick_method_frame_info_arm.h
@@ -56,6 +56,7 @@
     kArmCalleeSaveFpArgSpills | kArmCalleeSaveFpAllSpills;
 
 constexpr uint32_t ArmCalleeSaveCoreSpills(CalleeSaveType type) {
+  type = GetCanonicalCalleeSaveType(type);
   return kArmCalleeSaveAlwaysSpills | kArmCalleeSaveRefSpills |
       (type == CalleeSaveType::kSaveRefsAndArgs ? kArmCalleeSaveArgSpills : 0) |
       (type == CalleeSaveType::kSaveAllCalleeSaves ? kArmCalleeSaveAllSpills : 0) |
@@ -63,6 +64,7 @@
 }
 
 constexpr uint32_t ArmCalleeSaveFpSpills(CalleeSaveType type) {
+  type = GetCanonicalCalleeSaveType(type);
   return kArmCalleeSaveFpAlwaysSpills | kArmCalleeSaveFpRefSpills |
       (type == CalleeSaveType::kSaveRefsAndArgs ? kArmCalleeSaveFpArgSpills : 0) |
       (type == CalleeSaveType::kSaveAllCalleeSaves ? kArmCalleeSaveFpAllSpills : 0) |
@@ -70,29 +72,34 @@
 }
 
 constexpr uint32_t ArmCalleeSaveFrameSize(CalleeSaveType type) {
+  type = GetCanonicalCalleeSaveType(type);
   return RoundUp((POPCOUNT(ArmCalleeSaveCoreSpills(type)) /* gprs */ +
                   POPCOUNT(ArmCalleeSaveFpSpills(type)) /* fprs */ +
                   1 /* Method* */) * static_cast<size_t>(kArmPointerSize), kStackAlignment);
 }
 
 constexpr QuickMethodFrameInfo ArmCalleeSaveMethodFrameInfo(CalleeSaveType type) {
+  type = GetCanonicalCalleeSaveType(type);
   return QuickMethodFrameInfo(ArmCalleeSaveFrameSize(type),
                               ArmCalleeSaveCoreSpills(type),
                               ArmCalleeSaveFpSpills(type));
 }
 
 constexpr size_t ArmCalleeSaveFpr1Offset(CalleeSaveType type) {
+  type = GetCanonicalCalleeSaveType(type);
   return ArmCalleeSaveFrameSize(type) -
          (POPCOUNT(ArmCalleeSaveCoreSpills(type)) +
           POPCOUNT(ArmCalleeSaveFpSpills(type))) * static_cast<size_t>(kArmPointerSize);
 }
 
 constexpr size_t ArmCalleeSaveGpr1Offset(CalleeSaveType type) {
+  type = GetCanonicalCalleeSaveType(type);
   return ArmCalleeSaveFrameSize(type) -
          POPCOUNT(ArmCalleeSaveCoreSpills(type)) * static_cast<size_t>(kArmPointerSize);
 }
 
 constexpr size_t ArmCalleeSaveLrOffset(CalleeSaveType type) {
+  type = GetCanonicalCalleeSaveType(type);
   return ArmCalleeSaveFrameSize(type) -
       POPCOUNT(ArmCalleeSaveCoreSpills(type) & (-(1 << LR))) * static_cast<size_t>(kArmPointerSize);
 }
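
Each frame-info helper above now canonicalizes its argument first. GetCanonicalCalleeSaveType is defined outside this diff; assuming the two new save-everything variants alias kSaveEverything (as the FRAME_SIZE_* defines in the asm_support headers do), its shape would be:

// Assumed shape of GetCanonicalCalleeSaveType; the real definition is
// not part of this diff.
constexpr CalleeSaveType GetCanonicalCalleeSaveType(CalleeSaveType type) {
  if (type == CalleeSaveType::kSaveEverythingForClinit ||
      type == CalleeSaveType::kSaveEverythingForSuspendCheck) {
    return CalleeSaveType::kSaveEverything;
  }
  return type;
}

This keeps the spill masks and frame sizes of the new runtime methods identical to kSaveEverything while still giving them distinct Method* slots in Runtime::callee_save_methods_, which is what the new RUNTIME_SAVE_EVERYTHING_FOR_*_METHOD_OFFSET loads in the entrypoint assembly rely on.
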
diff --git a/runtime/arch/arm64/asm_support_arm64.h b/runtime/arch/arm64/asm_support_arm64.h
index 6b77200..57376d0 100644
--- a/runtime/arch/arm64/asm_support_arm64.h
+++ b/runtime/arch/arm64/asm_support_arm64.h
@@ -23,6 +23,8 @@
 #define FRAME_SIZE_SAVE_REFS_ONLY 96
 #define FRAME_SIZE_SAVE_REFS_AND_ARGS 224
 #define FRAME_SIZE_SAVE_EVERYTHING 512
+#define FRAME_SIZE_SAVE_EVERYTHING_FOR_CLINIT FRAME_SIZE_SAVE_EVERYTHING
+#define FRAME_SIZE_SAVE_EVERYTHING_FOR_SUSPEND_CHECK FRAME_SIZE_SAVE_EVERYTHING
 
 // The offset from art_quick_read_barrier_mark_introspection to the array switch cases,
 // i.e. art_quick_read_barrier_mark_introspection_arrays.
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index d15f5b8..3d8ca40 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -287,7 +287,7 @@
      * when the SP has already been decremented by FRAME_SIZE_SAVE_EVERYTHING
      * and saving registers x29 and LR is handled elsewhere.
      */
-.macro SETUP_SAVE_EVERYTHING_FRAME_DECREMENTED_SP_SKIP_X29_LR
+.macro SETUP_SAVE_EVERYTHING_FRAME_DECREMENTED_SP_SKIP_X29_LR runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET
     // Ugly compile-time check, but we only have the preprocessor.
 #if (FRAME_SIZE_SAVE_EVERYTHING != 512)
 #error "FRAME_SIZE_SAVE_EVERYTHING(ARM64) size not as expected."
@@ -337,7 +337,7 @@
     ldr xIP0, [xIP0]  // art::Runtime* xIP0 = art::Runtime::instance_;
 
     // ArtMethod* xIP0 = Runtime::instance_->callee_save_methods_[kSaveEverything];
-    ldr xIP0, [xIP0, RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET]
+    ldr xIP0, [xIP0, \runtime_method_offset]
 
     // Store ArtMethod* Runtime::callee_save_methods_[kSaveEverything].
     str xIP0, [sp]
@@ -350,10 +350,10 @@
      * Macro that sets up the callee save frame to conform with
      * Runtime::CreateCalleeSaveMethod(kSaveEverything)
      */
-.macro SETUP_SAVE_EVERYTHING_FRAME
+.macro SETUP_SAVE_EVERYTHING_FRAME runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET
     INCREASE_FRAME 512
     SAVE_TWO_REGS x29, xLR, 496
-    SETUP_SAVE_EVERYTHING_FRAME_DECREMENTED_SP_SKIP_X29_LR
+    SETUP_SAVE_EVERYTHING_FRAME_DECREMENTED_SP_SKIP_X29_LR \runtime_method_offset
 .endm
 
 .macro RESTORE_SAVE_EVERYTHING_FRAME_KEEP_X0
@@ -398,7 +398,7 @@
 .endm
 
 .macro RESTORE_SAVE_EVERYTHING_FRAME
-    RESTORE_REG       x0,      264
+    RESTORE_REG  x0, 264
     RESTORE_SAVE_EVERYTHING_FRAME_KEEP_X0
 .endm
 
@@ -1593,10 +1593,10 @@
 .endm
 
 // Macro for string and type resolution and initialization.
-.macro ONE_ARG_SAVE_EVERYTHING_DOWNCALL name, entrypoint
+.macro ONE_ARG_SAVE_EVERYTHING_DOWNCALL name, entrypoint, runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET
     .extern \entrypoint
 ENTRY \name
-    SETUP_SAVE_EVERYTHING_FRAME       // save everything for stack crawl
+    SETUP_SAVE_EVERYTHING_FRAME \runtime_method_offset       // save everything for stack crawl
     mov   x1, xSELF                   // pass Thread::Current
     bl    \entrypoint                 // (int32_t index, Thread* self)
     cbz   w0, 1f                      // If result is null, deliver the OOME.
@@ -1611,6 +1611,10 @@
 END \name
 .endm
 
+.macro ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT name, entrypoint
+    ONE_ARG_SAVE_EVERYTHING_DOWNCALL \name, \entrypoint, RUNTIME_SAVE_EVERYTHING_FOR_CLINIT_METHOD_OFFSET
+.endm
+
 .macro RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
     cbz w0, 1f                 // result zero branch over
     ret                        // return
@@ -1629,9 +1633,8 @@
      * initializer and deliver the exception on error. On success the static storage base is
      * returned.
      */
-ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode
-
-ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_initialize_type, artInitializeTypeFromCode
+ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT art_quick_initialize_static_storage, artInitializeStaticStorageFromCode
+ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT art_quick_initialize_type, artInitializeTypeFromCode
 ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_initialize_type_and_verify_access, artInitializeTypeAndVerifyAccessFromCode
 ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_string, artResolveStringFromCode
 
@@ -2008,7 +2011,7 @@
      */
     .extern artTestSuspendFromCode
 ENTRY art_quick_test_suspend
-    SETUP_SAVE_EVERYTHING_FRAME               // save callee saves for stack crawl
+    SETUP_SAVE_EVERYTHING_FRAME RUNTIME_SAVE_EVERYTHING_FOR_SUSPEND_CHECK_METHOD_OFFSET  // save callee saves for stack crawl
     mov    x0, xSELF
     bl     artTestSuspendFromCode             // (Thread*)
     RESTORE_SAVE_EVERYTHING_FRAME
diff --git a/runtime/arch/arm64/quick_method_frame_info_arm64.h b/runtime/arch/arm64/quick_method_frame_info_arm64.h
index c231d4d..3e6f6c6 100644
--- a/runtime/arch/arm64/quick_method_frame_info_arm64.h
+++ b/runtime/arch/arm64/quick_method_frame_info_arm64.h
@@ -80,6 +80,7 @@
     (1 << art::arm64::D30) | (1 << art::arm64::D31);
 
 constexpr uint32_t Arm64CalleeSaveCoreSpills(CalleeSaveType type) {
+  type = GetCanonicalCalleeSaveType(type);
   return kArm64CalleeSaveAlwaysSpills | kArm64CalleeSaveRefSpills |
       (type == CalleeSaveType::kSaveRefsAndArgs ? kArm64CalleeSaveArgSpills : 0) |
       (type == CalleeSaveType::kSaveAllCalleeSaves ? kArm64CalleeSaveAllSpills : 0) |
@@ -87,6 +88,7 @@
 }
 
 constexpr uint32_t Arm64CalleeSaveFpSpills(CalleeSaveType type) {
+  type = GetCanonicalCalleeSaveType(type);
   return kArm64CalleeSaveFpAlwaysSpills | kArm64CalleeSaveFpRefSpills |
       (type == CalleeSaveType::kSaveRefsAndArgs ? kArm64CalleeSaveFpArgSpills : 0) |
       (type == CalleeSaveType::kSaveAllCalleeSaves ? kArm64CalleeSaveFpAllSpills : 0) |
@@ -94,29 +96,34 @@
 }
 
 constexpr uint32_t Arm64CalleeSaveFrameSize(CalleeSaveType type) {
+  type = GetCanonicalCalleeSaveType(type);
   return RoundUp((POPCOUNT(Arm64CalleeSaveCoreSpills(type)) /* gprs */ +
                   POPCOUNT(Arm64CalleeSaveFpSpills(type)) /* fprs */ +
                   1 /* Method* */) * static_cast<size_t>(kArm64PointerSize), kStackAlignment);
 }
 
 constexpr QuickMethodFrameInfo Arm64CalleeSaveMethodFrameInfo(CalleeSaveType type) {
+  type = GetCanonicalCalleeSaveType(type);
   return QuickMethodFrameInfo(Arm64CalleeSaveFrameSize(type),
                               Arm64CalleeSaveCoreSpills(type),
                               Arm64CalleeSaveFpSpills(type));
 }
 
 constexpr size_t Arm64CalleeSaveFpr1Offset(CalleeSaveType type) {
+  type = GetCanonicalCalleeSaveType(type);
   return Arm64CalleeSaveFrameSize(type) -
          (POPCOUNT(Arm64CalleeSaveCoreSpills(type)) +
           POPCOUNT(Arm64CalleeSaveFpSpills(type))) * static_cast<size_t>(kArm64PointerSize);
 }
 
 constexpr size_t Arm64CalleeSaveGpr1Offset(CalleeSaveType type) {
+  type = GetCanonicalCalleeSaveType(type);
   return Arm64CalleeSaveFrameSize(type) -
          POPCOUNT(Arm64CalleeSaveCoreSpills(type)) * static_cast<size_t>(kArm64PointerSize);
 }
 
 constexpr size_t Arm64CalleeSaveLrOffset(CalleeSaveType type) {
+  type = GetCanonicalCalleeSaveType(type);
   return Arm64CalleeSaveFrameSize(type) -
       POPCOUNT(Arm64CalleeSaveCoreSpills(type) & (-(1 << LR))) *
       static_cast<size_t>(kArm64PointerSize);
diff --git a/runtime/arch/mips/asm_support_mips.h b/runtime/arch/mips/asm_support_mips.h
index 9d8572f..2edd63f 100644
--- a/runtime/arch/mips/asm_support_mips.h
+++ b/runtime/arch/mips/asm_support_mips.h
@@ -23,6 +23,8 @@
 #define FRAME_SIZE_SAVE_REFS_ONLY 48
 #define FRAME_SIZE_SAVE_REFS_AND_ARGS 112
 #define FRAME_SIZE_SAVE_EVERYTHING 256
+#define FRAME_SIZE_SAVE_EVERYTHING_FOR_CLINIT FRAME_SIZE_SAVE_EVERYTHING
+#define FRAME_SIZE_SAVE_EVERYTHING_FOR_SUSPEND_CHECK FRAME_SIZE_SAVE_EVERYTHING
 
 // &art_quick_read_barrier_mark_introspection is the first of many entry points:
 //   21 entry points for long field offsets, large array indices and variable array indices
diff --git a/runtime/arch/mips/quick_method_frame_info_mips.h b/runtime/arch/mips/quick_method_frame_info_mips.h
index 01879a5..45a21ab 100644
--- a/runtime/arch/mips/quick_method_frame_info_mips.h
+++ b/runtime/arch/mips/quick_method_frame_info_mips.h
@@ -65,6 +65,7 @@
     (1 << art::mips::F28) | (1 << art::mips::F29) | (1 << art::mips::F30) | (1u << art::mips::F31);
 
 constexpr uint32_t MipsCalleeSaveCoreSpills(CalleeSaveType type) {
+  type = GetCanonicalCalleeSaveType(type);
   return kMipsCalleeSaveAlwaysSpills | kMipsCalleeSaveRefSpills |
       (type == CalleeSaveType::kSaveRefsAndArgs ? kMipsCalleeSaveArgSpills : 0) |
       (type == CalleeSaveType::kSaveAllCalleeSaves ? kMipsCalleeSaveAllSpills : 0) |
@@ -72,6 +73,7 @@
 }
 
 constexpr uint32_t MipsCalleeSaveFPSpills(CalleeSaveType type) {
+  type = GetCanonicalCalleeSaveType(type);
   return kMipsCalleeSaveFpAlwaysSpills | kMipsCalleeSaveFpRefSpills |
       (type == CalleeSaveType::kSaveRefsAndArgs ? kMipsCalleeSaveFpArgSpills : 0) |
       (type == CalleeSaveType::kSaveAllCalleeSaves ? kMipsCalleeSaveAllFPSpills : 0) |
@@ -79,12 +81,14 @@
 }
 
 constexpr uint32_t MipsCalleeSaveFrameSize(CalleeSaveType type) {
+  type = GetCanonicalCalleeSaveType(type);
   return RoundUp((POPCOUNT(MipsCalleeSaveCoreSpills(type)) /* gprs */ +
                   POPCOUNT(MipsCalleeSaveFPSpills(type))   /* fprs */ +
                   1 /* Method* */) * static_cast<size_t>(kMipsPointerSize), kStackAlignment);
 }
 
 constexpr QuickMethodFrameInfo MipsCalleeSaveMethodFrameInfo(CalleeSaveType type) {
+  type = GetCanonicalCalleeSaveType(type);
   return QuickMethodFrameInfo(MipsCalleeSaveFrameSize(type),
                               MipsCalleeSaveCoreSpills(type),
                               MipsCalleeSaveFPSpills(type));
diff --git a/runtime/arch/mips64/asm_support_mips64.h b/runtime/arch/mips64/asm_support_mips64.h
index 7185da5..a8e907e 100644
--- a/runtime/arch/mips64/asm_support_mips64.h
+++ b/runtime/arch/mips64/asm_support_mips64.h
@@ -27,6 +27,8 @@
 #define FRAME_SIZE_SAVE_REFS_AND_ARGS 208
 // $f0-$f31, $at, $v0-$v1, $a0-$a7, $t0-$t3, $s0-$s7, $t8-$t9, $gp, $s8, $ra + padding + method*
 #define FRAME_SIZE_SAVE_EVERYTHING 496
+#define FRAME_SIZE_SAVE_EVERYTHING_FOR_CLINIT FRAME_SIZE_SAVE_EVERYTHING
+#define FRAME_SIZE_SAVE_EVERYTHING_FOR_SUSPEND_CHECK FRAME_SIZE_SAVE_EVERYTHING
 
 // &art_quick_read_barrier_mark_introspection is the first of many entry points:
 //   20 entry points for long field offsets, large array indices and variable array indices
diff --git a/runtime/arch/mips64/quick_method_frame_info_mips64.h b/runtime/arch/mips64/quick_method_frame_info_mips64.h
index a55ab0e..520f631 100644
--- a/runtime/arch/mips64/quick_method_frame_info_mips64.h
+++ b/runtime/arch/mips64/quick_method_frame_info_mips64.h
@@ -72,6 +72,7 @@
     (1 << art::mips64::F30) | (1 << art::mips64::F31);
 
 constexpr uint32_t Mips64CalleeSaveCoreSpills(CalleeSaveType type) {
+  type = GetCanonicalCalleeSaveType(type);
   return kMips64CalleeSaveAlwaysSpills | kMips64CalleeSaveRefSpills |
       (type == CalleeSaveType::kSaveRefsAndArgs ? kMips64CalleeSaveArgSpills : 0) |
       (type == CalleeSaveType::kSaveAllCalleeSaves ? kMips64CalleeSaveAllSpills : 0) |
@@ -79,6 +80,7 @@
 }
 
 constexpr uint32_t Mips64CalleeSaveFpSpills(CalleeSaveType type) {
+  type = GetCanonicalCalleeSaveType(type);
   return kMips64CalleeSaveFpRefSpills |
       (type == CalleeSaveType::kSaveRefsAndArgs ? kMips64CalleeSaveFpArgSpills : 0) |
       (type == CalleeSaveType::kSaveAllCalleeSaves ? kMips64CalleeSaveFpAllSpills : 0) |
@@ -86,12 +88,14 @@
 }
 
 constexpr uint32_t Mips64CalleeSaveFrameSize(CalleeSaveType type) {
+  type = GetCanonicalCalleeSaveType(type);
   return RoundUp((POPCOUNT(Mips64CalleeSaveCoreSpills(type)) /* gprs */ +
                   POPCOUNT(Mips64CalleeSaveFpSpills(type))   /* fprs */ +
                   + 1 /* Method* */) * static_cast<size_t>(kMips64PointerSize), kStackAlignment);
 }
 
 constexpr QuickMethodFrameInfo Mips64CalleeSaveMethodFrameInfo(CalleeSaveType type) {
+  type = GetCanonicalCalleeSaveType(type);
   return QuickMethodFrameInfo(Mips64CalleeSaveFrameSize(type),
                               Mips64CalleeSaveCoreSpills(type),
                               Mips64CalleeSaveFpSpills(type));
diff --git a/runtime/arch/x86/asm_support_x86.h b/runtime/arch/x86/asm_support_x86.h
index 2bba08d..737d736 100644
--- a/runtime/arch/x86/asm_support_x86.h
+++ b/runtime/arch/x86/asm_support_x86.h
@@ -23,5 +23,7 @@
 #define FRAME_SIZE_SAVE_REFS_ONLY 32
 #define FRAME_SIZE_SAVE_REFS_AND_ARGS (32 + 32)
 #define FRAME_SIZE_SAVE_EVERYTHING (48 + 64)
+#define FRAME_SIZE_SAVE_EVERYTHING_FOR_CLINIT FRAME_SIZE_SAVE_EVERYTHING
+#define FRAME_SIZE_SAVE_EVERYTHING_FOR_SUSPEND_CHECK FRAME_SIZE_SAVE_EVERYTHING
 
 #endif  // ART_RUNTIME_ARCH_X86_ASM_SUPPORT_X86_H_
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 48d2de9..4e5e93a 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -226,7 +226,7 @@
      * Runtime::CreateCalleeSaveMethod(kSaveEverything)
      * when EDI and ESI are already saved.
      */
-MACRO2(SETUP_SAVE_EVERYTHING_FRAME_EDI_ESI_SAVED, got_reg, temp_reg)
+MACRO3(SETUP_SAVE_EVERYTHING_FRAME_EDI_ESI_SAVED, got_reg, temp_reg, runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET)
     // Save core registers from highest to lowest to agree with core spills bitmap.
     // EDI and ESI, or at least placeholders for them, are already on the stack.
     PUSH ebp
@@ -252,7 +252,7 @@
     movl SYMBOL(_ZN3art7Runtime9instance_E)@GOT(REG_VAR(got_reg)), REG_VAR(temp_reg)
     movl (REG_VAR(temp_reg)), REG_VAR(temp_reg)
     // Push save everything callee-save method.
-    pushl RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET(REG_VAR(temp_reg))
+    pushl \runtime_method_offset(REG_VAR(temp_reg))
     CFI_ADJUST_CFA_OFFSET(4)
     // Store esp as the stop quick frame.
     movl %esp, %fs:THREAD_TOP_QUICK_FRAME_OFFSET
@@ -269,20 +269,20 @@
      * Runtime::CreateCalleeSaveMethod(kSaveEverything)
      * when EDI is already saved.
      */
-MACRO2(SETUP_SAVE_EVERYTHING_FRAME_EDI_SAVED, got_reg, temp_reg)
+MACRO3(SETUP_SAVE_EVERYTHING_FRAME_EDI_SAVED, got_reg, temp_reg, runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET)
     // Save core registers from highest to lowest to agree with core spills bitmap.
     // EDI, or at least a placeholder for it, is already on the stack.
     PUSH esi
-    SETUP_SAVE_EVERYTHING_FRAME_EDI_ESI_SAVED RAW_VAR(got_reg), RAW_VAR(temp_reg)
+    SETUP_SAVE_EVERYTHING_FRAME_EDI_ESI_SAVED RAW_VAR(got_reg), RAW_VAR(temp_reg), \runtime_method_offset
 END_MACRO
 
     /*
      * Macro that sets up the callee save frame to conform with
      * Runtime::CreateCalleeSaveMethod(kSaveEverything)
      */
-MACRO2(SETUP_SAVE_EVERYTHING_FRAME, got_reg, temp_reg)
+MACRO3(SETUP_SAVE_EVERYTHING_FRAME, got_reg, temp_reg, runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET)
     PUSH edi
-    SETUP_SAVE_EVERYTHING_FRAME_EDI_SAVED RAW_VAR(got_reg), RAW_VAR(temp_reg)
+    SETUP_SAVE_EVERYTHING_FRAME_EDI_SAVED RAW_VAR(got_reg), RAW_VAR(temp_reg), \runtime_method_offset
 END_MACRO
 
 MACRO0(RESTORE_SAVE_EVERYTHING_FRAME_FRPS)
@@ -923,9 +923,9 @@
 END_MACRO
 
 // Macro for string and type resolution and initialization.
-MACRO2(ONE_ARG_SAVE_EVERYTHING_DOWNCALL, c_name, cxx_name)
+MACRO3(ONE_ARG_SAVE_EVERYTHING_DOWNCALL, c_name, cxx_name, runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET)
     DEFINE_FUNCTION VAR(c_name)
-    SETUP_SAVE_EVERYTHING_FRAME ebx, ebx              // save ref containing registers for GC
+    SETUP_SAVE_EVERYTHING_FRAME ebx, ebx, \runtime_method_offset  // save ref containing registers for GC
     // Outgoing argument set up
     subl MACRO_LITERAL(8), %esp                       // push padding
     CFI_ADJUST_CFA_OFFSET(8)
@@ -947,6 +947,10 @@
     END_FUNCTION VAR(c_name)
 END_MACRO
 
+MACRO2(ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT, c_name, cxx_name)
+    ONE_ARG_SAVE_EVERYTHING_DOWNCALL \c_name, \cxx_name, RUNTIME_SAVE_EVERYTHING_FOR_CLINIT_METHOD_OFFSET
+END_MACRO
+
 MACRO0(RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER)
     testl %eax, %eax               // eax == 0 ?
     jz  1f                         // if eax == 0 goto 1
@@ -1270,8 +1274,8 @@
 GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved32_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_32
 GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved64_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_64
 
-ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode
-ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_initialize_type, artInitializeTypeFromCode
+ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT art_quick_initialize_static_storage, artInitializeStaticStorageFromCode
+ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT art_quick_initialize_type, artInitializeTypeFromCode
 ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_initialize_type_and_verify_access, artInitializeTypeAndVerifyAccessFromCode
 ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_string, artResolveStringFromCode
 
@@ -1598,7 +1602,7 @@
 END_FUNCTION art_quick_memcpy
 
 DEFINE_FUNCTION art_quick_test_suspend
-    SETUP_SAVE_EVERYTHING_FRAME ebx, ebx              // save everything for GC
+    SETUP_SAVE_EVERYTHING_FRAME ebx, ebx, RUNTIME_SAVE_EVERYTHING_FOR_SUSPEND_CHECK_METHOD_OFFSET  // save everything for GC
     // Outgoing argument set up
     subl MACRO_LITERAL(12), %esp                      // push padding
     CFI_ADJUST_CFA_OFFSET(12)
diff --git a/runtime/arch/x86/quick_method_frame_info_x86.h b/runtime/arch/x86/quick_method_frame_info_x86.h
index 8342c9f..9a66333 100644
--- a/runtime/arch/x86/quick_method_frame_info_x86.h
+++ b/runtime/arch/x86/quick_method_frame_info_x86.h
@@ -57,23 +57,27 @@
     (1 << art::x86::XMM6) | (1 << art::x86::XMM7);
 
 constexpr uint32_t X86CalleeSaveCoreSpills(CalleeSaveType type) {
+  type = GetCanonicalCalleeSaveType(type);
   return kX86CalleeSaveAlwaysSpills | kX86CalleeSaveRefSpills |
       (type == CalleeSaveType::kSaveRefsAndArgs ? kX86CalleeSaveArgSpills : 0) |
       (type == CalleeSaveType::kSaveEverything ? kX86CalleeSaveEverythingSpills : 0);
 }
 
 constexpr uint32_t X86CalleeSaveFpSpills(CalleeSaveType type) {
-    return (type == CalleeSaveType::kSaveRefsAndArgs ? kX86CalleeSaveFpArgSpills : 0) |
-           (type == CalleeSaveType::kSaveEverything ? kX86CalleeSaveFpEverythingSpills : 0);
+  type = GetCanonicalCalleeSaveType(type);
+  return (type == CalleeSaveType::kSaveRefsAndArgs ? kX86CalleeSaveFpArgSpills : 0) |
+         (type == CalleeSaveType::kSaveEverything ? kX86CalleeSaveFpEverythingSpills : 0);
 }
 
 constexpr uint32_t X86CalleeSaveFrameSize(CalleeSaveType type) {
+  type = GetCanonicalCalleeSaveType(type);
   return RoundUp((POPCOUNT(X86CalleeSaveCoreSpills(type)) /* gprs */ +
                   2 * POPCOUNT(X86CalleeSaveFpSpills(type)) /* fprs */ +
                   1 /* Method* */) * static_cast<size_t>(kX86PointerSize), kStackAlignment);
 }
 
 constexpr QuickMethodFrameInfo X86CalleeSaveMethodFrameInfo(CalleeSaveType type) {
+  type = GetCanonicalCalleeSaveType(type);
   return QuickMethodFrameInfo(X86CalleeSaveFrameSize(type),
                               X86CalleeSaveCoreSpills(type),
                               X86CalleeSaveFpSpills(type));
diff --git a/runtime/arch/x86_64/asm_support_x86_64.h b/runtime/arch/x86_64/asm_support_x86_64.h
index a4446d3..51befbe 100644
--- a/runtime/arch/x86_64/asm_support_x86_64.h
+++ b/runtime/arch/x86_64/asm_support_x86_64.h
@@ -23,5 +23,7 @@
 #define FRAME_SIZE_SAVE_REFS_ONLY (64 + 4*8)
 #define FRAME_SIZE_SAVE_REFS_AND_ARGS (112 + 12*8)
 #define FRAME_SIZE_SAVE_EVERYTHING (144 + 16*8)
+#define FRAME_SIZE_SAVE_EVERYTHING_FOR_CLINIT FRAME_SIZE_SAVE_EVERYTHING
+#define FRAME_SIZE_SAVE_EVERYTHING_FOR_SUSPEND_CHECK FRAME_SIZE_SAVE_EVERYTHING
 
 #endif  // ART_RUNTIME_ARCH_X86_64_ASM_SUPPORT_X86_64_H_
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 0a9199e..73e8610 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -272,7 +272,7 @@
      * Runtime::CreateCalleeSaveMethod(kSaveEverything)
      * when R14 and R15 are already saved.
      */
-MACRO0(SETUP_SAVE_EVERYTHING_FRAME_R14_R15_SAVED)
+MACRO1(SETUP_SAVE_EVERYTHING_FRAME_R14_R15_SAVED, runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET)
 #if defined(__APPLE__)
     int3
     int3
@@ -316,7 +316,7 @@
     movq %xmm14, 120(%rsp)
     movq %xmm15, 128(%rsp)
     // Push ArtMethod* for save everything frame method.
-    pushq RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET(%r10)
+    pushq \runtime_method_offset(%r10)
     CFI_ADJUST_CFA_OFFSET(8)
     // Store rsp as the top quick frame.
     movq %rsp, %gs:THREAD_TOP_QUICK_FRAME_OFFSET
@@ -334,18 +334,18 @@
      * Runtime::CreateCalleeSaveMethod(kSaveEverything)
      * when R15 is already saved.
      */
-MACRO0(SETUP_SAVE_EVERYTHING_FRAME_R15_SAVED)
+MACRO1(SETUP_SAVE_EVERYTHING_FRAME_R15_SAVED, runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET)
     PUSH r14
-    SETUP_SAVE_EVERYTHING_FRAME_R14_R15_SAVED
+    SETUP_SAVE_EVERYTHING_FRAME_R14_R15_SAVED \runtime_method_offset
 END_MACRO
 
     /*
      * Macro that sets up the callee save frame to conform with
      * Runtime::CreateCalleeSaveMethod(kSaveEverything)
      */
-MACRO0(SETUP_SAVE_EVERYTHING_FRAME)
+MACRO1(SETUP_SAVE_EVERYTHING_FRAME, runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET)
     PUSH r15
-    SETUP_SAVE_EVERYTHING_FRAME_R15_SAVED
+    SETUP_SAVE_EVERYTHING_FRAME_R15_SAVED \runtime_method_offset
 END_MACRO
 
 MACRO0(RESTORE_SAVE_EVERYTHING_FRAME_FRPS)
@@ -951,9 +951,9 @@
 END_MACRO
 
 // Macro for string and type resolution and initialization.
-MACRO2(ONE_ARG_SAVE_EVERYTHING_DOWNCALL, c_name, cxx_name)
+MACRO3(ONE_ARG_SAVE_EVERYTHING_DOWNCALL, c_name, cxx_name, runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET)
     DEFINE_FUNCTION VAR(c_name)
-    SETUP_SAVE_EVERYTHING_FRAME                   // save everything for GC
+    SETUP_SAVE_EVERYTHING_FRAME \runtime_method_offset  // save everything for GC
     // Outgoing argument set up
     movl %eax, %edi                               // pass string index
     movq %gs:THREAD_SELF_OFFSET, %rsi             // pass Thread::Current()
@@ -970,6 +970,10 @@
     END_FUNCTION VAR(c_name)
 END_MACRO
 
+MACRO2(ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT, c_name, cxx_name)
+    ONE_ARG_SAVE_EVERYTHING_DOWNCALL \c_name, \cxx_name, RUNTIME_SAVE_EVERYTHING_FOR_CLINIT_METHOD_OFFSET
+END_MACRO
+
 MACRO0(RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER)
     testq %rax, %rax               // rax == 0 ?
     jz  1f                         // if rax == 0 goto 1
@@ -1290,8 +1294,8 @@
     ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeInitializedRegionTLAB
 END_FUNCTION art_quick_alloc_object_initialized_region_tlab
 
-ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode
-ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_initialize_type, artInitializeTypeFromCode
+ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT art_quick_initialize_static_storage, artInitializeStaticStorageFromCode
+ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT art_quick_initialize_type, artInitializeTypeFromCode
 ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_initialize_type_and_verify_access, artInitializeTypeAndVerifyAccessFromCode
 ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_string, artResolveStringFromCode
 
@@ -1575,7 +1579,7 @@
 END_FUNCTION art_quick_memcpy
 
 DEFINE_FUNCTION art_quick_test_suspend
-    SETUP_SAVE_EVERYTHING_FRAME                 // save everything for GC
+    SETUP_SAVE_EVERYTHING_FRAME RUNTIME_SAVE_EVERYTHING_FOR_SUSPEND_CHECK_METHOD_OFFSET  // save everything for GC
     // Outgoing argument set up
     movq %gs:THREAD_SELF_OFFSET, %rdi           // pass Thread::Current()
     call SYMBOL(artTestSuspendFromCode)         // (Thread*)
diff --git a/runtime/arch/x86_64/quick_method_frame_info_x86_64.h b/runtime/arch/x86_64/quick_method_frame_info_x86_64.h
index 425d616..ebf976e 100644
--- a/runtime/arch/x86_64/quick_method_frame_info_x86_64.h
+++ b/runtime/arch/x86_64/quick_method_frame_info_x86_64.h
@@ -56,24 +56,28 @@
     (1 << art::x86_64::XMM10) | (1 << art::x86_64::XMM11);
 
 constexpr uint32_t X86_64CalleeSaveCoreSpills(CalleeSaveType type) {
+  type = GetCanonicalCalleeSaveType(type);
   return kX86_64CalleeSaveAlwaysSpills | kX86_64CalleeSaveRefSpills |
       (type == CalleeSaveType::kSaveRefsAndArgs ? kX86_64CalleeSaveArgSpills : 0) |
       (type == CalleeSaveType::kSaveEverything ? kX86_64CalleeSaveEverythingSpills : 0);
 }
 
 constexpr uint32_t X86_64CalleeSaveFpSpills(CalleeSaveType type) {
+  type = GetCanonicalCalleeSaveType(type);
   return kX86_64CalleeSaveFpSpills |
       (type == CalleeSaveType::kSaveRefsAndArgs ? kX86_64CalleeSaveFpArgSpills : 0) |
       (type == CalleeSaveType::kSaveEverything ? kX86_64CalleeSaveFpEverythingSpills : 0);
 }
 
 constexpr uint32_t X86_64CalleeSaveFrameSize(CalleeSaveType type) {
+  type = GetCanonicalCalleeSaveType(type);
   return RoundUp((POPCOUNT(X86_64CalleeSaveCoreSpills(type)) /* gprs */ +
                   POPCOUNT(X86_64CalleeSaveFpSpills(type)) /* fprs */ +
                   1 /* Method* */) * static_cast<size_t>(kX86_64PointerSize), kStackAlignment);
 }
 
 constexpr QuickMethodFrameInfo X86_64CalleeSaveMethodFrameInfo(CalleeSaveType type) {
+  type = GetCanonicalCalleeSaveType(type);
   return QuickMethodFrameInfo(X86_64CalleeSaveFrameSize(type),
                               X86_64CalleeSaveCoreSpills(type),
                               X86_64CalleeSaveFpSpills(type));
diff --git a/runtime/art_method-inl.h b/runtime/art_method-inl.h
index fad9278..50e9144 100644
--- a/runtime/art_method-inl.h
+++ b/runtime/art_method-inl.h
@@ -285,6 +285,10 @@
     return "<runtime internal callee-save reference and argument registers method>";
   } else if (this == runtime->GetCalleeSaveMethod(CalleeSaveType::kSaveEverything)) {
     return "<runtime internal save-every-register method>";
+  } else if (this == runtime->GetCalleeSaveMethod(CalleeSaveType::kSaveEverythingForClinit)) {
+    return "<runtime internal save-every-register method for clinit>";
+  } else if (this == runtime->GetCalleeSaveMethod(CalleeSaveType::kSaveEverythingForSuspendCheck)) {
+    return "<runtime internal save-every-register method for suspend check>";
   } else {
     return "<unknown runtime internal method>";
   }
diff --git a/runtime/base/callee_save_type.h b/runtime/base/callee_save_type.h
index 501b296..e9cd63c 100644
--- a/runtime/base/callee_save_type.h
+++ b/runtime/base/callee_save_type.h
@@ -28,10 +28,20 @@
   kSaveRefsOnly,        // Only those callee-save registers that can hold references.
   kSaveRefsAndArgs,     // References (see above) and arguments (usually caller-save registers).
   kSaveEverything,      // All registers, including both callee-save and caller-save.
+  kSaveEverythingForClinit,        // Special kSaveEverything for clinit.
+  kSaveEverythingForSuspendCheck,  // Special kSaveEverything for suspend check.
   kLastCalleeSaveType   // Value used for iteration.
 };
 std::ostream& operator<<(std::ostream& os, const CalleeSaveType& rhs);
 
+static inline constexpr CalleeSaveType GetCanonicalCalleeSaveType(CalleeSaveType type) {
+  if (type == CalleeSaveType::kSaveEverythingForClinit ||
+      type == CalleeSaveType::kSaveEverythingForSuspendCheck) {
+    return CalleeSaveType::kSaveEverything;
+  }
+  return type;
+}
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_BASE_CALLEE_SAVE_TYPE_H_
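
The canonical-type mapping above is what lets the two new save-everything variants share the kSaveEverything spill masks and frame sizes on every architecture, so each per-arch helper only needs the one added `type = GetCanonicalCalleeSaveType(type);` line. A minimal standalone sketch of the pattern (the FrameSize helper and its sizes are invented for illustration; the enum and GetCanonicalCalleeSaveType mirror the diff):

#include <cstdint>

enum class CalleeSaveType : uint32_t {
  kSaveAllCalleeSaves,
  kSaveRefsOnly,
  kSaveRefsAndArgs,
  kSaveEverything,
  kSaveEverythingForClinit,
  kSaveEverythingForSuspendCheck,
  kLastCalleeSaveType
};

// Map the two specialized save-everything types back to the canonical one,
// exactly as in the header above.
constexpr CalleeSaveType GetCanonicalCalleeSaveType(CalleeSaveType type) {
  if (type == CalleeSaveType::kSaveEverythingForClinit ||
      type == CalleeSaveType::kSaveEverythingForSuspendCheck) {
    return CalleeSaveType::kSaveEverything;
  }
  return type;
}

// Hypothetical frame-size helper following the per-architecture pattern in
// this diff; the sizes are made up for the sketch.
constexpr uint32_t FrameSize(CalleeSaveType type) {
  type = GetCanonicalCalleeSaveType(type);
  return type == CalleeSaveType::kSaveEverything ? 512u : 128u;
}

static_assert(FrameSize(CalleeSaveType::kSaveEverythingForClinit) ==
                  FrameSize(CalleeSaveType::kSaveEverything),
              "specialized types share the canonical frame layout");
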
diff --git a/runtime/base/unix_file/fd_file.cc b/runtime/base/unix_file/fd_file.cc
index 0c73ce7..eb8ced0 100644
--- a/runtime/base/unix_file/fd_file.cc
+++ b/runtime/base/unix_file/fd_file.cc
@@ -438,4 +438,30 @@
   return true;
 }
 
+int FdFile::Compare(FdFile* other) {
+  int64_t length = GetLength();
+  int64_t length2 = other->GetLength();
+  if (length != length2) {
+    return length < length2 ? -1 : 1;
+  }
+  static const size_t kBufferSize = 4096;
+  std::unique_ptr<uint8_t[]> buffer1(new uint8_t[kBufferSize]);
+  std::unique_ptr<uint8_t[]> buffer2(new uint8_t[kBufferSize]);
+  while (length > 0) {
+    size_t len = std::min(kBufferSize, static_cast<size_t>(length));
+    if (!ReadFully(&buffer1[0], len)) {
+      return -1;
+    }
+    if (!other->ReadFully(&buffer2[0], len)) {
+      return 1;
+    }
+    int result = memcmp(&buffer1[0], &buffer2[0], len);
+    if (result != 0) {
+      return result;
+    }
+    length -= len;
+  }
+  return 0;
+}
+
 }  // namespace unix_file
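
FdFile::Compare above first orders by length, then walks both files in 4 KiB chunks and returns the memcmp result of the first chunk that differs. A standalone sketch of the same algorithm over std::ifstream (CompareStreams and its signature are illustrative, not ART API):

#include <algorithm>
#include <cstdint>
#include <cstring>
#include <fstream>
#include <memory>

// Order first by length, then by the first differing 4 KiB chunk, mirroring
// FdFile::Compare above.
int CompareStreams(std::ifstream& a, int64_t len_a,
                   std::ifstream& b, int64_t len_b) {
  if (len_a != len_b) {
    return len_a < len_b ? -1 : 1;
  }
  constexpr size_t kBufferSize = 4096;
  std::unique_ptr<char[]> buffer1(new char[kBufferSize]);
  std::unique_ptr<char[]> buffer2(new char[kBufferSize]);
  int64_t remaining = len_a;
  while (remaining > 0) {
    size_t len = static_cast<size_t>(
        std::min<int64_t>(static_cast<int64_t>(kBufferSize), remaining));
    if (!a.read(buffer1.get(), len)) {
      return -1;  // A read failure is treated like a shorter file.
    }
    if (!b.read(buffer2.get(), len)) {
      return 1;
    }
    int result = std::memcmp(buffer1.get(), buffer2.get(), len);
    if (result != 0) {
      return result;
    }
    remaining -= len;
  }
  return 0;
}
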
diff --git a/runtime/base/unix_file/fd_file.h b/runtime/base/unix_file/fd_file.h
index e07c3fd..91b08bc 100644
--- a/runtime/base/unix_file/fd_file.h
+++ b/runtime/base/unix_file/fd_file.h
@@ -145,6 +145,11 @@
   // WARNING: Only use this when you know what you're doing!
   void MarkUnchecked();
 
+  // Compare against another file. Returns 0 if the files are equivalent; otherwise returns -1 or 1
+  // depending on whether this file's length is smaller or larger. If the lengths are equal, the
+  // function returns the memcmp result for the first chunk that differs.
+  int Compare(FdFile* other);
+
  protected:
   // If the guard state indicates checking (!=kNoCheck), go to the target state "target". Print the
   // given warning if the current state is or exceeds warn_threshold.
diff --git a/runtime/base/unix_file/fd_file_test.cc b/runtime/base/unix_file/fd_file_test.cc
index 6aef348..8b1a115 100644
--- a/runtime/base/unix_file/fd_file_test.cc
+++ b/runtime/base/unix_file/fd_file_test.cc
@@ -220,4 +220,58 @@
   EXPECT_FALSE(art::OS::FileExists(filename.c_str())) << filename;
 }
 
+TEST_F(FdFileTest, Compare) {
+  std::vector<uint8_t> buffer;
+  constexpr int64_t length = 17 * art::KB;
+  for (size_t i = 0; i < length; ++i) {
+    buffer.push_back(static_cast<uint8_t>(i));
+  }
+
+  auto reset_compare = [&](art::ScratchFile& a, art::ScratchFile& b) {
+    a.GetFile()->ResetOffset();
+    b.GetFile()->ResetOffset();
+    return a.GetFile()->Compare(b.GetFile());
+  };
+
+  art::ScratchFile tmp;
+  EXPECT_TRUE(tmp.GetFile()->WriteFully(&buffer[0], length));
+  EXPECT_EQ(tmp.GetFile()->GetLength(), length);
+
+  art::ScratchFile tmp2;
+  EXPECT_TRUE(tmp2.GetFile()->WriteFully(&buffer[0], length));
+  EXPECT_EQ(tmp2.GetFile()->GetLength(), length);
+
+  // Basic equality check.
+  tmp.GetFile()->ResetOffset();
+  tmp2.GetFile()->ResetOffset();
+  // Files should be the same.
+  EXPECT_EQ(reset_compare(tmp, tmp2), 0);
+
+  // Change a byte near the start.
+  ++buffer[2];
+  art::ScratchFile tmp3;
+  EXPECT_TRUE(tmp3.GetFile()->WriteFully(&buffer[0], length));
+  --buffer[2];
+  EXPECT_NE(reset_compare(tmp, tmp3), 0);
+
+  // Change a byte near the middle.
+  ++buffer[length / 2];
+  art::ScratchFile tmp4;
+  EXPECT_TRUE(tmp4.GetFile()->WriteFully(&buffer[0], length));
+  --buffer[length / 2];
+  EXPECT_NE(reset_compare(tmp, tmp4), 0);
+
+  // Change a byte near the end.
+  ++buffer[length - 5];
+  art::ScratchFile tmp5;
+  EXPECT_TRUE(tmp5.GetFile()->WriteFully(&buffer[0], length));
+  --buffer[length - 5];
+  EXPECT_NE(reset_compare(tmp, tmp5), 0);
+
+  // Reference check: an unmodified copy must still compare equal.
+  art::ScratchFile tmp6;
+  EXPECT_TRUE(tmp6.GetFile()->WriteFully(&buffer[0], length));
+  EXPECT_EQ(reset_compare(tmp, tmp6), 0);
+}
+
 }  // namespace unix_file
diff --git a/runtime/class_linker-inl.h b/runtime/class_linker-inl.h
index 0096c37..439ecaf 100644
--- a/runtime/class_linker-inl.h
+++ b/runtime/class_linker-inl.h
@@ -312,6 +312,17 @@
   return klass.Ptr();
 }
 
+template <class Visitor>
+inline void ClassLinker::VisitClassTables(const Visitor& visitor) {
+  Thread* const self = Thread::Current();
+  WriterMutexLock mu(self, *Locks::classlinker_classes_lock_);
+  for (const ClassLoaderData& data : class_loaders_) {
+    if (data.class_table != nullptr) {
+      visitor(data.class_table);
+    }
+  }
+}
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_CLASS_LINKER_INL_H_
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index a264867..845bd6d 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -3555,6 +3555,14 @@
   data.resolved_methods = dex_cache->GetResolvedMethods();
   data.class_table = ClassTableForClassLoader(class_loader);
   DCHECK(data.class_table != nullptr);
+  // Make sure to keep the dex cache alive in the class table. This case happens for the boot class
+  // path dex caches without an image.
+  data.class_table->InsertStrongRoot(dex_cache);
+  if (class_loader != nullptr) {
+    // Since we added a strong root to the class table, do the write barrier as required for
+    // remembered sets and generational GCs.
+    Runtime::Current()->GetHeap()->WriteBarrierEveryFieldOf(class_loader);
+  }
   dex_caches_.push_back(data);
 }
 
@@ -3686,7 +3694,8 @@
 
 ObjPtr<mirror::DexCache> ClassLinker::FindDexCache(Thread* self, const DexFile& dex_file) {
   ReaderMutexLock mu(self, *Locks::dex_lock_);
-  ObjPtr<mirror::DexCache> dex_cache = DecodeDexCache(self, FindDexCacheDataLocked(dex_file));
+  DexCacheData dex_cache_data = FindDexCacheDataLocked(dex_file);
+  ObjPtr<mirror::DexCache> dex_cache = DecodeDexCache(self, dex_cache_data);
   if (dex_cache != nullptr) {
     return dex_cache;
   }
@@ -3696,7 +3705,8 @@
       LOG(FATAL_WITHOUT_ABORT) << "Registered dex file " << data.dex_file->GetLocation();
     }
   }
-  LOG(FATAL) << "Failed to find DexCache for DexFile " << dex_file.GetLocation();
+  LOG(FATAL) << "Failed to find DexCache for DexFile " << dex_file.GetLocation()
+             << " " << &dex_file << " " << dex_cache_data.dex_file;
   UNREACHABLE();
 }
 
@@ -4272,13 +4282,7 @@
   std::string error_msg;
   verifier::FailureKind verifier_failure = verifier::FailureKind::kNoFailure;
   if (!preverified) {
-    Runtime* runtime = Runtime::Current();
-    verifier_failure = verifier::MethodVerifier::VerifyClass(self,
-                                                             klass.Get(),
-                                                             runtime->GetCompilerCallbacks(),
-                                                             runtime->IsAotCompiler(),
-                                                             log_level,
-                                                             &error_msg);
+    verifier_failure = PerformClassVerification(self, klass, log_level, &error_msg);
   }
 
   // Verification is done, grab the lock again.
@@ -4346,6 +4350,19 @@
   return verifier_failure;
 }
 
+verifier::FailureKind ClassLinker::PerformClassVerification(Thread* self,
+                                                            Handle<mirror::Class> klass,
+                                                            verifier::HardFailLogMode log_level,
+                                                            std::string* error_msg) {
+  Runtime* const runtime = Runtime::Current();
+  return verifier::MethodVerifier::VerifyClass(self,
+                                               klass.Get(),
+                                               runtime->GetCompilerCallbacks(),
+                                               runtime->IsAotCompiler(),
+                                               log_level,
+                                               error_msg);
+}
+
 bool ClassLinker::VerifyClassUsingOatFile(const DexFile& dex_file,
                                           ObjPtr<mirror::Class> klass,
                                           mirror::Class::Status& oat_file_class_status) {
@@ -4852,11 +4869,16 @@
       return WaitForInitializeClass(klass, self, lock);
     }
 
+    // Try to get the oat class's status for this class if the oat file is present. The compiler
+    // tries to validate superclass descriptors, and writes the result into the oat file.
+    // Runtime correctness is guaranteed by classpath checks done on loading. If the classpath
+    // is different at runtime than it was at compile time, the oat file is rejected. So if the
+    // oat file is present, the classpaths must match, and the runtime check can be skipped.
     bool has_oat_class = false;
-    const OatFile::OatClass oat_class =
-        (Runtime::Current()->IsStarted() && !Runtime::Current()->IsAotCompiler())
-            ? OatFile::FindOatClass(klass->GetDexFile(), klass->GetDexClassDefIndex(), &has_oat_class)
-            : OatFile::OatClass::Invalid();
+    const Runtime* runtime = Runtime::Current();
+    const OatFile::OatClass oat_class = (runtime->IsStarted() && !runtime->IsAotCompiler())
+        ? OatFile::FindOatClass(klass->GetDexFile(), klass->GetDexClassDefIndex(), &has_oat_class)
+        : OatFile::OatClass::Invalid();
     if (oat_class.GetStatus() < mirror::Class::kStatusSuperclassValidated &&
         !ValidateSuperClassDescriptors(klass)) {
       mirror::Class::SetStatus(klass, mirror::Class::kStatusErrorResolved, self);
@@ -4887,7 +4909,9 @@
       if (!super_initialized) {
         // The super class was verified ahead of entering initializing, we should only be here if
         // the super class became erroneous due to initialization.
-        CHECK(handle_scope_super->IsErroneous() && self->IsExceptionPending())
+        // In the AOT compiler, the super class might also still be initializing, and we do not
+        // want to process circular dependencies during pre-compilation.
+        CHECK(self->IsExceptionPending())
             << "Super class initialization failed for "
             << handle_scope_super->PrettyDescriptor()
             << " that has unexpected status " << handle_scope_super->GetStatus()
@@ -7104,7 +7128,7 @@
                                                                       method_alignment_);
   const size_t old_methods_ptr_size = (old_methods != nullptr) ? old_size : 0;
   auto* methods = reinterpret_cast<LengthPrefixedArray<ArtMethod>*>(
-      Runtime::Current()->GetLinearAlloc()->Realloc(
+      class_linker_->GetAllocatorForClassLoader(klass_->GetClassLoader())->Realloc(
           self_, old_methods, old_methods_ptr_size, new_size));
   CHECK(methods != nullptr);  // Native allocation failure aborts.
 
@@ -8574,15 +8598,6 @@
   return GetQuickGenericJniStub();
 }
 
-void ClassLinker::SetEntryPointsToCompiledCode(ArtMethod* method, const void* code) const {
-  CHECK(code != nullptr);
-  const uint8_t* base = reinterpret_cast<const uint8_t*>(code);  // Base of data points at code.
-  base -= sizeof(void*);  // Move backward so that code_offset != 0.
-  const uint32_t code_offset = sizeof(void*);
-  OatFile::OatMethod oat_method(base, code_offset);
-  oat_method.LinkMethod(method);
-}
-
 void ClassLinker::SetEntryPointsToInterpreter(ArtMethod* method) const {
   if (!method->IsNative()) {
     method->SetEntryPointFromQuickCompiledCode(GetQuickToInterpreterBridge());
@@ -8839,16 +8854,6 @@
   find_array_class_cache_next_victim_ = 0;
 }
 
-void ClassLinker::ClearClassTableStrongRoots() const {
-  Thread* const self = Thread::Current();
-  WriterMutexLock mu(self, *Locks::classlinker_classes_lock_);
-  for (const ClassLoaderData& data : class_loaders_) {
-    if (data.class_table != nullptr) {
-      data.class_table->ClearStrongRoots();
-    }
-  }
-}
-
 void ClassLinker::VisitClassLoaders(ClassLoaderVisitor* visitor) const {
   Thread* const self = Thread::Current();
   for (const ClassLoaderData& data : class_loaders_) {
diff --git a/runtime/class_linker.h b/runtime/class_linker.h
index 324ed0c..66bcbe0 100644
--- a/runtime/class_linker.h
+++ b/runtime/class_linker.h
@@ -511,10 +511,6 @@
     return intern_table_;
   }
 
-  // Set the entrypoints up for method to the given code.
-  void SetEntryPointsToCompiledCode(ArtMethod* method, const void* method_code) const
-      REQUIRES_SHARED(Locks::mutator_lock_);
-
   // Set the entrypoints up for method to the enter the interpreter.
   void SetEntryPointsToInterpreter(ArtMethod* method) const
       REQUIRES_SHARED(Locks::mutator_lock_);
@@ -635,9 +631,9 @@
   // Create the IMT and conflict tables for a class.
   void FillIMTAndConflictTables(ObjPtr<mirror::Class> klass) REQUIRES_SHARED(Locks::mutator_lock_);
 
-  // Clear class table strong roots (other than classes themselves). This is done by dex2oat to
-  // allow pruning dex caches.
-  void ClearClassTableStrongRoots() const
+  // Visit all of the class tables. This is used by dex2oat to allow pruning dex caches.
+  template <class Visitor>
+  void VisitClassTables(const Visitor& visitor)
       REQUIRES(!Locks::classlinker_classes_lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
@@ -710,6 +706,12 @@
       REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!Locks::dex_lock_);
 
+  virtual verifier::FailureKind PerformClassVerification(Thread* self,
+                                                         Handle<mirror::Class> klass,
+                                                         verifier::HardFailLogMode log_level,
+                                                         std::string* error_msg)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
  private:
   class LinkInterfaceMethodsHelper;
 
diff --git a/runtime/class_loader_context.cc b/runtime/class_loader_context.cc
index 92d0f8d..e7051b3 100644
--- a/runtime/class_loader_context.cc
+++ b/runtime/class_loader_context.cc
@@ -68,6 +68,10 @@
   }
 }
 
+std::unique_ptr<ClassLoaderContext> ClassLoaderContext::Default() {
+  return Create("");
+}
+
 std::unique_ptr<ClassLoaderContext> ClassLoaderContext::Create(const std::string& spec) {
   std::unique_ptr<ClassLoaderContext> result(new ClassLoaderContext());
   if (result->Parse(spec)) {
@@ -263,7 +267,16 @@
   return removed_locations;
 }
 
+std::string ClassLoaderContext::EncodeContextForDex2oat(const std::string& base_dir) const {
+  return EncodeContext(base_dir, /*for_dex2oat*/ true);
+}
+
 std::string ClassLoaderContext::EncodeContextForOatFile(const std::string& base_dir) const {
+  return EncodeContext(base_dir, /*for_dex2oat*/ false);
+}
+
+std::string ClassLoaderContext::EncodeContext(const std::string& base_dir,
+                                              bool for_dex2oat) const {
   CheckDexFilesOpened("EncodeContextForOatFile");
   if (special_shared_library_) {
     return OatFile::kSpecialSharedLibrary;
@@ -286,8 +299,17 @@
     }
     out << GetClassLoaderTypeName(info.type);
     out << kClassLoaderOpeningMark;
+    std::set<std::string> seen_locations;
     for (size_t k = 0; k < info.opened_dex_files.size(); k++) {
       const std::unique_ptr<const DexFile>& dex_file = info.opened_dex_files[k];
+      if (for_dex2oat) {
+        // dex2oat only needs the base location. It cannot accept multidex locations.
+        // So ensure we only add each file once.
+        bool new_insert = seen_locations.insert(dex_file->GetBaseLocation()).second;
+        if (!new_insert) {
+          continue;
+        }
+      }
       const std::string& location = dex_file->GetLocation();
       if (k > 0) {
         out << kClasspathSeparator;
@@ -298,8 +320,11 @@
       } else {
         out << dex_file->GetLocation().c_str();
       }
-      out << kDexFileChecksumSeparator;
-      out << dex_file->GetLocationChecksum();
+      // dex2oat does not need the checksums.
+      if (!for_dex2oat) {
+        out << kDexFileChecksumSeparator;
+        out << dex_file->GetLocationChecksum();
+      }
     }
     out << kClassLoaderClosingMark;
   }
@@ -593,7 +618,7 @@
   }
 }
 
-bool ClassLoaderContext::VerifyClassLoaderContextMatch(const std::string& context_spec) {
+bool ClassLoaderContext::VerifyClassLoaderContextMatch(const std::string& context_spec) const {
   ClassLoaderContext expected_context;
   if (!expected_context.Parse(context_spec, /*parse_checksums*/ true)) {
     LOG(WARNING) << "Invalid class loader context: " << context_spec;
diff --git a/runtime/class_loader_context.h b/runtime/class_loader_context.h
index 37dd02b..9afa880 100644
--- a/runtime/class_loader_context.h
+++ b/runtime/class_loader_context.h
@@ -34,9 +34,6 @@
 // Utility class which holds the class loader context used during compilation/verification.
 class ClassLoaderContext {
  public:
-  // Creates an empty context (with no class loaders).
-  ClassLoaderContext();
-
   ~ClassLoaderContext();
 
   // Opens requested class path files and appends them to ClassLoaderInfo::opened_dex_files.
@@ -82,9 +79,16 @@
   // (so that it can be read and verified at runtime against the actual class
   // loader hierarchy).
   // Should only be called if OpenDexFiles() returned true.
-  // E.g. if the context is PCL[a.dex:b.dex] this will return "a.dex*a_checksum*b.dex*a_checksum".
+  // E.g. if the context is PCL[a.dex:b.dex] this will return
+  // "PCL[a.dex*a_checksum*b.dex*a_checksum]".
   std::string EncodeContextForOatFile(const std::string& base_dir) const;
 
+  // Encodes the context as a string suitable to be passed to dex2oat.
+  // This is the same as EncodeContextForOatFile but without the checksums and with each dex
+  // file added only once (multidex locations are skipped).
+  // Should only be called if OpenDexFiles() returned true.
+  std::string EncodeContextForDex2oat(const std::string& base_dir) const;
+
   // Flattens the opened dex files into the given vector.
   // Should only be called if OpenDexFiles() returned true.
   std::vector<const DexFile*> FlattenOpenedDexFiles() const;
@@ -94,7 +98,7 @@
   //    - the number and type of the class loaders from the chain matches
   //    - the class loader from the same position have the same classpath
   //      (the order and checksum of the dex files matches)
-  bool VerifyClassLoaderContextMatch(const std::string& context_spec);
+  bool VerifyClassLoaderContextMatch(const std::string& context_spec) const;
 
   // Creates the class loader context from the given string.
   // The format: ClassLoaderType1[ClasspathElem1:ClasspathElem2...];ClassLoaderType2[...]...
@@ -119,6 +123,10 @@
   static std::unique_ptr<ClassLoaderContext> CreateContextForClassLoader(jobject class_loader,
                                                                          jobjectArray dex_elements);
 
+  // Returns the default class loader context to be used when none is specified.
+  // This will return a context with a single, empty PathClassLoader.
+  static std::unique_ptr<ClassLoaderContext> Default();
+
  private:
   enum ClassLoaderType {
     kInvalidClassLoader = 0,
@@ -144,6 +152,9 @@
     explicit ClassLoaderInfo(ClassLoaderType cl_type) : type(cl_type) {}
   };
 
+  // Creates an empty context (with no class loaders).
+  ClassLoaderContext();
+
   // Constructs an empty context.
   // `owns_the_dex_files` specifies whether or not the context will own the opened dex files
   // present in the class loader chain. If `owns_the_dex_files` is true then OpenDexFiles cannot
@@ -173,7 +184,15 @@
   bool AddInfoToContextFromClassLoader(ScopedObjectAccessAlreadyRunnable& soa,
                                        Handle<mirror::ClassLoader> class_loader,
                                        Handle<mirror::ObjectArray<mirror::Object>> dex_elements)
-  REQUIRES_SHARED(Locks::mutator_lock_);
+    REQUIRES_SHARED(Locks::mutator_lock_);
+
+  // Encodes the context as a string suitable to be passed to dex2oat or to be added to the
+  // oat file as the class path key.
+  // If for_dex2oat is true, the encoding adds each dex file only once (i.e. it does not add
+  // multidex locations). Otherwise, for oat files, the encoding adds all the dex files
+  // (including multidex) together with their checksums.
+  // Should only be called if OpenDexFiles() returned true.
+  std::string EncodeContext(const std::string& base_dir, bool for_dex2oat) const;
 
   // Extracts the class loader type from the given spec.
   // Return ClassLoaderContext::kInvalidClassLoader if the class loader type is not
diff --git a/runtime/class_loader_context_test.cc b/runtime/class_loader_context_test.cc
index 2b85188..d4688c1 100644
--- a/runtime/class_loader_context_test.cc
+++ b/runtime/class_loader_context_test.cc
@@ -455,6 +455,20 @@
   ASSERT_EQ(expected_encoding, context->EncodeContextForOatFile(""));
 }
 
+TEST_F(ClassLoaderContextTest, EncodeForDex2oat) {
+  std::string dex1_name = GetTestDexFileName("Main");
+  std::string dex2_name = GetTestDexFileName("MultiDex");
+  std::unique_ptr<ClassLoaderContext> context =
+      ClassLoaderContext::Create("PCL[" + dex1_name + ":" + dex2_name + "]");
+  ASSERT_TRUE(context->OpenDexFiles(InstructionSet::kArm, ""));
+
+  std::vector<std::unique_ptr<const DexFile>> dex1 = OpenTestDexFiles("Main");
+  std::vector<std::unique_ptr<const DexFile>> dex2 = OpenTestDexFiles("MultiDex");
+  std::string encoding = context->EncodeContextForDex2oat("");
+  std::string expected_encoding = "PCL[" + dex1_name + ":" + dex2_name + "]";
+  ASSERT_EQ(expected_encoding, context->EncodeContextForDex2oat(""));
+}
+
 // TODO(calin) add a test which creates the context for a class loader together with dex_elements.
 TEST_F(ClassLoaderContextTest, CreateContextForClassLoader) {
   // The chain is
diff --git a/runtime/class_table-inl.h b/runtime/class_table-inl.h
index b15d82f..1280466 100644
--- a/runtime/class_table-inl.h
+++ b/runtime/class_table-inl.h
@@ -132,6 +132,13 @@
   }
 }
 
+template <typename Filter>
+inline void ClassTable::RemoveStrongRoots(const Filter& filter) {
+  WriterMutexLock mu(Thread::Current(), lock_);
+  strong_roots_.erase(std::remove_if(strong_roots_.begin(), strong_roots_.end(), filter),
+                      strong_roots_.end());
+}
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_CLASS_TABLE_INL_H_
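
RemoveStrongRoots is a filtered erase over the strong-root vector; combined with the VisitClassTables hook added earlier in this diff, dex2oat can drop only the dex-cache roots it wants to prune. A minimal sketch of the erase/remove_if shape (StrongRoot stands in for GcRoot<mirror::Object>):

#include <algorithm>
#include <vector>

// Stand-in for GcRoot<mirror::Object>; only the predicate matters here.
struct StrongRoot {
  bool is_dex_cache;
};

// remove_if compacts the elements that match the filter to the tail and
// returns the new logical end; erase() then drops them, exactly the shape
// of ClassTable::RemoveStrongRoots above.
void RemoveDexCacheRoots(std::vector<StrongRoot>& strong_roots) {
  auto filter = [](const StrongRoot& root) { return root.is_dex_cache; };
  strong_roots.erase(
      std::remove_if(strong_roots.begin(), strong_roots.end(), filter),
      strong_roots.end());
}
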
diff --git a/runtime/class_table.h b/runtime/class_table.h
index 8616dfb..a259725 100644
--- a/runtime/class_table.h
+++ b/runtime/class_table.h
@@ -250,6 +250,12 @@
       REQUIRES(!lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
+  // Filter strong roots (other than classes themselves).
+  template <typename Filter>
+  void RemoveStrongRoots(const Filter& filter)
+      REQUIRES(!lock_)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
   ReaderWriterMutex& GetLock() {
     return lock_;
   }
diff --git a/runtime/common_dex_operations.h b/runtime/common_dex_operations.h
index 528db96..fcc5393 100644
--- a/runtime/common_dex_operations.h
+++ b/runtime/common_dex_operations.h
@@ -200,6 +200,11 @@
       break;
     }
   }
+  if (transaction_active) {
+    if (UNLIKELY(self->IsExceptionPending())) {
+      return false;
+    }
+  }
   return true;
 }
 
diff --git a/runtime/common_runtime_test.h b/runtime/common_runtime_test.h
index 74bc0b2..e2131f1 100644
--- a/runtime/common_runtime_test.h
+++ b/runtime/common_runtime_test.h
@@ -288,6 +288,12 @@
     return; \
   }
 
+#define TEST_DISABLED_FOR_MEMORY_TOOL_VALGRIND() \
+  if (RUNNING_ON_MEMORY_TOOL > 0 && kMemoryToolIsValgrind) { \
+    printf("WARNING: TEST DISABLED FOR MEMORY TOOL VALGRIND\n"); \
+    return; \
+  }
+
 #define TEST_DISABLED_FOR_MEMORY_TOOL_ASAN() \
   if (RUNNING_ON_MEMORY_TOOL > 0 && !kMemoryToolIsValgrind) { \
     printf("WARNING: TEST DISABLED FOR MEMORY TOOL ASAN\n"); \
diff --git a/runtime/compiler_callbacks.h b/runtime/compiler_callbacks.h
index 806653a..c51bb5e 100644
--- a/runtime/compiler_callbacks.h
+++ b/runtime/compiler_callbacks.h
@@ -22,6 +22,8 @@
 
 namespace art {
 
+class CompilerDriver;
+
 namespace verifier {
 
 class MethodVerifier;
@@ -49,6 +51,13 @@
   virtual verifier::VerifierDeps* GetVerifierDeps() const = 0;
   virtual void SetVerifierDeps(verifier::VerifierDeps* deps ATTRIBUTE_UNUSED) {}
 
+  virtual bool CanAssumeVerified(ClassReference ref ATTRIBUTE_UNUSED) {
+    return false;
+  }
+
+  virtual void SetDoesClassUnloading(bool does_class_unloading ATTRIBUTE_UNUSED,
+                                     CompilerDriver* compiler_driver ATTRIBUTE_UNUSED) {}
+
   bool IsBootImage() {
     return mode_ == CallbackMode::kCompileBootImage;
   }
diff --git a/runtime/dex_file_layout.cc b/runtime/dex_file_layout.cc
new file mode 100644
index 0000000..4375d7f
--- /dev/null
+++ b/runtime/dex_file_layout.cc
@@ -0,0 +1,83 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dex_file_layout.h"
+
+#include <sys/mman.h>
+
+#include "dex_file.h"
+#include "utils.h"
+
+namespace art {
+
+void DexLayoutSection::Subsection::Madvise(const DexFile* dex_file, int advice) const {
+  DCHECK(dex_file != nullptr);
+  DCHECK_LE(size_, dex_file->Size());
+  DCHECK_LE(offset_ + size_, dex_file->Size());
+  MadviseLargestPageAlignedRegion(dex_file->Begin() + offset_,
+                                  dex_file->Begin() + offset_ + size_,
+                                  advice);
+}
+
+void DexLayoutSections::Madvise(const DexFile* dex_file, MadviseState state) const {
+  // The dex file is already defaulted to random access everywhere.
+  for (const DexLayoutSection& section : sections_) {
+    switch (state) {
+      case MadviseState::kMadviseStateAtLoad: {
+        section.parts_[static_cast<size_t>(LayoutType::kLayoutTypeStartupOnly)].Madvise(
+            dex_file,
+            MADV_WILLNEED);
+        section.parts_[static_cast<size_t>(LayoutType::kLayoutTypeHot)].Madvise(
+            dex_file,
+            MADV_WILLNEED);
+        break;
+      }
+      case MadviseState::kMadviseStateFinishedLaunch: {
+        section.parts_[static_cast<size_t>(LayoutType::kLayoutTypeStartupOnly)].Madvise(
+            dex_file,
+            MADV_DONTNEED);
+        break;
+      }
+      case MadviseState::kMadviseStateFinishedTrim: {
+        section.parts_[static_cast<size_t>(LayoutType::kLayoutTypeSometimesUsed)].Madvise(
+            dex_file,
+            MADV_DONTNEED);
+        section.parts_[static_cast<size_t>(LayoutType::kLayoutTypeUsedOnce)].Madvise(
+            dex_file,
+            MADV_DONTNEED);
+        break;
+      }
+    }
+  }
+}
+
+std::ostream& operator<<(std::ostream& os, const DexLayoutSection& section) {
+  for (size_t i = 0; i < static_cast<size_t>(LayoutType::kLayoutTypeCount); ++i) {
+    const DexLayoutSection::Subsection& part = section.parts_[i];
+    os << static_cast<LayoutType>(i) << "("
+       << part.offset_ << "-" << part.offset_ + part.size_ << ") ";
+  }
+  return os;
+}
+
+std::ostream& operator<<(std::ostream& os, const DexLayoutSections& sections) {
+  for (size_t i = 0; i < static_cast<size_t>(DexLayoutSections::SectionType::kSectionCount); ++i) {
+    os << static_cast<DexLayoutSections::SectionType>(i) << ":" << sections.sections_[i] << "\n";
+  }
+  return os;
+}
+
+}  // namespace art
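
Subsection::Madvise above delegates to MadviseLargestPageAlignedRegion from ART's utils.h, which only advises whole pages inside the requested byte range. A rough standalone equivalent, assuming POSIX madvise(2) and doing the page rounding inline:

#include <stdint.h>
#include <sys/mman.h>
#include <unistd.h>

// Advise only the largest page-aligned region inside [begin, end), since
// madvise(2) operates on whole pages; partial pages at the edges are skipped.
void MadviseLargestAlignedRegion(const uint8_t* begin, const uint8_t* end, int advice) {
  const uintptr_t page_size = static_cast<uintptr_t>(sysconf(_SC_PAGESIZE));
  uintptr_t first = (reinterpret_cast<uintptr_t>(begin) + page_size - 1) & ~(page_size - 1);
  uintptr_t last = reinterpret_cast<uintptr_t>(end) & ~(page_size - 1);
  if (first < last) {
    madvise(reinterpret_cast<void*>(first), last - first, advice);
  }
}
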
diff --git a/runtime/dex_file_layout.h b/runtime/dex_file_layout.h
new file mode 100644
index 0000000..40cc912
--- /dev/null
+++ b/runtime/dex_file_layout.h
@@ -0,0 +1,96 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_DEX_FILE_LAYOUT_H_
+#define ART_RUNTIME_DEX_FILE_LAYOUT_H_
+
+#include <cstdint>
+#include <iosfwd>
+
+namespace art {
+
+class DexFile;
+
+enum class LayoutType : uint8_t {
+  // Layout of things that are randomly used. These should be advised to random access.
+  // Without layout, this is the default mode when loading a dex file.
+  kLayoutTypeSometimesUsed,
+  // Layout of things that are only used during startup, these can be madvised after launch.
+  kLayoutTypeStartupOnly,
+  // Layout of things that are hot (commonly accessed). These should be pinned or madvised with
+  // MADV_WILLNEED.
+  kLayoutTypeHot,
+  // Layout of things that are needed probably only once (class initializers). These can be
+  // madvised during trim events.
+  kLayoutTypeUsedOnce,
+  // Layout of things that are thought to be unused. These things should be advised to random
+  // access.
+  kLayoutTypeUnused,
+  // Unused value, just the number of elements in the enum.
+  kLayoutTypeCount,
+};
+std::ostream& operator<<(std::ostream& os, const LayoutType& collector_type);
+
+enum class MadviseState : uint8_t {
+  // Madvise based on a file that was just loaded.
+  kMadviseStateAtLoad,
+  // Madvise after launch has finished.
+  kMadviseStateFinishedLaunch,
+  // Trim by madvising code that is unlikely to be important in the future.
+  kMadviseStateFinishedTrim,
+};
+std::ostream& operator<<(std::ostream& os, const MadviseState& collector_type);
+
+// A dex layout section such as code items or strings. Each section is composed of subsections
+// that are laid out adjacent to each other (hot, unused, startup, etc.).
+class DexLayoutSection {
+ public:
+  // A subsection is a contiguous range of the dex file that is all part of the same layout hint.
+  class Subsection {
+   public:
+    // Use uint32_t to handle 32/64 bit cross compilation.
+    uint32_t offset_ = 0u;
+    uint32_t size_ = 0u;
+
+    void Madvise(const DexFile* dex_file, int advice) const;
+  };
+
+  Subsection parts_[static_cast<size_t>(LayoutType::kLayoutTypeCount)];
+};
+
+// A set of dex layout sections; currently there is one section for code and one for strings.
+class DexLayoutSections {
+ public:
+  enum class SectionType : uint8_t {
+    kSectionTypeCode,
+    kSectionTypeStrings,
+    kSectionCount,
+  };
+
+  // Advise the kernel of expected access patterns for the dex file based on its layout. The
+  // caller is expected to have already madvised the whole file to MADV_RANDOM.
+  void Madvise(const DexFile* dex_file, MadviseState state) const;
+
+  DexLayoutSection sections_[static_cast<size_t>(SectionType::kSectionCount)];
+};
+
+std::ostream& operator<<(std::ostream& os, const DexLayoutSections::SectionType& collector_type);
+std::ostream& operator<<(std::ostream& os, const DexLayoutSection& section);
+std::ostream& operator<<(std::ostream& os, const DexLayoutSections& sections);
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_DEX_FILE_LAYOUT_H_
diff --git a/runtime/dex_file_verifier.cc b/runtime/dex_file_verifier.cc
index c5c4eda..8fdd470 100644
--- a/runtime/dex_file_verifier.cc
+++ b/runtime/dex_file_verifier.cc
@@ -363,7 +363,7 @@
   // Check file size from the header.
   uint32_t expected_size = header_->file_size_;
   if (size_ != expected_size) {
-    ErrorStringPrintf("Bad file size (%zd, expected %ud)", size_, expected_size);
+    ErrorStringPrintf("Bad file size (%zd, expected %u)", size_, expected_size);
     return false;
   }
 
diff --git a/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc b/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc
index a2a6e08..5355267 100644
--- a/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc
@@ -65,8 +65,8 @@
   // A class may be accessing another class' fields when it doesn't have access, as access has been
   // given by inheritance.
   ScopedQuickEntrypointChecks sqec(self);
-  auto caller_and_outer = GetCalleeSaveMethodCallerAndOuterMethod(self,
-                                                                  CalleeSaveType::kSaveEverything);
+  auto caller_and_outer = GetCalleeSaveMethodCallerAndOuterMethod(
+      self, CalleeSaveType::kSaveEverythingForClinit);
   ArtMethod* caller = caller_and_outer.caller;
   mirror::Class* result =
       ResolveVerifyAndClinit(dex::TypeIndex(type_idx), caller, self, true, false);
@@ -80,8 +80,8 @@
     REQUIRES_SHARED(Locks::mutator_lock_) {
   // Called when method->dex_cache_resolved_types_[] misses.
   ScopedQuickEntrypointChecks sqec(self);
-  auto caller_and_outer = GetCalleeSaveMethodCallerAndOuterMethod(self,
-                                                                  CalleeSaveType::kSaveEverything);
+  auto caller_and_outer = GetCalleeSaveMethodCallerAndOuterMethod(
+      self, CalleeSaveType::kSaveEverythingForClinit);
   ArtMethod* caller = caller_and_outer.caller;
   mirror::Class* result =
       ResolveVerifyAndClinit(dex::TypeIndex(type_idx), caller, self, false, false);
diff --git a/runtime/entrypoints/quick/quick_field_entrypoints.cc b/runtime/entrypoints/quick/quick_field_entrypoints.cc
index 726bddd..b298db6 100644
--- a/runtime/entrypoints/quick/quick_field_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_field_entrypoints.cc
@@ -301,6 +301,7 @@
 extern "C" mirror::Object* artReadBarrierSlow(mirror::Object* ref ATTRIBUTE_UNUSED,
                                               mirror::Object* obj,
                                               uint32_t offset) {
+  // Used only in connection with non-volatile loads.
   DCHECK(kEmitCompilerReadBarrier);
   uint8_t* raw_addr = reinterpret_cast<uint8_t*>(obj) + offset;
   mirror::HeapReference<mirror::Object>* ref_addr =
@@ -308,9 +309,10 @@
   constexpr ReadBarrierOption kReadBarrierOption =
       kUseReadBarrier ? kWithReadBarrier : kWithoutReadBarrier;
   mirror::Object* result =
-      ReadBarrier::Barrier<mirror::Object, kReadBarrierOption>(obj,
-                                                               MemberOffset(offset),
-                                                               ref_addr);
+      ReadBarrier::Barrier<mirror::Object, /* kIsVolatile */ false, kReadBarrierOption>(
+        obj,
+        MemberOffset(offset),
+        ref_addr);
   return result;
 }
 
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints_test.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints_test.cc
index 7e08b7a..b692618 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints_test.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints_test.cc
@@ -90,7 +90,18 @@
                  GetCalleeSaveFrameSize(isa, CalleeSaveType::kSaveRefsOnly));        \
   CheckFrameSize(isa,                                                                \
                  CalleeSaveType::kSaveAllCalleeSaves,                                \
-                 GetCalleeSaveFrameSize(isa, CalleeSaveType::kSaveAllCalleeSaves))
+                 GetCalleeSaveFrameSize(isa, CalleeSaveType::kSaveAllCalleeSaves));  \
+  CheckFrameSize(isa,                                                                \
+                 CalleeSaveType::kSaveEverything,                                    \
+                 GetCalleeSaveFrameSize(isa, CalleeSaveType::kSaveEverything));      \
+  CheckFrameSize(isa,                                                                \
+                 CalleeSaveType::kSaveEverythingForClinit,                           \
+                 GetCalleeSaveFrameSize(isa,                                         \
+                                        CalleeSaveType::kSaveEverythingForClinit));  \
+  CheckFrameSize(isa,                                                                \
+                 CalleeSaveType::kSaveEverythingForSuspendCheck,                     \
+                 GetCalleeSaveFrameSize(                                             \
+                     isa, CalleeSaveType::kSaveEverythingForSuspendCheck))
 
   CHECK_FRAME_SIZE(kArm);
   CHECK_FRAME_SIZE(kArm64);
@@ -123,6 +134,13 @@
                 GetCalleeSaveReturnPcOffset(kRuntimeISA, CalleeSaveType::kSaveRefsOnly));
   CheckPCOffset(kRuntimeISA, CalleeSaveType::kSaveAllCalleeSaves,
                 GetCalleeSaveReturnPcOffset(kRuntimeISA, CalleeSaveType::kSaveAllCalleeSaves));
+  CheckPCOffset(kRuntimeISA, CalleeSaveType::kSaveEverything,
+                GetCalleeSaveReturnPcOffset(kRuntimeISA, CalleeSaveType::kSaveEverything));
+  CheckPCOffset(kRuntimeISA, CalleeSaveType::kSaveEverythingForClinit,
+                GetCalleeSaveReturnPcOffset(kRuntimeISA, CalleeSaveType::kSaveEverythingForClinit));
+  CheckPCOffset(kRuntimeISA, CalleeSaveType::kSaveEverythingForSuspendCheck,
+                GetCalleeSaveReturnPcOffset(kRuntimeISA,
+                                            CalleeSaveType::kSaveEverythingForSuspendCheck));
 }
 
 }  // namespace art
diff --git a/runtime/fault_handler.cc b/runtime/fault_handler.cc
index fd0cd5f..7d01af0 100644
--- a/runtime/fault_handler.cc
+++ b/runtime/fault_handler.cc
@@ -79,8 +79,7 @@
 static mirror::Class* SafeGetClass(mirror::Object* obj) REQUIRES_SHARED(Locks::mutator_lock_) {
   char* obj_cls = reinterpret_cast<char*>(obj) + mirror::Object::ClassOffset().SizeValue();
 
-  mirror::HeapReference<mirror::Class> cls =
-      mirror::HeapReference<mirror::Class>::FromMirrorPtr(nullptr);
+  mirror::HeapReference<mirror::Class> cls;
   ssize_t rc = SafeCopy(&cls, obj_cls, sizeof(cls));
   CHECK_NE(-1, rc);
 
diff --git a/runtime/gc/accounting/mod_union_table.cc b/runtime/gc/accounting/mod_union_table.cc
index 2901995..1b3d0da 100644
--- a/runtime/gc/accounting/mod_union_table.cc
+++ b/runtime/gc/accounting/mod_union_table.cc
@@ -115,8 +115,8 @@
   }
 
  private:
-  template<bool kPoisonReferences>
-  void MarkReference(mirror::ObjectReference<kPoisonReferences, mirror::Object>* obj_ptr) const
+  template<typename CompressedReferenceType>
+  void MarkReference(CompressedReferenceType* obj_ptr) const
       REQUIRES_SHARED(Locks::mutator_lock_) {
     // Only add the reference if it is non null and fits our criteria.
     mirror::Object* ref = obj_ptr->AsMirrorPtr();
diff --git a/runtime/gc/accounting/space_bitmap.cc b/runtime/gc/accounting/space_bitmap.cc
index 92b4360..280c0b1 100644
--- a/runtime/gc/accounting/space_bitmap.cc
+++ b/runtime/gc/accounting/space_bitmap.cc
@@ -48,16 +48,21 @@
   CHECK(mem_map != nullptr);
   uintptr_t* bitmap_begin = reinterpret_cast<uintptr_t*>(mem_map->Begin());
   const size_t bitmap_size = ComputeBitmapSize(heap_capacity);
-  return new SpaceBitmap(name, mem_map, bitmap_begin, bitmap_size, heap_begin);
+  return new SpaceBitmap(name, mem_map, bitmap_begin, bitmap_size, heap_begin, heap_capacity);
 }
 
 template<size_t kAlignment>
-SpaceBitmap<kAlignment>::SpaceBitmap(const std::string& name, MemMap* mem_map, uintptr_t* bitmap_begin,
-                                     size_t bitmap_size, const void* heap_begin)
+SpaceBitmap<kAlignment>::SpaceBitmap(const std::string& name,
+                                     MemMap* mem_map,
+                                     uintptr_t* bitmap_begin,
+                                     size_t bitmap_size,
+                                     const void* heap_begin,
+                                     size_t heap_capacity)
     : mem_map_(mem_map),
       bitmap_begin_(reinterpret_cast<Atomic<uintptr_t>*>(bitmap_begin)),
       bitmap_size_(bitmap_size),
       heap_begin_(reinterpret_cast<uintptr_t>(heap_begin)),
+      heap_limit_(reinterpret_cast<uintptr_t>(heap_begin) + heap_capacity),
       name_(name) {
   CHECK(bitmap_begin_ != nullptr);
   CHECK_NE(bitmap_size, 0U);
@@ -89,6 +94,7 @@
   if (new_size < bitmap_size_) {
     bitmap_size_ = new_size;
   }
+  heap_limit_ = new_end;
   // Not sure if doing this trim is necessary, since nothing past the end of the heap capacity
   // should be marked.
 }
diff --git a/runtime/gc/accounting/space_bitmap.h b/runtime/gc/accounting/space_bitmap.h
index 2fe6394..b49e0b7 100644
--- a/runtime/gc/accounting/space_bitmap.h
+++ b/runtime/gc/accounting/space_bitmap.h
@@ -163,6 +163,7 @@
 
   void SetHeapSize(size_t bytes) {
     // TODO: Un-map the end of the mem map.
+    heap_limit_ = heap_begin_ + bytes;
     bitmap_size_ = OffsetToIndex(bytes) * sizeof(intptr_t);
     CHECK_EQ(HeapSize(), bytes);
   }
@@ -173,7 +174,7 @@
 
   // The maximum address which the bitmap can span. (HeapBegin() <= object < HeapLimit()).
   uint64_t HeapLimit() const {
-    return static_cast<uint64_t>(HeapBegin()) + HeapSize();
+    return heap_limit_;
   }
 
   // Set the max address which can be covered by the bitmap.
@@ -196,8 +197,12 @@
  private:
   // TODO: heap_end_ is initialized so that the heap bitmap is empty; this doesn't require the -1,
   // but we document that this is expected on heap_end_.
-  SpaceBitmap(const std::string& name, MemMap* mem_map, uintptr_t* bitmap_begin, size_t bitmap_size,
-              const void* heap_begin);
+  SpaceBitmap(const std::string& name,
+              MemMap* mem_map,
+              uintptr_t* bitmap_begin,
+              size_t bitmap_size,
+              const void* heap_begin,
+              size_t heap_capacity);
 
   template<bool kSetBit>
   bool Modify(const mirror::Object* obj);
@@ -211,10 +216,13 @@
   // Size of this bitmap.
   size_t bitmap_size_;
 
-  // The base address of the heap, which corresponds to the word containing the first bit in the
-  // bitmap.
+  // The start address of the memory covered by the bitmap, which corresponds to the word
+  // containing the first bit in the bitmap.
   const uintptr_t heap_begin_;
 
+  // The end address of the memory covered by the bitmap. This may not be on a word boundary.
+  uintptr_t heap_limit_;
+
   // Name of this bitmap.
   std::string name_;
 };
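
For intuition on why the limit is cached rather than recomputed: HeapSize() is derived from bitmap_size_, which is kept in whole uintptr_t words, so a recomputed limit is only accurate to word-coverage granularity. A back-of-envelope check, under stated assumptions:

    #include <cstddef>
    #include <cstdint>

    // Assumes a 64-bit build and kAlignment == 8 (one bitmap bit per 8 bytes);
    // both are stated assumptions, not derived from this diff.
    constexpr std::size_t kAlignment = 8;
    constexpr std::size_t kBitsPerWord = sizeof(std::uintptr_t) * 8;   // 64
    constexpr std::size_t kBytesPerWord = kBitsPerWord * kAlignment;   // 512

    // A limit recomputed from bitmap_size_ is therefore a multiple of 512, so
    // an exact capacity that is not word-aligned needs the cached heap_limit_.
    static_assert(kBytesPerWord == 512, "one bitmap word spans 512 heap bytes");
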
diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc
index 9d672b1..52b355d 100644
--- a/runtime/gc/collector/concurrent_copying.cc
+++ b/runtime/gc/collector/concurrent_copying.cc
@@ -2431,24 +2431,31 @@
       // Non-immune non-moving space. Use the mark bitmap.
       accounting::ContinuousSpaceBitmap* mark_bitmap =
           heap_mark_bitmap_->GetContinuousSpaceBitmap(from_ref);
-      accounting::LargeObjectBitmap* los_bitmap =
-          heap_mark_bitmap_->GetLargeObjectBitmap(from_ref);
-      CHECK(los_bitmap != nullptr) << "LOS bitmap covers the entire address range";
       bool is_los = mark_bitmap == nullptr;
       if (!is_los && mark_bitmap->Test(from_ref)) {
         // Already marked.
         to_ref = from_ref;
-      } else if (is_los && los_bitmap->Test(from_ref)) {
-        // Already marked in LOS.
-        to_ref = from_ref;
       } else {
-        // Not marked.
-        if (IsOnAllocStack(from_ref)) {
-          // If on the allocation stack, it's considered marked.
+        accounting::LargeObjectBitmap* los_bitmap =
+            heap_mark_bitmap_->GetLargeObjectBitmap(from_ref);
+        // We may not have a large object space when running dex2oat, so don't assume it exists.
+        if (los_bitmap == nullptr) {
+          CHECK(heap_->GetLargeObjectsSpace() == nullptr)
+              << "LOS bitmap covers the entire address range " << from_ref
+              << " " << heap_->DumpSpaces();
+        }
+        if (los_bitmap != nullptr && is_los && los_bitmap->Test(from_ref)) {
+          // Already marked in LOS.
           to_ref = from_ref;
         } else {
           // Not marked.
-          to_ref = nullptr;
+          if (IsOnAllocStack(from_ref)) {
+            // If on the allocation stack, it's considered marked.
+            to_ref = from_ref;
+          } else {
+            // Not marked.
+            to_ref = nullptr;
+          }
         }
       }
     }
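
Condensed, the reordered logic above checks the mark bitmap first, then the LOS bitmap (only when no continuous-space bitmap covers the reference and a LOS bitmap exists), and finally the allocation stack. A hedged sketch with stub types:

    struct Object {};  // stub
    struct Bitmap { bool Test(Object*) const { return false; } };  // stub

    bool IsOnAllocStack(Object*) { return false; }  // stub

    Object* IsMarkedNonMoving(Object* ref, Bitmap* mark_bitmap, Bitmap* los_bitmap) {
      if (mark_bitmap != nullptr && mark_bitmap->Test(ref)) {
        return ref;  // marked in the non-moving space
      }
      if (mark_bitmap == nullptr && los_bitmap != nullptr && los_bitmap->Test(ref)) {
        return ref;  // marked in the large object space
      }
      return IsOnAllocStack(ref) ? ref : nullptr;  // alloc stack counts as marked
    }
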
@@ -2457,6 +2464,7 @@
 }
 
 bool ConcurrentCopying::IsOnAllocStack(mirror::Object* ref) {
+  // TODO: Explain why this is here. What release operation does it pair with?
   QuasiAtomic::ThreadFenceAcquire();
   accounting::ObjectStack* alloc_stack = GetAllocationStack();
   return alloc_stack->Contains(ref);
@@ -2617,9 +2625,8 @@
         }
       } while (!field->CasWeakRelaxed(from_ref, to_ref));
     } else {
-      QuasiAtomic::ThreadFenceRelease();
-      field->Assign(to_ref);
-      QuasiAtomic::ThreadFenceSequentiallyConsistent();
+      // TODO: Why is this seq_cst when the above is relaxed? Document memory ordering.
+      field->Assign</* kIsVolatile */ true>(to_ref);
     }
   }
   return true;
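
For context, a volatile reference store in ART is assumed here to have sequentially consistent semantics (the TODO above asks for exactly this to be documented), which subsumes the release fence, plain store, and seq_cst fence it replaces. Modeled with std::atomic:

    #include <atomic>

    std::atomic<int> field{0};

    // Old shape: fence(release); plain store; fence(seq_cst).
    // New shape: one seq_cst store, which is what a volatile Assign() performs
    // under the assumed mapping.
    void AssignVolatile(int value) {
      field.store(value, std::memory_order_seq_cst);
    }
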
diff --git a/runtime/gc/collector/garbage_collector.h b/runtime/gc/collector/garbage_collector.h
index dec206b..f722e8d 100644
--- a/runtime/gc/collector/garbage_collector.h
+++ b/runtime/gc/collector/garbage_collector.h
@@ -113,7 +113,7 @@
   virtual mirror::Object* IsMarked(mirror::Object* obj)
       REQUIRES_SHARED(Locks::mutator_lock_) = 0;
   // Returns true if the given heap reference is null or is already marked. If it's already marked,
-  // update the reference (uses a CAS if do_atomic_update is true. Otherwise, returns false.
+  // update the reference (uses a CAS if do_atomic_update is true). Otherwise, returns false.
   virtual bool IsNullOrMarkedHeapReference(mirror::HeapReference<mirror::Object>* obj,
                                            bool do_atomic_update)
       REQUIRES_SHARED(Locks::mutator_lock_) = 0;
diff --git a/runtime/gc/collector/semi_space-inl.h b/runtime/gc/collector/semi_space-inl.h
index 78fb2d2..7db5d2c 100644
--- a/runtime/gc/collector/semi_space-inl.h
+++ b/runtime/gc/collector/semi_space-inl.h
@@ -38,9 +38,8 @@
 // Used to mark and copy objects. Any newly-marked objects that are in the from space get moved to
 // the to-space and have their forward address updated. Objects which have been newly marked are
 // pushed on the mark stack.
-template<bool kPoisonReferences>
-inline void SemiSpace::MarkObject(
-    mirror::ObjectReference<kPoisonReferences, mirror::Object>* obj_ptr) {
+template<typename CompressedReferenceType>
+inline void SemiSpace::MarkObject(CompressedReferenceType* obj_ptr) {
   mirror::Object* obj = obj_ptr->AsMirrorPtr();
   if (obj == nullptr) {
     return;
@@ -73,9 +72,8 @@
   }
 }
 
-template<bool kPoisonReferences>
-inline void SemiSpace::MarkObjectIfNotInToSpace(
-    mirror::ObjectReference<kPoisonReferences, mirror::Object>* obj_ptr) {
+template<typename CompressedReferenceType>
+inline void SemiSpace::MarkObjectIfNotInToSpace(CompressedReferenceType* obj_ptr) {
   if (!to_space_->HasAddress(obj_ptr->AsMirrorPtr())) {
     MarkObject(obj_ptr);
   }
diff --git a/runtime/gc/collector/semi_space.h b/runtime/gc/collector/semi_space.h
index fd52da3..6d4d789 100644
--- a/runtime/gc/collector/semi_space.h
+++ b/runtime/gc/collector/semi_space.h
@@ -97,13 +97,13 @@
   // Find the default mark bitmap.
   void FindDefaultMarkBitmap();
 
-  // Updates obj_ptr if the object has moved.
-  template<bool kPoisonReferences>
-  void MarkObject(mirror::ObjectReference<kPoisonReferences, mirror::Object>* obj_ptr)
+  // Updates obj_ptr if the object has moved. Takes either an ObjectReference or a HeapReference.
+  template<typename CompressedReferenceType>
+  void MarkObject(CompressedReferenceType* obj_ptr)
       REQUIRES(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
 
-  template<bool kPoisonReferences>
-  void MarkObjectIfNotInToSpace(mirror::ObjectReference<kPoisonReferences, mirror::Object>* obj_ptr)
+  template<typename CompressedReferenceType>
+  void MarkObjectIfNotInToSpace(CompressedReferenceType* obj_ptr)
       REQUIRES(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
 
   virtual mirror::Object* MarkObject(mirror::Object* root) OVERRIDE
diff --git a/runtime/gc/space/image_space.cc b/runtime/gc/space/image_space.cc
index 1484382..14e017a 100644
--- a/runtime/gc/space/image_space.cc
+++ b/runtime/gc/space/image_space.cc
@@ -725,6 +725,10 @@
                image_header->GetImageMethod(ImageHeader::kSaveRefsAndArgsMethod));
       CHECK_EQ(runtime->GetCalleeSaveMethod(CalleeSaveType::kSaveEverything),
                image_header->GetImageMethod(ImageHeader::kSaveEverythingMethod));
+      CHECK_EQ(runtime->GetCalleeSaveMethod(CalleeSaveType::kSaveEverythingForClinit),
+               image_header->GetImageMethod(ImageHeader::kSaveEverythingMethodForClinit));
+      CHECK_EQ(runtime->GetCalleeSaveMethod(CalleeSaveType::kSaveEverythingForSuspendCheck),
+               image_header->GetImageMethod(ImageHeader::kSaveEverythingMethodForSuspendCheck));
     } else if (!runtime->HasResolutionMethod()) {
       runtime->SetInstructionSet(space->oat_file_non_owned_->GetOatHeader().GetInstructionSet());
       runtime->SetResolutionMethod(image_header->GetImageMethod(ImageHeader::kResolutionMethod));
@@ -743,6 +747,12 @@
       runtime->SetCalleeSaveMethod(
           image_header->GetImageMethod(ImageHeader::kSaveEverythingMethod),
           CalleeSaveType::kSaveEverything);
+      runtime->SetCalleeSaveMethod(
+          image_header->GetImageMethod(ImageHeader::kSaveEverythingMethodForClinit),
+          CalleeSaveType::kSaveEverythingForClinit);
+      runtime->SetCalleeSaveMethod(
+          image_header->GetImageMethod(ImageHeader::kSaveEverythingMethodForSuspendCheck),
+          CalleeSaveType::kSaveEverythingForSuspendCheck);
     }
 
     VLOG(image) << "ImageSpace::Init exiting " << *space.get();
diff --git a/runtime/gc/space/large_object_space_test.cc b/runtime/gc/space/large_object_space_test.cc
index 79b775a..9baa016 100644
--- a/runtime/gc/space/large_object_space_test.cc
+++ b/runtime/gc/space/large_object_space_test.cc
@@ -38,12 +38,19 @@
   Thread* const self = Thread::Current();
   for (size_t i = 0; i < 2; ++i) {
     LargeObjectSpace* los = nullptr;
+    const size_t capacity = 128 * MB;
     if (i == 0) {
       los = space::LargeObjectMapSpace::Create("large object space");
     } else {
-      los = space::FreeListSpace::Create("large object space", nullptr, 128 * MB);
+      los = space::FreeListSpace::Create("large object space", nullptr, capacity);
     }
 
+    // Make sure the bitmap is not empty and actually covers at least as much as we expect.
+    CHECK_LT(static_cast<uintptr_t>(los->GetLiveBitmap()->HeapBegin()),
+             static_cast<uintptr_t>(los->GetLiveBitmap()->HeapLimit()));
+    CHECK_LE(static_cast<uintptr_t>(los->GetLiveBitmap()->HeapBegin() + capacity),
+             static_cast<uintptr_t>(los->GetLiveBitmap()->HeapLimit()));
+
     static const size_t num_allocations = 64;
     static const size_t max_allocation_size = 0x100000;
     std::vector<std::pair<mirror::Object*, size_t>> requests;
diff --git a/runtime/gc/space/malloc_space.cc b/runtime/gc/space/malloc_space.cc
index c2a8de3..c994127 100644
--- a/runtime/gc/space/malloc_space.cc
+++ b/runtime/gc/space/malloc_space.cc
@@ -191,14 +191,10 @@
   VLOG(heap) << "Size " << GetMemMap()->Size();
   VLOG(heap) << "GrowthLimit " << PrettySize(growth_limit);
   VLOG(heap) << "Capacity " << PrettySize(capacity);
-  // Remap the tail. Pass MAP_PRIVATE since we don't want to share the same ashmem as the zygote
-  // space.
+  // Remap the tail.
   std::string error_msg;
-  std::unique_ptr<MemMap> mem_map(GetMemMap()->RemapAtEnd(End(),
-                                                          alloc_space_name,
-                                                          PROT_READ | PROT_WRITE,
-                                                          MAP_PRIVATE,
-                                                          &error_msg));
+  std::unique_ptr<MemMap> mem_map(GetMemMap()->RemapAtEnd(End(), alloc_space_name,
+                                                          PROT_READ | PROT_WRITE, &error_msg));
   CHECK(mem_map.get() != nullptr) << error_msg;
   void* allocator = CreateAllocator(End(), starting_size_, initial_size_, capacity,
                                     low_memory_mode);
diff --git a/runtime/generated/asm_support_gen.h b/runtime/generated/asm_support_gen.h
index 11b3abb..314c45e 100644
--- a/runtime/generated/asm_support_gen.h
+++ b/runtime/generated/asm_support_gen.h
@@ -34,6 +34,10 @@
 DEFINE_CHECK_EQ(static_cast<size_t>(RUNTIME_SAVE_REFS_AND_ARGS_METHOD_OFFSET), (static_cast<size_t>(art::Runtime::GetCalleeSaveMethodOffset(art::CalleeSaveType::kSaveRefsAndArgs))))
 #define RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET 0x18
 DEFINE_CHECK_EQ(static_cast<size_t>(RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET), (static_cast<size_t>(art::Runtime::GetCalleeSaveMethodOffset(art::CalleeSaveType::kSaveEverything))))
+#define RUNTIME_SAVE_EVERYTHING_FOR_CLINIT_METHOD_OFFSET 0x20
+DEFINE_CHECK_EQ(static_cast<size_t>(RUNTIME_SAVE_EVERYTHING_FOR_CLINIT_METHOD_OFFSET), (static_cast<size_t>(art::Runtime::GetCalleeSaveMethodOffset(art::CalleeSaveType::kSaveEverythingForClinit))))
+#define RUNTIME_SAVE_EVERYTHING_FOR_SUSPEND_CHECK_METHOD_OFFSET 0x28
+DEFINE_CHECK_EQ(static_cast<size_t>(RUNTIME_SAVE_EVERYTHING_FOR_SUSPEND_CHECK_METHOD_OFFSET), (static_cast<size_t>(art::Runtime::GetCalleeSaveMethodOffset(art::CalleeSaveType::kSaveEverythingForSuspendCheck))))
 #define THREAD_FLAGS_OFFSET 0
 DEFINE_CHECK_EQ(static_cast<int32_t>(THREAD_FLAGS_OFFSET), (static_cast<int32_t>(art::Thread:: ThreadFlagsOffset<art::kRuntimePointerSize>().Int32Value())))
 #define THREAD_ID_OFFSET 12
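
The two new offsets follow the table's existing 8-byte stride: each callee-save entry occupies a 64-bit slot, so the two types appended after kSaveEverything (0x18) land at 0x20 and 0x28. A quick check of that arithmetic (the slot size is inferred from the stride, not stated in this diff):

    #include <cstdint>

    constexpr std::uint32_t kSaveEverythingOffset = 0x18;
    constexpr std::uint32_t kSlotSize = sizeof(std::uint64_t);  // 8-byte entries
    static_assert(kSaveEverythingOffset + 1 * kSlotSize == 0x20, "ForClinit");
    static_assert(kSaveEverythingOffset + 2 * kSlotSize == 0x28, "ForSuspendCheck");
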
diff --git a/runtime/globals.h b/runtime/globals.h
index 6164225..256306d 100644
--- a/runtime/globals.h
+++ b/runtime/globals.h
@@ -51,11 +51,17 @@
 static constexpr size_t kObjectAlignment = 1u << kObjectAlignmentShift;
 static constexpr size_t kLargeObjectAlignment = kPageSize;
 
+// CLion, the clang analyzer, etc. can falsely believe that "if (kIsDebugBuild)" always
+// evaluates to the same value. By wrapping the constant in a call to another constexpr
+// function, we force them to realize that it is not necessarily always the same value.
+static constexpr bool GlobalsReturnSelf(bool self) { return self; }
+
 // Whether or not this is a debug build. Useful in conditionals where NDEBUG isn't.
-#if defined(NDEBUG)
-static constexpr bool kIsDebugBuild = false;
+// TODO: Use only __clang_analyzer__ here. b/64455231
+#if defined(NDEBUG) && !defined(__CLION_IDE__)
+static constexpr bool kIsDebugBuild = GlobalsReturnSelf(false);
 #else
-static constexpr bool kIsDebugBuild = true;
+static constexpr bool kIsDebugBuild = GlobalsReturnSelf(true);
 #endif
 
 // ART_TARGET - Defined for target builds of ART.
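
A minimal standalone illustration of the trick (Identity and kFlag are illustrative names): routing a literal through a constexpr call keeps it usable in compile-time contexts while discouraging analyzers from treating branches on it as dead code:

    // Identity() mirrors GlobalsReturnSelf() above.
    static constexpr bool Identity(bool self) { return self; }
    static constexpr bool kFlag = Identity(false);

    static_assert(kFlag == false, "still usable in compile-time contexts");

    int Demo() {
      if (kFlag) {  // analyzers are less eager to flag this branch as dead code
        return 1;
      }
      return 0;
    }
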
diff --git a/runtime/image.h b/runtime/image.h
index 6c76f49..7bb796c 100644
--- a/runtime/image.h
+++ b/runtime/image.h
@@ -183,6 +183,8 @@
     kSaveRefsOnlyMethod,
     kSaveRefsAndArgsMethod,
     kSaveEverythingMethod,
+    kSaveEverythingMethodForClinit,
+    kSaveEverythingMethodForSuspendCheck,
     kImageMethodsCount,  // Number of elements in enum.
   };
 
diff --git a/runtime/instrumentation.h b/runtime/instrumentation.h
index b8ea597..9969489 100644
--- a/runtime/instrumentation.h
+++ b/runtime/instrumentation.h
@@ -461,8 +461,7 @@
   // This is used by the debugger to cause a deoptimization of the thread's stack after updating
   // local variable(s).
   void InstrumentThreadStack(Thread* thread)
-      REQUIRES_SHARED(Locks::mutator_lock_)
-      REQUIRES(!Locks::thread_list_lock_);
+      REQUIRES_SHARED(Locks::mutator_lock_);
 
   static size_t ComputeFrameId(Thread* self,
                                size_t frame_depth,
diff --git a/runtime/interpreter/interpreter_common.cc b/runtime/interpreter/interpreter_common.cc
index 136d0c6..f8cb243 100644
--- a/runtime/interpreter/interpreter_common.cc
+++ b/runtime/interpreter/interpreter_common.cc
@@ -33,6 +33,7 @@
 #include "reflection.h"
 #include "stack.h"
 #include "thread-inl.h"
+#include "transaction.h"
 #include "well_known_classes.h"
 
 namespace art {
@@ -42,7 +43,8 @@
   ThrowNullPointerExceptionFromDexPC();
 }
 
-template<FindFieldType find_type, Primitive::Type field_type, bool do_access_check>
+template<FindFieldType find_type, Primitive::Type field_type, bool do_access_check,
+         bool transaction_active>
 bool DoFieldGet(Thread* self, ShadowFrame& shadow_frame, const Instruction* inst,
                 uint16_t inst_data) {
   const bool is_static = (find_type == StaticObjectRead) || (find_type == StaticPrimitiveRead);
@@ -57,6 +59,13 @@
   ObjPtr<mirror::Object> obj;
   if (is_static) {
     obj = f->GetDeclaringClass();
+    if (transaction_active) {
+      if (Runtime::Current()->GetTransaction()->ReadConstraint(obj.Ptr(), f)) {
+        Runtime::Current()->AbortTransactionAndThrowAbortError(self, "Can't read static fields of "
+            + obj->PrettyTypeOf() + " since it does not belong to clinit's class.");
+        return false;
+      }
+    }
   } else {
     obj = shadow_frame.GetVRegReference(inst->VRegB_22c(inst_data));
     if (UNLIKELY(obj == nullptr)) {
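
A toy model of the read constraint introduced above, with all names hypothetical: inside a transaction, an sget on any class other than the one whose <clinit> is running aborts the transaction rather than reading state that dex2oat could not replay:

    #include <iostream>
    #include <string>

    // All names here are hypothetical stand-ins for ART's Transaction machinery.
    struct Class { std::string name; };

    struct Transaction {
      Class* clinit_class;   // the class whose <clinit> this transaction runs
      bool aborted = false;

      bool ReadConstraint(Class* declaring_class) const {
        return declaring_class != clinit_class;  // true == read is disallowed
      }
      void Abort(const std::string& why) {
        aborted = true;
        std::cerr << why << '\n';
      }
    };

    bool DoStaticRead(Transaction& tx, Class* declaring_class) {
      if (tx.ReadConstraint(declaring_class)) {
        tx.Abort("Can't read static fields of " + declaring_class->name +
                 " since it does not belong to clinit's class.");
        return false;  // the interpreter then propagates the abort error
      }
      return true;  // the read may proceed
    }
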
@@ -102,15 +111,17 @@
 }
 
 // Explicitly instantiate all DoFieldGet functions.
-#define EXPLICIT_DO_FIELD_GET_TEMPLATE_DECL(_find_type, _field_type, _do_check) \
-  template bool DoFieldGet<_find_type, _field_type, _do_check>(Thread* self, \
+#define EXPLICIT_DO_FIELD_GET_TEMPLATE_DECL(_find_type, _field_type, _do_check, _transaction_active) \
+  template bool DoFieldGet<_find_type, _field_type, _do_check, _transaction_active>(Thread* self, \
                                                                ShadowFrame& shadow_frame, \
                                                                const Instruction* inst, \
                                                                uint16_t inst_data)
 
 #define EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(_find_type, _field_type)  \
-    EXPLICIT_DO_FIELD_GET_TEMPLATE_DECL(_find_type, _field_type, false);  \
-    EXPLICIT_DO_FIELD_GET_TEMPLATE_DECL(_find_type, _field_type, true);
+    EXPLICIT_DO_FIELD_GET_TEMPLATE_DECL(_find_type, _field_type, false, true);  \
+    EXPLICIT_DO_FIELD_GET_TEMPLATE_DECL(_find_type, _field_type, false, false);  \
+    EXPLICIT_DO_FIELD_GET_TEMPLATE_DECL(_find_type, _field_type, true, true);  \
+    EXPLICIT_DO_FIELD_GET_TEMPLATE_DECL(_find_type, _field_type, true, false);
 
 // iget-XXX
 EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL(InstancePrimitiveRead, Primitive::kPrimBoolean)
@@ -261,6 +272,14 @@
   ObjPtr<mirror::Object> obj;
   if (is_static) {
     obj = f->GetDeclaringClass();
+    if (transaction_active) {
+      if (Runtime::Current()->GetTransaction()->WriteConstraint(obj.Ptr(), f)) {
+        Runtime::Current()->AbortTransactionAndThrowAbortError(
+            self, "Can't set fields of " + obj->PrettyTypeOf());
+        return false;
+      }
+    }
+
   } else {
     obj = shadow_frame.GetVRegReference(inst->VRegB_22c(inst_data));
     if (UNLIKELY(obj == nullptr)) {
diff --git a/runtime/interpreter/interpreter_common.h b/runtime/interpreter/interpreter_common.h
index d293aeb..5b942f2 100644
--- a/runtime/interpreter/interpreter_common.h
+++ b/runtime/interpreter/interpreter_common.h
@@ -141,11 +141,8 @@
     return false;
   } else {
     jit::Jit* jit = Runtime::Current()->GetJit();
-    if (jit != nullptr) {
-      if (type == kVirtual) {
-        jit->InvokeVirtualOrInterface(receiver, sf_method, shadow_frame.GetDexPC(), called_method);
-      }
-      jit->AddSamples(self, sf_method, 1, /*with_backedges*/false);
+    if (jit != nullptr && type == kVirtual) {
+      jit->InvokeVirtualOrInterface(receiver, sf_method, shadow_frame.GetDexPC(), called_method);
     }
     if (called_method->IsIntrinsic()) {
       if (MterpHandleIntrinsic(&shadow_frame, called_method, inst, inst_data,
@@ -182,11 +179,8 @@
     return false;
   } else {
     jit::Jit* jit = Runtime::Current()->GetJit();
-    if (jit != nullptr) {
-      if (type == kVirtual || type == kInterface) {
-        jit->InvokeVirtualOrInterface(receiver, sf_method, shadow_frame.GetDexPC(), called_method);
-      }
-      jit->AddSamples(self, sf_method, 1, /*with_backedges*/false);
+    if (jit != nullptr && (type == kVirtual || type == kInterface)) {
+      jit->InvokeVirtualOrInterface(receiver, sf_method, shadow_frame.GetDexPC(), called_method);
     }
     // TODO: Remove the InvokeVirtualOrInterface instrumentation, as it was only used by the JIT.
     if (type == kVirtual || type == kInterface) {
@@ -270,7 +264,8 @@
 
 // Handles iget-XXX and sget-XXX instructions.
 // Returns true on success, otherwise throws an exception and returns false.
-template<FindFieldType find_type, Primitive::Type field_type, bool do_access_check>
+template<FindFieldType find_type, Primitive::Type field_type, bool do_access_check,
+         bool transaction_active = false>
 bool DoFieldGet(Thread* self, ShadowFrame& shadow_frame, const Instruction* inst,
                 uint16_t inst_data) REQUIRES_SHARED(Locks::mutator_lock_);
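
Defaulting transaction_active to false keeps the change source-compatible. A sketch with a hypothetical function showing that existing three-argument call sites compile unchanged while the transactional interpreter opts in explicitly:

    // DoThing() is a hypothetical stand-in for DoFieldGet's template signature.
    template <bool do_access_check, bool transaction_active = false>
    bool DoThing() {
      return transaction_active;
    }

    bool old_site = DoThing<true>();        // pre-existing call sites unchanged
    bool new_site = DoThing<true, true>();  // the switch interpreter opts in
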
 
diff --git a/runtime/interpreter/interpreter_switch_impl.cc b/runtime/interpreter/interpreter_switch_impl.cc
index bdb8332..0c5a45f 100644
--- a/runtime/interpreter/interpreter_switch_impl.cc
+++ b/runtime/interpreter/interpreter_switch_impl.cc
@@ -1313,50 +1313,50 @@
       }
       case Instruction::SGET_BOOLEAN: {
         PREAMBLE();
-        bool success = DoFieldGet<StaticPrimitiveRead, Primitive::kPrimBoolean, do_access_check>(
-            self, shadow_frame, inst, inst_data);
+        bool success = DoFieldGet<StaticPrimitiveRead, Primitive::kPrimBoolean, do_access_check,
+            transaction_active>(self, shadow_frame, inst, inst_data);
         POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
         break;
       }
       case Instruction::SGET_BYTE: {
         PREAMBLE();
-        bool success = DoFieldGet<StaticPrimitiveRead, Primitive::kPrimByte, do_access_check>(
-            self, shadow_frame, inst, inst_data);
+        bool success = DoFieldGet<StaticPrimitiveRead, Primitive::kPrimByte, do_access_check,
+            transaction_active>(self, shadow_frame, inst, inst_data);
         POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
         break;
       }
       case Instruction::SGET_CHAR: {
         PREAMBLE();
-        bool success = DoFieldGet<StaticPrimitiveRead, Primitive::kPrimChar, do_access_check>(
-            self, shadow_frame, inst, inst_data);
+        bool success = DoFieldGet<StaticPrimitiveRead, Primitive::kPrimChar, do_access_check,
+            transaction_active>(self, shadow_frame, inst, inst_data);
         POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
         break;
       }
       case Instruction::SGET_SHORT: {
         PREAMBLE();
-        bool success = DoFieldGet<StaticPrimitiveRead, Primitive::kPrimShort, do_access_check>(
-            self, shadow_frame, inst, inst_data);
+        bool success = DoFieldGet<StaticPrimitiveRead, Primitive::kPrimShort, do_access_check,
+            transaction_active>(self, shadow_frame, inst, inst_data);
         POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
         break;
       }
       case Instruction::SGET: {
         PREAMBLE();
-        bool success = DoFieldGet<StaticPrimitiveRead, Primitive::kPrimInt, do_access_check>(
-            self, shadow_frame, inst, inst_data);
+        bool success = DoFieldGet<StaticPrimitiveRead, Primitive::kPrimInt, do_access_check,
+            transaction_active>(self, shadow_frame, inst, inst_data);
         POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
         break;
       }
       case Instruction::SGET_WIDE: {
         PREAMBLE();
-        bool success = DoFieldGet<StaticPrimitiveRead, Primitive::kPrimLong, do_access_check>(
-            self, shadow_frame, inst, inst_data);
+        bool success = DoFieldGet<StaticPrimitiveRead, Primitive::kPrimLong, do_access_check,
+            transaction_active>(self, shadow_frame, inst, inst_data);
         POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
         break;
       }
       case Instruction::SGET_OBJECT: {
         PREAMBLE();
-        bool success = DoFieldGet<StaticObjectRead, Primitive::kPrimNot, do_access_check>(
-            self, shadow_frame, inst, inst_data);
+        bool success = DoFieldGet<StaticObjectRead, Primitive::kPrimNot, do_access_check,
+            transaction_active>(self, shadow_frame, inst, inst_data);
         POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
         break;
       }
diff --git a/runtime/interpreter/mterp/mterp.cc b/runtime/interpreter/mterp/mterp.cc
index 5955b90..88254a8 100644
--- a/runtime/interpreter/mterp/mterp.cc
+++ b/runtime/interpreter/mterp/mterp.cc
@@ -280,7 +280,6 @@
         if (jit != nullptr) {
           jit->InvokeVirtualOrInterface(
               receiver, shadow_frame->GetMethod(), shadow_frame->GetDexPC(), called_method);
-          jit->AddSamples(self, shadow_frame->GetMethod(), 1, /*with_backedges*/false);
         }
         return !self->IsExceptionPending();
       }
diff --git a/runtime/interpreter/unstarted_runtime.cc b/runtime/interpreter/unstarted_runtime.cc
index 2c72821..ce06a03 100644
--- a/runtime/interpreter/unstarted_runtime.cc
+++ b/runtime/interpreter/unstarted_runtime.cc
@@ -1438,7 +1438,11 @@
     mirror::HeapReference<mirror::Object>* field_addr =
         reinterpret_cast<mirror::HeapReference<mirror::Object>*>(
             reinterpret_cast<uint8_t*>(obj) + static_cast<size_t>(offset));
-    ReadBarrier::Barrier<mirror::Object, kWithReadBarrier, /* kAlwaysUpdateField */ true>(
+    ReadBarrier::Barrier<
+        mirror::Object,
+        /* kIsVolatile */ false,
+        kWithReadBarrier,
+        /* kAlwaysUpdateField */ true>(
         obj,
         MemberOffset(offset),
         field_addr);
diff --git a/runtime/jit/jit_code_cache.cc b/runtime/jit/jit_code_cache.cc
index a030a51..47ace7f 100644
--- a/runtime/jit/jit_code_cache.cc
+++ b/runtime/jit/jit_code_cache.cc
@@ -47,13 +47,9 @@
 static constexpr int kProtAll = PROT_READ | PROT_WRITE | PROT_EXEC;
 static constexpr int kProtData = PROT_READ | PROT_WRITE;
 static constexpr int kProtCode = PROT_READ | PROT_EXEC;
-static constexpr int kProtReadOnly = PROT_READ;
-static constexpr int kProtNone = PROT_NONE;
 
 static constexpr size_t kCodeSizeLogThreshold = 50 * KB;
 static constexpr size_t kStackMapSizeLogThreshold = 50 * KB;
-static constexpr size_t kMinMapSpacingPages = 1;
-static constexpr size_t kMaxMapSpacingPages = 128;
 
 #define CHECKED_MPROTECT(memory, size, prot)                \
   do {                                                      \
@@ -64,39 +60,12 @@
     }                                                       \
   } while (false)                                           \
 
-static MemMap* SplitMemMap(MemMap* existing_map,
-                           const char* name,
-                           size_t split_offset,
-                           int split_prot,
-                           std::string* error_msg,
-                           bool use_ashmem,
-                           unique_fd* shmem_fd = nullptr) {
-  std::string error_str;
-  uint8_t* divider = existing_map->Begin() + split_offset;
-  MemMap* new_map = existing_map->RemapAtEnd(divider,
-                                             name,
-                                             split_prot,
-                                             MAP_SHARED,
-                                             &error_str,
-                                             use_ashmem,
-                                             shmem_fd);
-  if (new_map == nullptr) {
-    std::ostringstream oss;
-    oss << "Failed to create spacing for " << name << ": "
-        << error_str << " offset=" << split_offset;
-    *error_msg = oss.str();
-    return nullptr;
-  }
-  return new_map;
-}
-
 JitCodeCache* JitCodeCache::Create(size_t initial_capacity,
                                    size_t max_capacity,
                                    bool generate_debug_info,
                                    std::string* error_msg) {
   ScopedTrace trace(__PRETTY_FUNCTION__);
-  CHECK_GT(max_capacity, initial_capacity);
-  CHECK_GE(max_capacity - kMaxMapSpacingPages * kPageSize, initial_capacity);
+  CHECK_GE(max_capacity, initial_capacity);
 
   // Generating debug information is for using the Linux perf tool on
   // host which does not work with ashmem.
@@ -106,10 +75,6 @@
   // With 'perf', we want a 1-1 mapping between an address and a method.
   bool garbage_collect_code = !generate_debug_info;
 
-  // We only use two mappings (separating rw from rx) if we are able to use ashmem.
-  // See the above comment for debug information and not using ashmem.
-  bool use_two_mappings = use_ashmem;
-
   // We need to have 32 bit offsets from method headers in code cache which point to things
   // in the data cache. If the maps are more than 4G apart, having multiple maps wouldn't work.
   // Ensure we're below 1 GB to be safe.
@@ -121,10 +86,6 @@
     return nullptr;
   }
 
-  // Align both capacities to page size, as that's the unit mspaces use.
-  initial_capacity = RoundDown(initial_capacity, 2 * kPageSize);
-  max_capacity = RoundDown(max_capacity, 2 * kPageSize);
-
   std::string error_str;
   // Map name specific for android_os_Debug.cpp accounting.
   // Map in low 4gb to simplify accessing root tables for x86_64.
@@ -146,138 +107,35 @@
     return nullptr;
   }
 
-  // Create a region for JIT data and executable code. This will be
-  // laid out as:
-  //
-  //          +----------------+ --------------------
-  //          | code_sync_map_ | ^ code_sync_size   ^
-  //          |                | v                  |
-  //          +----------------+ --                 |
-  //          :                : ^                  |
-  //          :  post_code_map : | post_code_size   |
-  //          :   [padding]    : v                  |
-  //          +----------------+ -                  |
-  //          |                | ^                  |
-  //          |   code_map     | | code_size        | total_mapping_size
-  //          |   [JIT Code]   | v                  |
-  //          +----------------+ -                  |
-  //          :                : ^                  |
-  //          :  pre_code_map  : | pre_code_size    |
-  //          :   [padding]    : v                  |
-  //          +----------------+ -                  |
-  //          |                | ^                  |
-  //          |    data_map    | | data_size        |
-  //          |   [Jit Data]   | v                  v
-  //          +----------------+ --------------------
-  //
-  // The code_sync_map_ contains a page that we use flush CPU instruction
-  // pipelines (see FlushInstructionPipelines()).
-  //
-  // The padding regions - pre_code_map and post_code_map - exist to
-  // put some random distance between the writable JIT code mapping
-  // and the executable mapping. The padding is discarded at the end
-  // of this function.
-  //
-  size_t data_size = (max_capacity - kMaxMapSpacingPages * kPageSize) / 2;
-  size_t pre_code_size =
-      GetRandomNumber(kMinMapSpacingPages, kMaxMapSpacingPages - 1) * kPageSize;
-  size_t code_size = max_capacity - data_size - kMaxMapSpacingPages * kPageSize;
-  size_t code_sync_size = kPageSize;
-  size_t post_code_size = kMaxMapSpacingPages * kPageSize - pre_code_size - code_sync_size;
-  DCHECK_EQ(data_size, code_size);
-  DCHECK_EQ(pre_code_size + post_code_size + code_sync_size, kMaxMapSpacingPages * kPageSize);
-  DCHECK_EQ(data_size + pre_code_size + code_size + post_code_size + code_sync_size, max_capacity);
+  // Align both capacities to page size, as that's the unit mspaces use.
+  initial_capacity = RoundDown(initial_capacity, 2 * kPageSize);
+  max_capacity = RoundDown(max_capacity, 2 * kPageSize);
 
-  // Create pre-code padding region after data region, discarded after
-  // code and data regions are set-up.
-  std::unique_ptr<MemMap> pre_code_map(SplitMemMap(data_map.get(),
-                                                   "jit-code-cache-padding",
-                                                   data_size,
-                                                   kProtNone,
-                                                   error_msg,
-                                                   use_ashmem));
-  if (pre_code_map == nullptr) {
-    return nullptr;
-  }
-  DCHECK_EQ(data_map->Size(), data_size);
-  DCHECK_EQ(pre_code_map->Size(), pre_code_size + code_size + post_code_size + code_sync_size);
+  // Data cache is 1/2 of the map.
+  // TODO: Make this variable?
+  size_t data_size = max_capacity / 2;
+  size_t code_size = max_capacity - data_size;
+  DCHECK_EQ(code_size + data_size, max_capacity);
+  uint8_t* divider = data_map->Begin() + data_size;
 
-  // Create code region.
-  unique_fd writable_code_fd;
-  std::unique_ptr<MemMap> code_map(SplitMemMap(pre_code_map.get(),
-                                               "jit-code-cache",
-                                               pre_code_size,
-                                               use_two_mappings ? kProtCode : kProtAll,
-                                               error_msg,
-                                               use_ashmem,
-                                               &writable_code_fd));
+  MemMap* code_map =
+      data_map->RemapAtEnd(divider, "jit-code-cache", kProtAll, &error_str, use_ashmem);
   if (code_map == nullptr) {
+    std::ostringstream oss;
+    oss << "Failed to create read write execute cache: " << error_str << " size=" << max_capacity;
+    *error_msg = oss.str();
     return nullptr;
   }
-  DCHECK_EQ(pre_code_map->Size(), pre_code_size);
-  DCHECK_EQ(code_map->Size(), code_size + post_code_size + code_sync_size);
-
-  // Padding after code region, discarded after code and data regions
-  // are set-up.
-  std::unique_ptr<MemMap> post_code_map(SplitMemMap(code_map.get(),
-                                                    "jit-code-cache-padding",
-                                                    code_size,
-                                                    kProtNone,
-                                                    error_msg,
-                                                    use_ashmem));
-  if (post_code_map == nullptr) {
-    return nullptr;
-  }
-  DCHECK_EQ(code_map->Size(), code_size);
-  DCHECK_EQ(post_code_map->Size(), post_code_size + code_sync_size);
-
-  std::unique_ptr<MemMap> code_sync_map(SplitMemMap(post_code_map.get(),
-                                                    "jit-code-sync",
-                                                    post_code_size,
-                                                    kProtCode,
-                                                    error_msg,
-                                                    use_ashmem));
-  if (code_sync_map == nullptr) {
-    return nullptr;
-  }
-  DCHECK_EQ(post_code_map->Size(), post_code_size);
-  DCHECK_EQ(code_sync_map->Size(), code_sync_size);
-
-  std::unique_ptr<MemMap> writable_code_map;
-  if (use_two_mappings) {
-    // Allocate the R/W view.
-    writable_code_map.reset(MemMap::MapFile(code_size,
-                                            kProtData,
-                                            MAP_SHARED,
-                                            writable_code_fd.get(),
-                                            /* start */ 0,
-                                            /* low_4gb */ true,
-                                            "jit-writable-code",
-                                            &error_str));
-    if (writable_code_map == nullptr) {
-      std::ostringstream oss;
-      oss << "Failed to create writable code cache: " << error_str << " size=" << code_size;
-      *error_msg = oss.str();
-      return nullptr;
-    }
-  }
+  DCHECK_EQ(code_map->Begin(), divider);
   data_size = initial_capacity / 2;
   code_size = initial_capacity - data_size;
   DCHECK_EQ(code_size + data_size, initial_capacity);
-  return new JitCodeCache(writable_code_map.release(),
-                          code_map.release(),
-                          data_map.release(),
-                          code_sync_map.release(),
-                          code_size,
-                          data_size,
-                          max_capacity,
-                          garbage_collect_code);
+  return new JitCodeCache(
+      code_map, data_map.release(), code_size, data_size, max_capacity, garbage_collect_code);
 }
 
-JitCodeCache::JitCodeCache(MemMap* writable_code_map,
-                           MemMap* executable_code_map,
+JitCodeCache::JitCodeCache(MemMap* code_map,
                            MemMap* data_map,
-                           MemMap* code_sync_map,
                            size_t initial_code_capacity,
                            size_t initial_data_capacity,
                            size_t max_capacity,
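
The rewritten Create() above reduces the cache layout to a single mapping split in half, data first and code second. Worked through for an arbitrary illustrative capacity:

    #include <cstddef>

    constexpr std::size_t kMB = 1024 * 1024;
    constexpr std::size_t max_capacity = 64 * kMB;               // illustrative
    constexpr std::size_t data_size = max_capacity / 2;          // rw data half
    constexpr std::size_t code_size = max_capacity - data_size;  // rx code half
    static_assert(data_size + code_size == max_capacity, "halves cover the map");
    // divider = data_map->Begin() + data_size; RemapAtEnd() makes code_map start
    // exactly at divider, as the DCHECK_EQ(code_map->Begin(), divider) asserts.
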
@@ -285,10 +143,8 @@
     : lock_("Jit code cache", kJitCodeCacheLock),
       lock_cond_("Jit code cache condition variable", lock_),
       collection_in_progress_(false),
+      code_map_(code_map),
       data_map_(data_map),
-      executable_code_map_(executable_code_map),
-      writable_code_map_(writable_code_map),
-      code_sync_map_(code_sync_map),
       max_capacity_(max_capacity),
       current_capacity_(initial_code_capacity + initial_data_capacity),
       code_end_(initial_code_capacity),
@@ -308,8 +164,7 @@
       inline_cache_cond_("Jit inline cache condition variable", lock_) {
 
   DCHECK_GE(max_capacity, initial_code_capacity + initial_data_capacity);
-  MemMap* writable_map = GetWritableMemMap();
-  code_mspace_ = create_mspace_with_base(writable_map->Begin(), code_end_, false /*locked*/);
+  code_mspace_ = create_mspace_with_base(code_map_->Begin(), code_end_, false /*locked*/);
   data_mspace_ = create_mspace_with_base(data_map_->Begin(), data_end_, false /*locked*/);
 
   if (code_mspace_ == nullptr || data_mspace_ == nullptr) {
@@ -318,10 +173,7 @@
 
   SetFootprintLimit(current_capacity_);
 
-  if (writable_code_map_ != nullptr) {
-    CHECKED_MPROTECT(writable_code_map_->Begin(), writable_code_map_->Size(), kProtReadOnly);
-  }
-  CHECKED_MPROTECT(executable_code_map_->Begin(), executable_code_map_->Size(), kProtCode);
+  CHECKED_MPROTECT(code_map_->Begin(), code_map_->Size(), kProtCode);
   CHECKED_MPROTECT(data_map_->Begin(), data_map_->Size(), kProtData);
 
   VLOG(jit) << "Created jit code cache: initial data size="
@@ -331,7 +183,7 @@
 }
 
 bool JitCodeCache::ContainsPc(const void* ptr) const {
-  return executable_code_map_->Begin() <= ptr && ptr < executable_code_map_->End();
+  return code_map_->Begin() <= ptr && ptr < code_map_->End();
 }
 
 bool JitCodeCache::ContainsMethod(ArtMethod* method) {
@@ -344,96 +196,27 @@
   return false;
 }
 
-/* This method is only for CHECK/DCHECK that pointers are within to a region. */
-static bool IsAddressInMap(const void* addr,
-                           const MemMap* mem_map,
-                           const char* check_name) {
-  if (addr == nullptr || mem_map->HasAddress(addr)) {
-    return true;
-  }
-  LOG(ERROR) << "Is" << check_name << "Address " << addr
-             << " not in [" << reinterpret_cast<void*>(mem_map->Begin())
-             << ", " << reinterpret_cast<void*>(mem_map->Begin() + mem_map->Size()) << ")";
-  return false;
-}
-
-bool JitCodeCache::IsDataAddress(const void* raw_addr) const {
-  return IsAddressInMap(raw_addr, data_map_.get(), "Data");
-}
-
-bool JitCodeCache::IsExecutableAddress(const void* raw_addr) const {
-  return IsAddressInMap(raw_addr, executable_code_map_.get(), "Executable");
-}
-
-bool JitCodeCache::IsWritableAddress(const void* raw_addr) const {
-  return IsAddressInMap(raw_addr, GetWritableMemMap(), "Writable");
-}
-
-// Convert one address within the source map to the same offset within the destination map.
-static void* ConvertAddress(const void* source_address,
-                            const MemMap* source_map,
-                            const MemMap* destination_map) {
-  DCHECK(source_map->HasAddress(source_address)) << source_address;
-  ptrdiff_t offset = reinterpret_cast<const uint8_t*>(source_address) - source_map->Begin();
-  uintptr_t address = reinterpret_cast<uintptr_t>(destination_map->Begin()) + offset;
-  return reinterpret_cast<void*>(address);
-}
-
-template <typename T>
-T* JitCodeCache::ToExecutableAddress(T* writable_address) const {
-  CHECK(IsWritableAddress(writable_address));
-  if (writable_address == nullptr) {
-    return nullptr;
-  }
-  void* executable_address = ConvertAddress(writable_address,
-                                            GetWritableMemMap(),
-                                            executable_code_map_.get());
-  CHECK(IsExecutableAddress(executable_address));
-  return reinterpret_cast<T*>(executable_address);
-}
-
-void* JitCodeCache::ToWritableAddress(const void* executable_address) const {
-  CHECK(IsExecutableAddress(executable_address));
-  if (executable_address == nullptr) {
-    return nullptr;
-  }
-  void* writable_address = ConvertAddress(executable_address,
-                                          executable_code_map_.get(),
-                                          GetWritableMemMap());
-  CHECK(IsWritableAddress(writable_address));
-  return writable_address;
-}
-
 class ScopedCodeCacheWrite : ScopedTrace {
  public:
-  explicit ScopedCodeCacheWrite(JitCodeCache* code_cache)
-      : ScopedTrace("ScopedCodeCacheWrite") {
+  explicit ScopedCodeCacheWrite(MemMap* code_map, bool only_for_tlb_shootdown = false)
+      : ScopedTrace("ScopedCodeCacheWrite"),
+        code_map_(code_map),
+        only_for_tlb_shootdown_(only_for_tlb_shootdown) {
     ScopedTrace trace("mprotect all");
-    int prot_to_start_writing = kProtAll;
-    if (code_cache->writable_code_map_ == nullptr) {
-      // If there is only one mapping, use the executable mapping and toggle between rwx and rx.
-      prot_to_start_writing = kProtAll;
-      prot_to_stop_writing_ = kProtCode;
-    } else {
-      // If there are two mappings, use the writable mapping and toggle between rw and r.
-      prot_to_start_writing = kProtData;
-      prot_to_stop_writing_ = kProtReadOnly;
-    }
-    writable_map_ = code_cache->GetWritableMemMap();
-    // If we're using ScopedCacheWrite only for TLB shootdown, we limit the scope of mprotect to
-    // one page.
-    size_ = writable_map_->Size();
-    CHECKED_MPROTECT(writable_map_->Begin(), size_, prot_to_start_writing);
+    CHECKED_MPROTECT(
+        code_map_->Begin(), only_for_tlb_shootdown_ ? kPageSize : code_map_->Size(), kProtAll);
   }
   ~ScopedCodeCacheWrite() {
     ScopedTrace trace("mprotect code");
-    CHECKED_MPROTECT(writable_map_->Begin(), size_, prot_to_stop_writing_);
+    CHECKED_MPROTECT(
+        code_map_->Begin(), only_for_tlb_shootdown_ ? kPageSize : code_map_->Size(), kProtCode);
   }
-
  private:
-  int prot_to_stop_writing_;
-  MemMap* writable_map_;
-  size_t size_;
+  MemMap* const code_map_;
+
+  // If we're using ScopedCodeCacheWrite only for TLB shootdown, we limit the scope of mprotect
+  // to one page.
+  const bool only_for_tlb_shootdown_;
 
   DISALLOW_COPY_AND_ASSIGN(ScopedCodeCacheWrite);
 };
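
ScopedCodeCacheWrite is the classic RAII mprotect idiom: the constructor opens the region for writing and the destructor restores the execute-only protection on every exit path. A stripped-down sketch (not ART's class):

    #include <sys/mman.h>
    #include <cstddef>

    // Assumes begin is page-aligned and size is a multiple of the page size,
    // as mprotect() requires; error handling elided (the real code CHECKs).
    class ScopedWritable {
     public:
      ScopedWritable(void* begin, std::size_t size) : begin_(begin), size_(size) {
        mprotect(begin_, size_, PROT_READ | PROT_WRITE | PROT_EXEC);  // open rwx
      }
      ~ScopedWritable() {
        mprotect(begin_, size_, PROT_READ | PROT_EXEC);  // restore rx on exit
      }
     private:
      void* const begin_;
      const std::size_t size_;
    };
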
@@ -448,6 +231,7 @@
                                   size_t fp_spill_mask,
                                   const uint8_t* code,
                                   size_t code_size,
+                                  size_t data_size,
                                   bool osr,
                                   Handle<mirror::ObjectArray<mirror::Object>> roots,
                                   bool has_should_deoptimize_flag,
@@ -462,6 +246,7 @@
                                        fp_spill_mask,
                                        code,
                                        code_size,
+                                       data_size,
                                        osr,
                                        roots,
                                        has_should_deoptimize_flag,
@@ -479,6 +264,7 @@
                                 fp_spill_mask,
                                 code,
                                 code_size,
+                                data_size,
                                 osr,
                                 roots,
                                 has_should_deoptimize_flag,
@@ -540,10 +326,8 @@
   }
 }
 
-uint8_t* JitCodeCache::GetRootTable(const void* code_ptr, uint32_t* number_of_roots) {
-  CHECK(IsExecutableAddress(code_ptr));
+static uint8_t* GetRootTable(const void* code_ptr, uint32_t* number_of_roots = nullptr) {
   OatQuickMethodHeader* method_header = OatQuickMethodHeader::FromCodePointer(code_ptr);
-  // GetOptimizedCodeInfoPtr uses offsets relative to the EXECUTABLE address.
   uint8_t* data = method_header->GetOptimizedCodeInfoPtr();
   uint32_t roots = GetNumberOfRoots(data);
   if (number_of_roots != nullptr) {
@@ -588,8 +372,6 @@
 void JitCodeCache::SweepRootTables(IsMarkedVisitor* visitor) {
   MutexLock mu(Thread::Current(), lock_);
   for (const auto& entry : method_code_map_) {
-    // GetRootTable takes an EXECUTABLE address.
-    CHECK(IsExecutableAddress(entry.first));
     uint32_t number_of_roots = 0;
     uint8_t* roots_data = GetRootTable(entry.first, &number_of_roots);
     GcRoot<mirror::Object>* roots = reinterpret_cast<GcRoot<mirror::Object>*>(roots_data);
@@ -627,19 +409,17 @@
   }
 }
 
-void JitCodeCache::FreeCodeAndData(const void* code_ptr) {
-  CHECK(IsExecutableAddress(code_ptr));
+void JitCodeCache::FreeCode(const void* code_ptr) {
+  uintptr_t allocation = FromCodeToAllocation(code_ptr);
   // Notify native debugger that we are about to remove the code.
   // It does nothing if we are not using native debugger.
   DeleteJITCodeEntryForAddress(reinterpret_cast<uintptr_t>(code_ptr));
-  // GetRootTable takes an EXECUTABLE address.
   FreeData(GetRootTable(code_ptr));
-  FreeRawCode(reinterpret_cast<uint8_t*>(FromCodeToAllocation(code_ptr)));
+  FreeCode(reinterpret_cast<uint8_t*>(allocation));
 }
 
 void JitCodeCache::FreeAllMethodHeaders(
     const std::unordered_set<OatQuickMethodHeader*>& method_headers) {
-  // method_headers are expected to be in the executable region.
   {
     MutexLock mu(Thread::Current(), *Locks::cha_lock_);
     Runtime::Current()->GetClassLinker()->GetClassHierarchyAnalysis()
@@ -651,9 +431,9 @@
   // so it's possible for the same method_header to start representing
   // different compiled code.
   MutexLock mu(Thread::Current(), lock_);
-  ScopedCodeCacheWrite scc(this);
+  ScopedCodeCacheWrite scc(code_map_.get());
   for (const OatQuickMethodHeader* method_header : method_headers) {
-    FreeCodeAndData(method_header->GetCode());
+    FreeCode(method_header->GetCode());
   }
 }
 
@@ -670,10 +450,9 @@
     // with the classlinker_classes_lock_ held, and suspending ourselves could
     // lead to a deadlock.
     {
-      ScopedCodeCacheWrite scc(this);
+      ScopedCodeCacheWrite scc(code_map_.get());
       for (auto it = method_code_map_.begin(); it != method_code_map_.end();) {
         if (alloc.ContainsUnsafe(it->second)) {
-          CHECK(IsExecutableAddress(OatQuickMethodHeader::FromCodePointer(it->first)));
           method_headers.insert(OatQuickMethodHeader::FromCodePointer(it->first));
           it = method_code_map_.erase(it);
         } else {
@@ -765,129 +544,6 @@
   method->SetCounter(std::min(jit_warmup_threshold - 1, 1));
 }
 
-static void FlushInstructionPiplines(uint8_t* sync_page) {
-  // After updating the JIT code cache we need to force all CPUs to
-  // flush their instruction pipelines. In the absence of system call
-  // to do this explicitly, we can achieve this indirectly by toggling
-  // permissions on an executable page. This should send an IPI to
-  // each core to update the TLB entry with the interrupt raised on
-  // each core causing the instruction pipeline to be flushed.
-  CHECKED_MPROTECT(sync_page, kPageSize, kProtAll);
-  // Ensure the sync_page is present otherwise a TLB update may not be
-  // necessary.
-  sync_page[0] = 0;
-  CHECKED_MPROTECT(sync_page, kPageSize, kProtCode);
-}
-
-#ifdef __aarch64__
-
-static void FlushJitCodeCacheRange(uint8_t* code_ptr,
-                                   uint8_t* writable_ptr,
-                                   size_t code_size) {
-  // Cache maintenance instructions can cause permission faults when a
-  // page is not present (e.g. swapped out or not backed). These
-  // faults should be handled by the kernel, but a bug in some Linux
-  // kernels may surface these permission faults to user-land which
-  // does not currently deal with them (b/63885946). To work around
-  // this, we read a value from each page to fault it in before
-  // attempting to perform cache maintenance operations.
-  //
-  // For reference, this behavior is caused by this commit:
-  // https://android.googlesource.com/kernel/msm/+/3fbe6bc28a6b9939d0650f2f17eb5216c719950c
-
-  // The cache-line size could be probed for from the CPU, but
-  // assuming a safe lower bound is safe for CPUs that have different
-  // cache-line sizes for big and little cores.
-  static const uintptr_t kSafeCacheLineSize = 32;
-
-  // Ensure stores are present in L1 data cache.
-  __asm __volatile("dsb ish" ::: "memory");
-
-  volatile uint8_t mutant;
-
-  // Push dirty cache-lines out to the point of unification (PoU). The
-  // point of unification is the first point in the cache/memory
-  // hierarchy where the instruction cache and data cache have the
-  // same view of memory. The PoU is where an instruction fetch will
-  // fetch the new code generated by the JIT.
-  //
-  // See: http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.den0024a/ch11s04.html
-  uintptr_t writable_addr = RoundDown(reinterpret_cast<uintptr_t>(writable_ptr),
-                                      kSafeCacheLineSize);
-  uintptr_t writable_end  = RoundUp(reinterpret_cast<uintptr_t>(writable_ptr) + code_size,
-                                    kSafeCacheLineSize);
-  while (writable_addr < writable_end) {
-    // Read from the cache-line to minimize the chance that a cache
-    // maintenance instruction causes a fault (see kernel bug comment
-    // above).
-    mutant = *reinterpret_cast<const uint8_t*>(writable_addr);
-
-    // Flush cache-line
-    __asm volatile("dc cvau, %0" :: "r"(writable_addr) : "memory");
-    writable_addr += kSafeCacheLineSize;
-  }
-
-  __asm __volatile("dsb ish" ::: "memory");
-
-  uintptr_t code_addr = RoundDown(reinterpret_cast<uintptr_t>(code_ptr), kSafeCacheLineSize);
-  const uintptr_t code_end = RoundUp(reinterpret_cast<uintptr_t>(code_ptr) + code_size,
-                                     kSafeCacheLineSize);
-  while (code_addr < code_end) {
-    // Read from the cache-line to minimize the chance that a cache
-    // maintenance instruction causes a fault (see kernel bug comment
-    // above).
-    mutant = *reinterpret_cast<const uint8_t*>(code_addr);
-
-    // Invalidating the data cache line is only strictly necessary
-    // when the JIT code cache has two mappings (the default). We know
-    // this cache line is clean so this is just invalidating it (using
-    // "dc ivac" would be preferable, but counts as a write and this
-    // memory may not be mapped write permission).
-    __asm volatile("dc cvau, %0" :: "r"(code_addr) : "memory");
-
-    // Invalidate the instruction cache line to force instructions in
-    // range to be re-fetched following update.
-    __asm volatile("ic ivau, %0" :: "r"(code_addr) : "memory");
-
-    code_addr += kSafeCacheLineSize;
-  }
-
-  // Wait for code cache invalidations to complete.
-  __asm __volatile("dsb ish" ::: "memory");
-
-  // Reset fetched instruction stream.
-  __asm __volatile("isb");
-}
-
-#else  // __aarch64
-
-static void FlushJitCodeCacheRange(uint8_t* code_ptr,
-                                   uint8_t* writable_ptr,
-                                   size_t code_size) {
-  if (writable_ptr != code_ptr) {
-    // When there are two mappings of the JIT code cache, RX and
-    // RW, flush the RW version first as we've just dirtied the
-    // cache lines with new code. Flushing the RX version first
-    // can cause a permission fault as the those addresses are not
-    // writable, but can appear dirty in the cache. There is a lot
-    // of potential subtlety here depending on how the cache is
-    // indexed and tagged.
-    //
-    // Flushing the RX version after the RW version is just
-    // invalidating cachelines in the instruction cache. This is
-    // necessary as the instruction cache will often have a
-    // different set of cache lines present and because the JIT
-    // code cache can start a new function at any boundary within
-    // a cache-line.
-    FlushDataCache(reinterpret_cast<char*>(writable_ptr),
-                   reinterpret_cast<char*>(writable_ptr + code_size));
-  }
-  FlushInstructionCache(reinterpret_cast<char*>(code_ptr),
-                        reinterpret_cast<char*>(code_ptr + code_size));
-}
-
-#endif  // __aarch64
-
 uint8_t* JitCodeCache::CommitCodeInternal(Thread* self,
                                           ArtMethod* method,
                                           uint8_t* stack_map,
@@ -898,6 +554,7 @@
                                           size_t fp_spill_mask,
                                           const uint8_t* code,
                                           size_t code_size,
+                                          size_t data_size,
                                           bool osr,
                                           Handle<mirror::ObjectArray<mirror::Object>> roots,
                                           bool has_should_deoptimize_flag,
@@ -917,37 +574,35 @@
     MutexLock mu(self, lock_);
     WaitForPotentialCollectionToComplete(self);
     {
-      ScopedCodeCacheWrite scc(this);
+      ScopedCodeCacheWrite scc(code_map_.get());
       memory = AllocateCode(total_size);
       if (memory == nullptr) {
         return nullptr;
       }
-      uint8_t* writable_ptr = memory + header_size;
-      code_ptr = ToExecutableAddress(writable_ptr);
+      code_ptr = memory + header_size;
 
-      std::copy(code, code + code_size, writable_ptr);
-      OatQuickMethodHeader* writable_method_header =
-          OatQuickMethodHeader::FromCodePointer(writable_ptr);
-      // We need to be able to write the OatQuickMethodHeader, so we use writable_method_header.
-      // Otherwise, the offsets encoded in OatQuickMethodHeader are used relative to an executable
-      // address, so we use code_ptr.
-      new (writable_method_header) OatQuickMethodHeader(
+      std::copy(code, code + code_size, code_ptr);
+      method_header = OatQuickMethodHeader::FromCodePointer(code_ptr);
+      new (method_header) OatQuickMethodHeader(
           code_ptr - stack_map,
           code_ptr - method_info,
           frame_size_in_bytes,
           core_spill_mask,
           fp_spill_mask,
           code_size);
-
-      FlushJitCodeCacheRange(code_ptr, writable_ptr, code_size);
-      FlushInstructionPiplines(code_sync_map_->Begin());
-
+      // Flush caches before we remove write permission because some ARMv8 Qualcomm kernels may
+      // trigger a segfault if a page fault occurs when requesting a cache maintenance operation.
+      // This is a kernel bug that we need to work around until affected devices (e.g. Nexus 5X and
+      // 6P) stop being supported or their kernels are fixed.
+      //
+      // For reference, this behavior is caused by this commit:
+      // https://android.googlesource.com/kernel/msm/+/3fbe6bc28a6b9939d0650f2f17eb5216c719950c
+      FlushInstructionCache(reinterpret_cast<char*>(code_ptr),
+                            reinterpret_cast<char*>(code_ptr + code_size));
       DCHECK(!Runtime::Current()->IsAotCompiler());
       if (has_should_deoptimize_flag) {
-        writable_method_header->SetHasShouldDeoptimizeFlag();
+        method_header->SetHasShouldDeoptimizeFlag();
       }
-      // All the pointers exported from the cache are executable addresses.
-      method_header = ToExecutableAddress(writable_method_header);
     }
 
     number_of_compilations_++;
@@ -986,14 +641,16 @@
     // but below we still make the compiled code valid for the method.
     MutexLock mu(self, lock_);
     // Fill the root table before updating the entry point.
-    CHECK(IsDataAddress(roots_data));
     DCHECK_EQ(FromStackMapToRoots(stack_map), roots_data);
     DCHECK_LE(roots_data, stack_map);
     FillRootTable(roots_data, roots);
-
-    // Ensure the updates to the root table are visible with a store fence.
-    QuasiAtomic::ThreadFenceSequentiallyConsistent();
-
+    {
+      // Flush the data cache, as compiled code references literals in it.
+      // We also need a TLB shootdown to act as a memory barrier across cores.
+      ScopedCodeCacheWrite ccw(code_map_.get(), /* only_for_tlb_shootdown */ true);
+      FlushDataCache(reinterpret_cast<char*>(roots_data),
+                     reinterpret_cast<char*>(roots_data + data_size));
+    }
     method_code_map_.Put(code_ptr, method);
     if (osr) {
       number_of_osr_compilations_++;
@@ -1041,11 +698,11 @@
 
   bool in_cache = false;
   {
-    ScopedCodeCacheWrite ccw(this);
+    ScopedCodeCacheWrite ccw(code_map_.get());
     for (auto code_iter = method_code_map_.begin(); code_iter != method_code_map_.end();) {
       if (code_iter->second == method) {
         if (release_memory) {
-          FreeCodeAndData(code_iter->first);
+          FreeCode(code_iter->first);
         }
         code_iter = method_code_map_.erase(code_iter);
         in_cache = true;
@@ -1099,10 +756,10 @@
     profiling_infos_.erase(profile);
   }
   method->SetProfilingInfo(nullptr);
-  ScopedCodeCacheWrite ccw(this);
+  ScopedCodeCacheWrite ccw(code_map_.get());
   for (auto code_iter = method_code_map_.begin(); code_iter != method_code_map_.end();) {
     if (code_iter->second == method) {
-      FreeCodeAndData(code_iter->first);
+      FreeCode(code_iter->first);
       code_iter = method_code_map_.erase(code_iter);
       continue;
     }
@@ -1168,7 +825,6 @@
                              uint8_t* stack_map_data,
                              uint8_t* roots_data) {
   DCHECK_EQ(FromStackMapToRoots(stack_map_data), roots_data);
-  CHECK(IsDataAddress(roots_data));
   MutexLock mu(self, lock_);
   FreeData(reinterpret_cast<uint8_t*>(roots_data));
 }
@@ -1290,11 +946,11 @@
 
 void JitCodeCache::SetFootprintLimit(size_t new_footprint) {
   size_t per_space_footprint = new_footprint / 2;
-  CHECK(IsAlignedParam(per_space_footprint, kPageSize));
+  DCHECK(IsAlignedParam(per_space_footprint, kPageSize));
   DCHECK_EQ(per_space_footprint * 2, new_footprint);
   mspace_set_footprint_limit(data_mspace_, per_space_footprint);
   {
-    ScopedCodeCacheWrite scc(this);
+    ScopedCodeCacheWrite scc(code_map_.get());
     mspace_set_footprint_limit(code_mspace_, per_space_footprint);
   }
 }
@@ -1315,9 +971,7 @@
     current_capacity_ = max_capacity_;
   }
 
-  if (!kIsDebugBuild || VLOG_IS_ON(jit)) {
-    LOG(INFO) << "Increasing code cache capacity to " << PrettySize(current_capacity_);
-  }
+  VLOG(jit) << "Increasing code cache capacity to " << PrettySize(current_capacity_);
 
   SetFootprintLimit(current_capacity_);
 
@@ -1372,8 +1026,8 @@
       number_of_collections_++;
       live_bitmap_.reset(CodeCacheBitmap::Create(
           "code-cache-bitmap",
-          reinterpret_cast<uintptr_t>(executable_code_map_->Begin()),
-          reinterpret_cast<uintptr_t>(executable_code_map_->Begin() + current_capacity_ / 2)));
+          reinterpret_cast<uintptr_t>(code_map_->Begin()),
+          reinterpret_cast<uintptr_t>(code_map_->Begin() + current_capacity_ / 2)));
       collection_in_progress_ = true;
     }
   }
@@ -1388,21 +1042,17 @@
       do_full_collection = ShouldDoFullCollection();
     }
 
-    if (!kIsDebugBuild || VLOG_IS_ON(jit)) {
-      LOG(INFO) << "Do "
-                << (do_full_collection ? "full" : "partial")
-                << " code cache collection, code="
-                << PrettySize(CodeCacheSize())
-                << ", data=" << PrettySize(DataCacheSize());
-    }
+    VLOG(jit) << "Do "
+              << (do_full_collection ? "full" : "partial")
+              << " code cache collection, code="
+              << PrettySize(CodeCacheSize())
+              << ", data=" << PrettySize(DataCacheSize());
 
     DoCollection(self, /* collect_profiling_info */ do_full_collection);
 
-    if (!kIsDebugBuild || VLOG_IS_ON(jit)) {
-      LOG(INFO) << "After code cache collection, code="
-                << PrettySize(CodeCacheSize())
-                << ", data=" << PrettySize(DataCacheSize());
-    }
+    VLOG(jit) << "After code cache collection, code="
+              << PrettySize(CodeCacheSize())
+              << ", data=" << PrettySize(DataCacheSize());
 
     {
       MutexLock mu(self, lock_);
@@ -1449,16 +1099,14 @@
   std::unordered_set<OatQuickMethodHeader*> method_headers;
   {
     MutexLock mu(self, lock_);
-    ScopedCodeCacheWrite scc(this);
+    ScopedCodeCacheWrite scc(code_map_.get());
     // Iterate over all compiled code and remove entries that are not marked.
     for (auto it = method_code_map_.begin(); it != method_code_map_.end();) {
       const void* code_ptr = it->first;
-      CHECK(IsExecutableAddress(code_ptr));
       uintptr_t allocation = FromCodeToAllocation(code_ptr);
       if (GetLiveBitmap()->Test(allocation)) {
         ++it;
       } else {
-        CHECK(IsExecutableAddress(it->first));
         method_headers.insert(OatQuickMethodHeader::FromCodePointer(it->first));
         it = method_code_map_.erase(it);
       }
@@ -1501,7 +1149,6 @@
     for (const auto& it : method_code_map_) {
       ArtMethod* method = it.second;
       const void* code_ptr = it.first;
-      CHECK(IsExecutableAddress(code_ptr));
       const OatQuickMethodHeader* method_header = OatQuickMethodHeader::FromCodePointer(code_ptr);
       if (method_header->GetEntryPoint() == method->GetEntryPointFromQuickCompiledCode()) {
         GetLiveBitmap()->AtomicTestAndSet(FromCodeToAllocation(code_ptr));
@@ -1527,7 +1174,6 @@
     // Free all profiling infos of methods not compiled nor being compiled.
     auto profiling_kept_end = std::remove_if(profiling_infos_.begin(), profiling_infos_.end(),
       [this] (ProfilingInfo* info) NO_THREAD_SAFETY_ANALYSIS {
-        CHECK(IsDataAddress(info));
         const void* ptr = info->GetMethod()->GetEntryPointFromQuickCompiledCode();
         // We have previously cleared the ProfilingInfo pointer in the ArtMethod in the hope
         // that the compiled code would not get revived. As mutator threads run concurrently,
@@ -1588,7 +1234,6 @@
   --it;
 
   const void* code_ptr = it->first;
-  CHECK(IsExecutableAddress(code_ptr));
   OatQuickMethodHeader* method_header = OatQuickMethodHeader::FromCodePointer(code_ptr);
   if (!method_header->Contains(pc)) {
     return nullptr;
@@ -1600,7 +1245,7 @@
     // is the one we expect. We change to the non-obsolete versions in the error message since the
     // obsolete version of the method might not be fully initialized yet. This situation can only
     // occur when we are in the process of allocating and setting up obsolete methods. Otherwise
-    // method and it->second should be identical. (See runtime/openjdkjvmti/ti_redefine.cc for more
+    // method and it->second should be identical. (See openjdkjvmti/ti_redefine.cc for more
     // information.)
     DCHECK_EQ(it->second->GetNonObsoleteMethod(), method->GetNonObsoleteMethod())
         << ArtMethod::PrettyMethod(method->GetNonObsoleteMethod()) << " "
@@ -1671,7 +1316,6 @@
   // store in the ArtMethod's ProfilingInfo pointer.
   QuasiAtomic::ThreadFenceRelease();
 
-  CHECK(IsDataAddress(info));
   method->SetProfilingInfo(info);
   profiling_infos_.push_back(info);
   histogram_profiling_info_memory_use_.AddValue(profile_info_size);
@@ -1684,8 +1328,7 @@
   if (code_mspace_ == mspace) {
     size_t result = code_end_;
     code_end_ += increment;
-    MemMap* writable_map = GetWritableMemMap();
-    return reinterpret_cast<void*>(result + writable_map->Begin());
+    return reinterpret_cast<void*>(result + code_map_->Begin());
   } else {
     DCHECK_EQ(data_mspace_, mspace);
     size_t result = data_end_;
@@ -1837,7 +1480,6 @@
 
 size_t JitCodeCache::GetMemorySizeOfCodePointer(const void* ptr) {
   MutexLock mu(Thread::Current(), lock_);
-  CHECK(IsExecutableAddress(ptr));
   return mspace_usable_size(reinterpret_cast<const void*>(FromCodeToAllocation(ptr)));
 }
 
@@ -1873,27 +1515,22 @@
   size_t header_size = RoundUp(sizeof(OatQuickMethodHeader), alignment);
   // Ensure the header ends up at expected instruction alignment.
   DCHECK_ALIGNED_PARAM(reinterpret_cast<uintptr_t>(result + header_size), alignment);
-  CHECK(IsWritableAddress(result));
   used_memory_for_code_ += mspace_usable_size(result);
   return result;
 }
 
-void JitCodeCache::FreeRawCode(void* code) {
-  CHECK(IsExecutableAddress(code));
-  void* writable_code = ToWritableAddress(code);
-  used_memory_for_code_ -= mspace_usable_size(writable_code);
-  mspace_free(code_mspace_, writable_code);
+void JitCodeCache::FreeCode(uint8_t* code) {
+  used_memory_for_code_ -= mspace_usable_size(code);
+  mspace_free(code_mspace_, code);
 }
 
 uint8_t* JitCodeCache::AllocateData(size_t data_size) {
   void* result = mspace_malloc(data_mspace_, data_size);
-  CHECK(IsDataAddress(reinterpret_cast<uint8_t*>(result)));
   used_memory_for_data_ += mspace_usable_size(result);
   return reinterpret_cast<uint8_t*>(result);
 }
 
 void JitCodeCache::FreeData(uint8_t* data) {
-  CHECK(IsDataAddress(data));
   used_memory_for_data_ -= mspace_usable_size(data);
   mspace_free(data_mspace_, data);
 }
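
The ScopedCodeCacheWrite guard threaded through the hunks above now takes the code map directly. As a minimal standalone sketch of the underlying RAII pattern, assuming POSIX mprotect and a guard name invented for illustration (the real guard lives in jit_code_cache.cc and operates on ART's MemMap):

#include <sys/mman.h>
#include <cstddef>

// Hypothetical stand-in for ScopedCodeCacheWrite: make a code region
// writable for the lifetime of the guard, then restore RX protection.
class ScopedWritableRegion {
 public:
  ScopedWritableRegion(void* begin, size_t size) : begin_(begin), size_(size) {
    // Writes are allowed only while the guard is alive.
    mprotect(begin_, size_, PROT_READ | PROT_WRITE);
  }
  ~ScopedWritableRegion() {
    // Back to read+execute once the write is done.
    mprotect(begin_, size_, PROT_READ | PROT_EXEC);
  }
 private:
  void* begin_;
  size_t size_;
};
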
diff --git a/runtime/jit/jit_code_cache.h b/runtime/jit/jit_code_cache.h
index 175501f..daa1d61 100644
--- a/runtime/jit/jit_code_cache.h
+++ b/runtime/jit/jit_code_cache.h
@@ -113,6 +113,7 @@
                       size_t fp_spill_mask,
                       const uint8_t* code,
                       size_t code_size,
+                      size_t data_size,
                       bool osr,
                       Handle<mirror::ObjectArray<mirror::Object>> roots,
                       bool has_should_deoptimize_flag,
@@ -228,8 +229,6 @@
       REQUIRES(!lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  uint8_t* GetRootTable(const void* code_ptr, uint32_t* number_of_roots = nullptr);
-
   // The GC needs to disallow the reading of inline caches when it processes them,
   // to avoid having a class being used while it is being deleted.
   void AllowInlineCacheAccess() REQUIRES(!lock_);
@@ -248,13 +247,9 @@
   }
 
  private:
-  friend class ScopedCodeCacheWrite;
-
   // Take ownership of maps.
   JitCodeCache(MemMap* code_map,
                MemMap* data_map,
-               MemMap* writable_code_map,
-               MemMap* code_sync_map,
                size_t initial_code_capacity,
                size_t initial_data_capacity,
                size_t max_capacity,
@@ -272,6 +267,7 @@
                               size_t fp_spill_mask,
                               const uint8_t* code,
                               size_t code_size,
+                              size_t data_size,
                               bool osr,
                               Handle<mirror::ObjectArray<mirror::Object>> roots,
                               bool has_should_deoptimize_flag,
@@ -296,7 +292,7 @@
       REQUIRES(!Locks::cha_lock_);
 
   // Free in the mspace allocations for `code_ptr`.
-  void FreeCodeAndData(const void* code_ptr) REQUIRES(lock_);
+  void FreeCode(const void* code_ptr) REQUIRES(lock_);
 
   // Number of bytes allocated in the code cache.
   size_t CodeCacheSizeLocked() REQUIRES(lock_);
@@ -329,7 +325,7 @@
   bool CheckLiveCompiledCodeHasProfilingInfo()
       REQUIRES(lock_);
 
-  void FreeRawCode(void* code) REQUIRES(lock_);
+  void FreeCode(uint8_t* code) REQUIRES(lock_);
   uint8_t* AllocateCode(size_t code_size) REQUIRES(lock_);
   void FreeData(uint8_t* data) REQUIRES(lock_);
   uint8_t* AllocateData(size_t data_size) REQUIRES(lock_);
@@ -339,61 +335,25 @@
       REQUIRES(!lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  MemMap* GetWritableMemMap() const {
-    if (writable_code_map_ == nullptr) {
-      // The system required us to map the JIT Code Cache RWX (see
-      // JitCodeCache::Create()).
-      return executable_code_map_.get();
-    } else {
-      // Executable code is mapped RX, and writable code is mapped RW
-      // to the underlying same memory, but at a different address.
-      return writable_code_map_.get();
-    }
-  }
-
-  bool IsDataAddress(const void* raw_addr) const;
-
-  bool IsExecutableAddress(const void* raw_addr) const;
-
-  bool IsWritableAddress(const void* raw_addr) const;
-
-  template <typename T>
-  T* ToExecutableAddress(T* writable_address) const;
-
-  void* ToWritableAddress(const void* executable_address) const;
-
   // Lock for guarding allocations, collections, and the method_code_map_.
   Mutex lock_;
   // Condition to wait on during collection.
   ConditionVariable lock_cond_ GUARDED_BY(lock_);
   // Whether there is a code cache collection in progress.
   bool collection_in_progress_ GUARDED_BY(lock_);
-  // JITting methods obviously requires both write and execute permissions on a region of memory.
-  // In tye typical (non-debugging) case, we separate the memory mapped view that can write the code
-  // from a view that the runtime uses to execute the code. Having these two views eliminates any
-  // single address region having rwx permissions.  An attacker could still write the writable
-  // address and then execute the executable address. We allocate the mappings with a random
-  // address relationship to each other which makes the attacker need two addresses rather than
-  // just one.  In the debugging case there is no file descriptor to back the
-  // shared memory, and hence we have to use a single mapping.
+  // Mem map which holds code.
+  std::unique_ptr<MemMap> code_map_;
   // Mem map which holds data (stack maps and profiling info).
   std::unique_ptr<MemMap> data_map_;
-  // Mem map which holds a non-writable view of code for JIT.
-  std::unique_ptr<MemMap> executable_code_map_;
-  // Mem map which holds a non-executable view of code for JIT.
-  std::unique_ptr<MemMap> writable_code_map_;
-  // Mem map which holds one executable page that we use for flushing instruction
-  // fetch buffers. The code on this page is never executed.
-  std::unique_ptr<MemMap> code_sync_map_;
   // The opaque mspace for allocating code.
   void* code_mspace_ GUARDED_BY(lock_);
   // The opaque mspace for allocating data.
   void* data_mspace_ GUARDED_BY(lock_);
   // Bitmap for collecting code and data.
   std::unique_ptr<CodeCacheBitmap> live_bitmap_;
-  // Holds non-writable compiled code associated to the ArtMethod.
+  // Holds compiled code associated with the ArtMethod.
   SafeMap<const void*, ArtMethod*> method_code_map_ GUARDED_BY(lock_);
-  // Holds non-writable osr compiled code associated to the ArtMethod.
+  // Holds osr compiled code associated with the ArtMethod.
   SafeMap<ArtMethod*, const void*> osr_code_map_ GUARDED_BY(lock_);
   // ProfilingInfo objects we have allocated.
   std::vector<ProfilingInfo*> profiling_infos_ GUARDED_BY(lock_);
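
The flush-before-write-protect ordering called out in the jit_code_cache.cc comment above ultimately reduces to a compiler cache-maintenance intrinsic. A minimal sketch, assuming GCC/Clang and a helper name chosen for illustration:

// Hypothetical helper mirroring what an instruction cache flush such as
// FlushInstructionCache boils down to.
void FlushJitRegion(char* begin, char* end) {
  // __builtin___clear_cache performs the required instruction-cache
  // maintenance (a no-op on x86, cache ops on ARM/AArch64).
  __builtin___clear_cache(begin, end);
}
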
diff --git a/runtime/jit/profile_saver.cc b/runtime/jit/profile_saver.cc
index 381e95f..af6a45f 100644
--- a/runtime/jit/profile_saver.cc
+++ b/runtime/jit/profile_saver.cc
@@ -356,7 +356,8 @@
             sampled_methods->AddReference(method.GetDexFile(), method.GetDexMethodIndex());
           }
         } else {
-          CHECK_EQ(method.GetCounter(), 0u);
+          CHECK_EQ(method.GetCounter(), 0u) << method.PrettyMethod()
+              << " access_flags=" << method.GetAccessFlags();
         }
       }
     }
diff --git a/runtime/mem_map.cc b/runtime/mem_map.cc
index 4e82480..743604c 100644
--- a/runtime/mem_map.cc
+++ b/runtime/mem_map.cc
@@ -496,7 +496,7 @@
     MEMORY_TOOL_MAKE_UNDEFINED(base_begin_, base_size_);
     int result = munmap(base_begin_, base_size_);
     if (result == -1) {
-      PLOG(FATAL) << "munmap failed: " << BaseBegin() << "..." << BaseEnd();
+      PLOG(FATAL) << "munmap failed";
     }
   }
 
@@ -535,13 +535,8 @@
   }
 }
 
-MemMap* MemMap::RemapAtEnd(uint8_t* new_end,
-                           const char* tail_name,
-                           int tail_prot,
-                           int sharing_flags,
-                           std::string* error_msg,
-                           bool use_ashmem,
-                           unique_fd* shmem_fd) {
+MemMap* MemMap::RemapAtEnd(uint8_t* new_end, const char* tail_name, int tail_prot,
+                           std::string* error_msg, bool use_ashmem) {
   use_ashmem = use_ashmem && !kIsTargetLinux;
   DCHECK_GE(new_end, Begin());
   DCHECK_LE(new_end, End());
@@ -560,12 +555,6 @@
   size_ = new_end - reinterpret_cast<uint8_t*>(begin_);
   base_size_ = new_base_end - reinterpret_cast<uint8_t*>(base_begin_);
   DCHECK_LE(begin_ + size_, reinterpret_cast<uint8_t*>(base_begin_) + base_size_);
-  if (base_size_ == 0u) {
-    // All pages in this MemMap have been handed out. Invalidate base
-    // pointer to prevent the destructor calling munmap() on
-    // zero-length region (which can't succeed).
-    base_begin_ = nullptr;
-  }
   size_t tail_size = old_end - new_end;
   uint8_t* tail_base_begin = new_base_end;
   size_t tail_base_size = old_base_end - new_base_end;
@@ -573,14 +562,14 @@
   DCHECK_ALIGNED(tail_base_size, kPageSize);
 
   unique_fd fd;
-  int flags = MAP_ANONYMOUS | sharing_flags;
+  int flags = MAP_PRIVATE | MAP_ANONYMOUS;
   if (use_ashmem) {
     // android_os_Debug.cpp read_mapinfo assumes all ashmem regions associated with the VM are
     // prefixed "dalvik-".
     std::string debug_friendly_name("dalvik-");
     debug_friendly_name += tail_name;
     fd.reset(ashmem_create_region(debug_friendly_name.c_str(), tail_base_size));
-    flags = MAP_FIXED | sharing_flags;
+    flags = MAP_PRIVATE | MAP_FIXED;
     if (fd.get() == -1) {
       *error_msg = StringPrintf("ashmem_create_region failed for '%s': %s",
                                 tail_name, strerror(errno));
@@ -614,9 +603,6 @@
                               fd.get());
     return nullptr;
   }
-  if (shmem_fd != nullptr) {
-    shmem_fd->reset(fd.release());
-  }
   return new MemMap(tail_name, actual, tail_size, actual, tail_base_size, tail_prot, false);
 }
 
diff --git a/runtime/mem_map.h b/runtime/mem_map.h
index d8908ad..5603963 100644
--- a/runtime/mem_map.h
+++ b/runtime/mem_map.h
@@ -25,7 +25,6 @@
 #include <string>
 
 #include "android-base/thread_annotations.h"
-#include "android-base/unique_fd.h"
 
 namespace art {
 
@@ -38,8 +37,6 @@
 #define USE_ART_LOW_4G_ALLOCATOR 0
 #endif
 
-using android::base::unique_fd;
-
 #ifdef __linux__
 static constexpr bool kMadviseZeroes = true;
 #else
@@ -171,14 +168,11 @@
   }
 
   // Unmap the pages at the end and remap them to create another memory map.
-  // sharing_flags should be either MAP_PRIVATE or MAP_SHARED.
   MemMap* RemapAtEnd(uint8_t* new_end,
                      const char* tail_name,
                      int tail_prot,
-                     int sharing_flags,
                      std::string* error_msg,
-                     bool use_ashmem = true,
-                     unique_fd* shmem_fd = nullptr);
+                     bool use_ashmem = true);
 
   static bool CheckNoGaps(MemMap* begin_map, MemMap* end_map)
       REQUIRES(!MemMap::mem_maps_lock_);
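
With the sharing_flags and shmem_fd parameters gone, a RemapAtEnd call now looks like the tests below. A minimal fragment, assuming `map` is an existing read-write anonymous MemMap of at least two pages (its creation is elided):

std::string error_msg;
// Keep the first page in `map`; the tail becomes a separate read-only map,
// now always MAP_PRIVATE since the sharing_flags parameter was removed.
MemMap* tail = map->RemapAtEnd(map->Begin() + kPageSize,
                               "example-tail",
                               PROT_READ,
                               &error_msg);
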
diff --git a/runtime/mem_map_test.cc b/runtime/mem_map_test.cc
index 99bf004..a4ebb16 100644
--- a/runtime/mem_map_test.cc
+++ b/runtime/mem_map_test.cc
@@ -74,7 +74,6 @@
     MemMap* m1 = m0->RemapAtEnd(base0 + page_size,
                                 "MemMapTest_RemapAtEndTest_map1",
                                 PROT_READ | PROT_WRITE,
-                                MAP_PRIVATE,
                                 &error_msg);
     // Check the states of the two maps.
     EXPECT_EQ(m0->Begin(), base0) << error_msg;
@@ -457,7 +456,6 @@
   std::unique_ptr<MemMap> m1(m0->RemapAtEnd(base0 + 3 * page_size,
                                             "MemMapTest_AlignByTest_map1",
                                             PROT_READ | PROT_WRITE,
-                                            MAP_PRIVATE,
                                             &error_msg));
   uint8_t* base1 = m1->Begin();
   ASSERT_TRUE(base1 != nullptr) << error_msg;
@@ -467,7 +465,6 @@
   std::unique_ptr<MemMap> m2(m1->RemapAtEnd(base1 + 4 * page_size,
                                             "MemMapTest_AlignByTest_map2",
                                             PROT_READ | PROT_WRITE,
-                                            MAP_PRIVATE,
                                             &error_msg));
   uint8_t* base2 = m2->Begin();
   ASSERT_TRUE(base2 != nullptr) << error_msg;
@@ -477,7 +474,6 @@
   std::unique_ptr<MemMap> m3(m2->RemapAtEnd(base2 + 3 * page_size,
                                             "MemMapTest_AlignByTest_map1",
                                             PROT_READ | PROT_WRITE,
-                                            MAP_PRIVATE,
                                             &error_msg));
   uint8_t* base3 = m3->Begin();
   ASSERT_TRUE(base3 != nullptr) << error_msg;
diff --git a/runtime/mirror/accessible_object.h b/runtime/mirror/accessible_object.h
index a217193..d489f14 100644
--- a/runtime/mirror/accessible_object.h
+++ b/runtime/mirror/accessible_object.h
@@ -17,11 +17,8 @@
 #ifndef ART_RUNTIME_MIRROR_ACCESSIBLE_OBJECT_H_
 #define ART_RUNTIME_MIRROR_ACCESSIBLE_OBJECT_H_
 
-#include "class.h"
-#include "gc_root.h"
 #include "object.h"
 #include "read_barrier_option.h"
-#include "thread.h"
 
 namespace art {
 
@@ -34,12 +31,6 @@
     return OFFSET_OF_OBJECT_MEMBER(AccessibleObject, flag_);
   }
 
-  template<bool kTransactionActive>
-  void SetAccessible(bool value) REQUIRES_SHARED(Locks::mutator_lock_) {
-    UNUSED(padding_);
-    return SetFieldBoolean<kTransactionActive>(FlagOffset(), value ? 1u : 0u);
-  }
-
   bool IsAccessible() REQUIRES_SHARED(Locks::mutator_lock_) {
     return GetFieldBoolean(FlagOffset());
   }
@@ -47,7 +38,7 @@
  private:
   uint8_t flag_;
   // Padding required for correct alignment of subclasses like Executable, Field, etc.
-  uint8_t padding_[1];
+  uint8_t padding_[1] ATTRIBUTE_UNUSED;
 
   DISALLOW_IMPLICIT_CONSTRUCTORS(AccessibleObject);
 };
diff --git a/runtime/mirror/array-inl.h b/runtime/mirror/array-inl.h
index 63142d5..2281245 100644
--- a/runtime/mirror/array-inl.h
+++ b/runtime/mirror/array-inl.h
@@ -27,8 +27,7 @@
 #include "class.h"
 #include "gc/heap-inl.h"
 #include "obj_ptr-inl.h"
-#include "object-inl.h"
-#include "thread.h"
+#include "thread-current-inl.h"
 
 namespace art {
 namespace mirror {
diff --git a/runtime/mirror/class.cc b/runtime/mirror/class.cc
index 6642869..c775cf4 100644
--- a/runtime/mirror/class.cc
+++ b/runtime/mirror/class.cc
@@ -464,14 +464,25 @@
   return FindInterfaceMethod(name, signature, pointer_size);
 }
 
+static inline bool IsValidInheritanceCheck(ObjPtr<mirror::Class> klass,
+                                           ObjPtr<mirror::Class> declaring_class)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  if (klass->IsArrayClass()) {
+    return declaring_class->IsObjectClass();
+  } else if (klass->IsInterface()) {
+    return declaring_class->IsObjectClass() || declaring_class == klass;
+  } else {
+    return klass->IsSubClass(declaring_class);
+  }
+}
+
 static inline bool IsInheritedMethod(ObjPtr<mirror::Class> klass,
                                      ObjPtr<mirror::Class> declaring_class,
                                      ArtMethod& method)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   DCHECK_EQ(declaring_class, method.GetDeclaringClass());
   DCHECK_NE(klass, declaring_class);
-  DCHECK(klass->IsArrayClass() ? declaring_class->IsObjectClass()
-                               : klass->IsSubClass(declaring_class));
+  DCHECK(IsValidInheritanceCheck(klass, declaring_class));
   uint32_t access_flags = method.GetAccessFlags();
   if ((access_flags & (kAccPublic | kAccProtected)) != 0) {
     return true;
diff --git a/runtime/mirror/field-inl.h b/runtime/mirror/field-inl.h
index d33df5c..ad48202 100644
--- a/runtime/mirror/field-inl.h
+++ b/runtime/mirror/field-inl.h
@@ -20,7 +20,8 @@
 #include "field.h"
 
 #include "art_field-inl.h"
-#include "mirror/dex_cache-inl.h"
+#include "class-inl.h"
+#include "dex_cache-inl.h"
 
 namespace art {
 
@@ -87,6 +88,10 @@
   SetFieldObject<kTransactionActive>(OFFSET_OF_OBJECT_MEMBER(Field, type_), type);
 }
 
+inline Primitive::Type Field::GetTypeAsPrimitiveType() {
+  return GetType()->GetPrimitiveType();
+}
+
 }  // namespace mirror
 }  // namespace art
 
diff --git a/runtime/mirror/field.h b/runtime/mirror/field.h
index 40186a6..6845575 100644
--- a/runtime/mirror/field.h
+++ b/runtime/mirror/field.h
@@ -20,8 +20,10 @@
 #include "accessible_object.h"
 #include "base/enums.h"
 #include "gc_root.h"
+#include "modifiers.h"
 #include "obj_ptr.h"
 #include "object.h"
+#include "primitive.h"
 #include "read_barrier_option.h"
 
 namespace art {
@@ -69,10 +71,7 @@
     return (GetAccessFlags() & kAccVolatile) != 0;
   }
 
-  ALWAYS_INLINE Primitive::Type GetTypeAsPrimitiveType()
-      REQUIRES_SHARED(Locks::mutator_lock_) {
-    return GetType()->GetPrimitiveType();
-  }
+  ALWAYS_INLINE Primitive::Type GetTypeAsPrimitiveType() REQUIRES_SHARED(Locks::mutator_lock_);
 
   mirror::Class* GetType() REQUIRES_SHARED(Locks::mutator_lock_) {
     return GetFieldObject<mirror::Class>(OFFSET_OF_OBJECT_MEMBER(Field, type_));
diff --git a/runtime/mirror/object-inl.h b/runtime/mirror/object-inl.h
index 086925b..6eb200d 100644
--- a/runtime/mirror/object-inl.h
+++ b/runtime/mirror/object-inl.h
@@ -393,14 +393,6 @@
 }
 
 template<VerifyObjectFlags kVerifyFlags, bool kIsVolatile>
-inline uint8_t Object::GetFieldBoolean(MemberOffset field_offset) {
-  if (kVerifyFlags & kVerifyThis) {
-    VerifyObject(this);
-  }
-  return GetField<uint8_t, kIsVolatile>(field_offset);
-}
-
-template<VerifyObjectFlags kVerifyFlags, bool kIsVolatile>
 inline int8_t Object::GetFieldByte(MemberOffset field_offset) {
   if (kVerifyFlags & kVerifyThis) {
     VerifyObject(this);
@@ -724,11 +716,10 @@
   }
   uint8_t* raw_addr = reinterpret_cast<uint8_t*>(this) + field_offset.Int32Value();
   HeapReference<T>* objref_addr = reinterpret_cast<HeapReference<T>*>(raw_addr);
-  T* result = ReadBarrier::Barrier<T, kReadBarrierOption>(this, field_offset, objref_addr);
-  if (kIsVolatile) {
-    // TODO: Refactor to use a SequentiallyConsistent load instead.
-    QuasiAtomic::ThreadFenceAcquire();  // Ensure visibility of operations preceding store.
-  }
+  T* result = ReadBarrier::Barrier<T, kIsVolatile, kReadBarrierOption>(
+      this,
+      field_offset,
+      objref_addr);
   if (kVerifyFlags & kVerifyReads) {
     VerifyObject(result);
   }
@@ -764,15 +755,7 @@
   }
   uint8_t* raw_addr = reinterpret_cast<uint8_t*>(this) + field_offset.Int32Value();
   HeapReference<Object>* objref_addr = reinterpret_cast<HeapReference<Object>*>(raw_addr);
-  if (kIsVolatile) {
-    // TODO: Refactor to use a SequentiallyConsistent store instead.
-    QuasiAtomic::ThreadFenceRelease();  // Ensure that prior accesses are visible before store.
-    objref_addr->Assign(new_value.Ptr());
-    QuasiAtomic::ThreadFenceSequentiallyConsistent();
-                                // Ensure this store occurs before any volatile loads.
-  } else {
-    objref_addr->Assign(new_value.Ptr());
-  }
+  objref_addr->Assign<kIsVolatile>(new_value.Ptr());
 }
 
 template<bool kTransactionActive, bool kCheckTransaction, VerifyObjectFlags kVerifyFlags,
@@ -843,13 +826,12 @@
   if (kTransactionActive) {
     Runtime::Current()->RecordWriteFieldReference(this, field_offset, old_value, true);
   }
-  HeapReference<Object> old_ref(HeapReference<Object>::FromObjPtr(old_value));
-  HeapReference<Object> new_ref(HeapReference<Object>::FromObjPtr(new_value));
+  uint32_t old_ref(PtrCompression<kPoisonHeapReferences, Object>::Compress(old_value));
+  uint32_t new_ref(PtrCompression<kPoisonHeapReferences, Object>::Compress(new_value));
   uint8_t* raw_addr = reinterpret_cast<uint8_t*>(this) + field_offset.Int32Value();
   Atomic<uint32_t>* atomic_addr = reinterpret_cast<Atomic<uint32_t>*>(raw_addr);
 
-  bool success = atomic_addr->CompareExchangeWeakSequentiallyConsistent(old_ref.reference_,
-                                                                        new_ref.reference_);
+  bool success = atomic_addr->CompareExchangeWeakSequentiallyConsistent(old_ref, new_ref);
   return success;
 }
 
@@ -885,13 +867,12 @@
   if (kTransactionActive) {
     Runtime::Current()->RecordWriteFieldReference(this, field_offset, old_value, true);
   }
-  HeapReference<Object> old_ref(HeapReference<Object>::FromObjPtr(old_value));
-  HeapReference<Object> new_ref(HeapReference<Object>::FromObjPtr(new_value));
+  uint32_t old_ref(PtrCompression<kPoisonHeapReferences, Object>::Compress(old_value));
+  uint32_t new_ref(PtrCompression<kPoisonHeapReferences, Object>::Compress(new_value));
   uint8_t* raw_addr = reinterpret_cast<uint8_t*>(this) + field_offset.Int32Value();
   Atomic<uint32_t>* atomic_addr = reinterpret_cast<Atomic<uint32_t>*>(raw_addr);
 
-  bool success = atomic_addr->CompareExchangeStrongSequentiallyConsistent(old_ref.reference_,
-                                                                          new_ref.reference_);
+  bool success = atomic_addr->CompareExchangeStrongSequentiallyConsistent(old_ref, new_ref);
   return success;
 }
 
@@ -915,13 +896,12 @@
   if (kTransactionActive) {
     Runtime::Current()->RecordWriteFieldReference(this, field_offset, old_value, true);
   }
-  HeapReference<Object> old_ref(HeapReference<Object>::FromObjPtr(old_value));
-  HeapReference<Object> new_ref(HeapReference<Object>::FromObjPtr(new_value));
+  uint32_t old_ref(PtrCompression<kPoisonHeapReferences, Object>::Compress(old_value));
+  uint32_t new_ref(PtrCompression<kPoisonHeapReferences, Object>::Compress(new_value));
   uint8_t* raw_addr = reinterpret_cast<uint8_t*>(this) + field_offset.Int32Value();
   Atomic<uint32_t>* atomic_addr = reinterpret_cast<Atomic<uint32_t>*>(raw_addr);
 
-  bool success = atomic_addr->CompareExchangeWeakRelaxed(old_ref.reference_,
-                                                         new_ref.reference_);
+  bool success = atomic_addr->CompareExchangeWeakRelaxed(old_ref, new_ref);
   return success;
 }
 
@@ -945,13 +925,12 @@
   if (kTransactionActive) {
     Runtime::Current()->RecordWriteFieldReference(this, field_offset, old_value, true);
   }
-  HeapReference<Object> old_ref(HeapReference<Object>::FromObjPtr(old_value));
-  HeapReference<Object> new_ref(HeapReference<Object>::FromObjPtr(new_value));
+  uint32_t old_ref(PtrCompression<kPoisonHeapReferences, Object>::Compress(old_value));
+  uint32_t new_ref(PtrCompression<kPoisonHeapReferences, Object>::Compress(new_value));
   uint8_t* raw_addr = reinterpret_cast<uint8_t*>(this) + field_offset.Int32Value();
   Atomic<uint32_t>* atomic_addr = reinterpret_cast<Atomic<uint32_t>*>(raw_addr);
 
-  bool success = atomic_addr->CompareExchangeWeakRelease(old_ref.reference_,
-                                                         new_ref.reference_);
+  bool success = atomic_addr->CompareExchangeWeakRelease(old_ref, new_ref);
   return success;
 }
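
All of the CAS hunks above converge on the same shape: compress both pointers to their 32-bit field representation, then compare-and-swap the raw field word. A minimal standalone sketch, with std::atomic standing in for ART's Atomic<uint32_t> and a hypothetical function name:

#include <atomic>
#include <cstdint>

// Weak, sequentially consistent CAS on a compressed reference field,
// mirroring CompareExchangeWeakSequentiallyConsistent above.
bool CasFieldCompressed(std::atomic<uint32_t>* field,
                        uint32_t old_ref,
                        uint32_t new_ref) {
  return field->compare_exchange_weak(old_ref, new_ref,
                                      std::memory_order_seq_cst);
}
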
 
diff --git a/runtime/mirror/object-readbarrier-inl.h b/runtime/mirror/object-readbarrier-inl.h
index 69365af..f076940 100644
--- a/runtime/mirror/object-readbarrier-inl.h
+++ b/runtime/mirror/object-readbarrier-inl.h
@@ -211,13 +211,12 @@
   if (kTransactionActive) {
     Runtime::Current()->RecordWriteFieldReference(this, field_offset, old_value, true);
   }
-  HeapReference<Object> old_ref(HeapReference<Object>::FromObjPtr(old_value));
-  HeapReference<Object> new_ref(HeapReference<Object>::FromObjPtr(new_value));
+  uint32_t old_ref(PtrCompression<kPoisonHeapReferences, Object>::Compress(old_value));
+  uint32_t new_ref(PtrCompression<kPoisonHeapReferences, Object>::Compress(new_value));
   uint8_t* raw_addr = reinterpret_cast<uint8_t*>(this) + field_offset.Int32Value();
   Atomic<uint32_t>* atomic_addr = reinterpret_cast<Atomic<uint32_t>*>(raw_addr);
 
-  bool success = atomic_addr->CompareExchangeStrongRelaxed(old_ref.reference_,
-                                                           new_ref.reference_);
+  bool success = atomic_addr->CompareExchangeStrongRelaxed(old_ref, new_ref);
   return success;
 }
 
@@ -241,13 +240,12 @@
   if (kTransactionActive) {
     Runtime::Current()->RecordWriteFieldReference(this, field_offset, old_value, true);
   }
-  HeapReference<Object> old_ref(HeapReference<Object>::FromObjPtr(old_value));
-  HeapReference<Object> new_ref(HeapReference<Object>::FromObjPtr(new_value));
+  uint32_t old_ref(PtrCompression<kPoisonHeapReferences, Object>::Compress(old_value));
+  uint32_t new_ref(PtrCompression<kPoisonHeapReferences, Object>::Compress(new_value));
   uint8_t* raw_addr = reinterpret_cast<uint8_t*>(this) + field_offset.Int32Value();
   Atomic<uint32_t>* atomic_addr = reinterpret_cast<Atomic<uint32_t>*>(raw_addr);
 
-  bool success = atomic_addr->CompareExchangeStrongRelease(old_ref.reference_,
-                                                           new_ref.reference_);
+  bool success = atomic_addr->CompareExchangeStrongRelease(old_ref, new_ref);
   return success;
 }
 
diff --git a/runtime/mirror/object.h b/runtime/mirror/object.h
index 886780f..aedcd66 100644
--- a/runtime/mirror/object.h
+++ b/runtime/mirror/object.h
@@ -380,7 +380,12 @@
 
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags, bool kIsVolatile = false>
   ALWAYS_INLINE uint8_t GetFieldBoolean(MemberOffset field_offset)
-      REQUIRES_SHARED(Locks::mutator_lock_);
+      REQUIRES_SHARED(Locks::mutator_lock_) {
+    if (kVerifyFlags & kVerifyThis) {
+      VerifyObject(this);
+    }
+    return GetField<uint8_t, kIsVolatile>(field_offset);
+  }
 
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags, bool kIsVolatile = false>
   ALWAYS_INLINE int8_t GetFieldByte(MemberOffset field_offset)
diff --git a/runtime/mirror/object_reference-inl.h b/runtime/mirror/object_reference-inl.h
index 22fb83c..60f3ce1 100644
--- a/runtime/mirror/object_reference-inl.h
+++ b/runtime/mirror/object_reference-inl.h
@@ -30,17 +30,10 @@
 }
 
 template<class MirrorType>
-HeapReference<MirrorType> HeapReference<MirrorType>::FromObjPtr(ObjPtr<MirrorType> ptr) {
-  return HeapReference<MirrorType>(ptr.Ptr());
-}
-
-template<class MirrorType>
 bool HeapReference<MirrorType>::CasWeakRelaxed(MirrorType* expected_ptr, MirrorType* new_ptr) {
-  HeapReference<Object> expected_ref(HeapReference<Object>::FromMirrorPtr(expected_ptr));
-  HeapReference<Object> new_ref(HeapReference<Object>::FromMirrorPtr(new_ptr));
-  Atomic<uint32_t>* atomic_reference = reinterpret_cast<Atomic<uint32_t>*>(&this->reference_);
-  return atomic_reference->CompareExchangeWeakRelaxed(expected_ref.reference_,
-                                                      new_ref.reference_);
+  return reference_.CompareExchangeWeakRelaxed(
+      Compression::Compress(expected_ptr),
+      Compression::Compress(new_ptr));
 }
 
 }  // namespace mirror
diff --git a/runtime/mirror/object_reference.h b/runtime/mirror/object_reference.h
index a96a120..c62ee6c 100644
--- a/runtime/mirror/object_reference.h
+++ b/runtime/mirror/object_reference.h
@@ -17,6 +17,7 @@
 #ifndef ART_RUNTIME_MIRROR_OBJECT_REFERENCE_H_
 #define ART_RUNTIME_MIRROR_OBJECT_REFERENCE_H_
 
+#include "atomic.h"
 #include "base/mutex.h"  // For Locks::mutator_lock_.
 #include "globals.h"
 #include "obj_ptr.h"
@@ -30,20 +31,43 @@
 // extra platform specific padding.
 #define MANAGED PACKED(4)
 
+template<bool kPoisonReferences, class MirrorType>
+class PtrCompression {
+ public:
+  // Compress reference to its bit representation.
+  static uint32_t Compress(MirrorType* mirror_ptr) {
+    uintptr_t as_bits = reinterpret_cast<uintptr_t>(mirror_ptr);
+    return static_cast<uint32_t>(kPoisonReferences ? -as_bits : as_bits);
+  }
+
+  // Uncompress an encoded reference from its bit representation.
+  static MirrorType* Decompress(uint32_t ref) {
+    uintptr_t as_bits = kPoisonReferences ? -ref : ref;
+    return reinterpret_cast<MirrorType*>(as_bits);
+  }
+
+  // Convert an ObjPtr to a compressed reference.
+  static uint32_t Compress(ObjPtr<MirrorType> ptr) REQUIRES_SHARED(Locks::mutator_lock_) {
+    return Compress(ptr.Ptr());
+  }
+};
+
 // Value type representing a reference to a mirror::Object of type MirrorType.
 template<bool kPoisonReferences, class MirrorType>
 class MANAGED ObjectReference {
+ private:
+  using Compression = PtrCompression<kPoisonReferences, MirrorType>;
+
  public:
-  MirrorType* AsMirrorPtr() const REQUIRES_SHARED(Locks::mutator_lock_) {
-    return UnCompress();
+  MirrorType* AsMirrorPtr() const {
+    return Compression::Decompress(reference_);
   }
 
-  void Assign(MirrorType* other) REQUIRES_SHARED(Locks::mutator_lock_) {
-    reference_ = Compress(other);
+  void Assign(MirrorType* other) {
+    reference_ = Compression::Compress(other);
   }
 
-  void Assign(ObjPtr<MirrorType> ptr)
-      REQUIRES_SHARED(Locks::mutator_lock_);
+  void Assign(ObjPtr<MirrorType> ptr) REQUIRES_SHARED(Locks::mutator_lock_);
 
   void Clear() {
     reference_ = 0;
@@ -58,48 +82,71 @@
     return reference_;
   }
 
+  static ObjectReference<kPoisonReferences, MirrorType> FromMirrorPtr(MirrorType* mirror_ptr)
+      REQUIRES_SHARED(Locks::mutator_lock_) {
+    return ObjectReference<kPoisonReferences, MirrorType>(mirror_ptr);
+  }
+
  protected:
-  explicit ObjectReference(MirrorType* mirror_ptr)
-      REQUIRES_SHARED(Locks::mutator_lock_)
-      : reference_(Compress(mirror_ptr)) {
+  explicit ObjectReference(MirrorType* mirror_ptr) REQUIRES_SHARED(Locks::mutator_lock_)
+      : reference_(Compression::Compress(mirror_ptr)) {
   }
 
-  // Compress reference to its bit representation.
-  static uint32_t Compress(MirrorType* mirror_ptr) REQUIRES_SHARED(Locks::mutator_lock_) {
-    uintptr_t as_bits = reinterpret_cast<uintptr_t>(mirror_ptr);
-    return static_cast<uint32_t>(kPoisonReferences ? -as_bits : as_bits);
-  }
-
-  // Uncompress an encoded reference from its bit representation.
-  MirrorType* UnCompress() const REQUIRES_SHARED(Locks::mutator_lock_) {
-    uintptr_t as_bits = kPoisonReferences ? -reference_ : reference_;
-    return reinterpret_cast<MirrorType*>(as_bits);
-  }
-
-  friend class Object;
-
   // The encoded reference to a mirror::Object.
   uint32_t reference_;
 };
 
 // References between objects within the managed heap.
+// Similar API to ObjectReference, but not a value type. Supports atomic access.
 template<class MirrorType>
-class MANAGED HeapReference : public ObjectReference<kPoisonHeapReferences, MirrorType> {
+class MANAGED HeapReference {
+ private:
+  using Compression = PtrCompression<kPoisonHeapReferences, MirrorType>;
+
  public:
+  HeapReference() REQUIRES_SHARED(Locks::mutator_lock_) : HeapReference(nullptr) {}
+
+  template <bool kIsVolatile = false>
+  MirrorType* AsMirrorPtr() const REQUIRES_SHARED(Locks::mutator_lock_) {
+    return Compression::Decompress(
+        kIsVolatile ? reference_.LoadSequentiallyConsistent() : reference_.LoadJavaData());
+  }
+
+  template <bool kIsVolatile = false>
+  void Assign(MirrorType* other) REQUIRES_SHARED(Locks::mutator_lock_) {
+    if (kIsVolatile) {
+      reference_.StoreSequentiallyConsistent(Compression::Compress(other));
+    } else {
+      reference_.StoreJavaData(Compression::Compress(other));
+    }
+  }
+
+  template <bool kIsVolatile = false>
+  void Assign(ObjPtr<MirrorType> ptr) REQUIRES_SHARED(Locks::mutator_lock_);
+
+  void Clear() {
+    reference_.StoreJavaData(0);
+    DCHECK(IsNull());
+  }
+
+  bool IsNull() const {
+    return reference_.LoadJavaData() == 0;
+  }
+
   static HeapReference<MirrorType> FromMirrorPtr(MirrorType* mirror_ptr)
       REQUIRES_SHARED(Locks::mutator_lock_) {
     return HeapReference<MirrorType>(mirror_ptr);
   }
 
-  static HeapReference<MirrorType> FromObjPtr(ObjPtr<MirrorType> ptr)
-      REQUIRES_SHARED(Locks::mutator_lock_);
-
   bool CasWeakRelaxed(MirrorType* old_ptr, MirrorType* new_ptr)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
  private:
   explicit HeapReference(MirrorType* mirror_ptr) REQUIRES_SHARED(Locks::mutator_lock_)
-      : ObjectReference<kPoisonHeapReferences, MirrorType>(mirror_ptr) {}
+      : reference_(Compression::Compress(mirror_ptr)) {}
+
+  // The encoded reference to a mirror::Object. Atomically updateable.
+  Atomic<uint32_t> reference_;
 };
 
 static_assert(sizeof(mirror::HeapReference<mirror::Object>) == kHeapReferenceSize,
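
A self-contained round trip through the PtrCompression scheme above, using raw address bits in place of mirror objects. With poisoning on, the stored field is the 32-bit negation of the address bits, and negation (its own inverse modulo 2^32) recovers them:

#include <cassert>
#include <cstdint>

template <bool kPoison>
uint32_t Compress(uintptr_t as_bits) {
  return static_cast<uint32_t>(kPoison ? -as_bits : as_bits);
}

template <bool kPoison>
uintptr_t Decompress(uint32_t ref) {
  // Negate in 32-bit arithmetic, exactly inverting Compress.
  return kPoison ? uint32_t(-ref) : ref;
}

int main() {
  uintptr_t addr = 0x12345678;  // ART heap references fit in 32 bits
  uint32_t stored = Compress<true>(addr);
  assert(stored != static_cast<uint32_t>(addr));  // poisoned representation
  assert(Decompress<true>(stored) == addr);       // round trip
  return 0;
}
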
diff --git a/runtime/native/java_lang_reflect_Field.cc b/runtime/native/java_lang_reflect_Field.cc
index 9f59a1f..2e4dd8a 100644
--- a/runtime/native/java_lang_reflect_Field.cc
+++ b/runtime/native/java_lang_reflect_Field.cc
@@ -27,7 +27,7 @@
 #include "dex_file_annotations.h"
 #include "jni_internal.h"
 #include "mirror/class-inl.h"
-#include "mirror/field.h"
+#include "mirror/field-inl.h"
 #include "native_util.h"
 #include "reflection-inl.h"
 #include "scoped_fast_native_object_access-inl.h"
diff --git a/runtime/native/sun_misc_Unsafe.cc b/runtime/native/sun_misc_Unsafe.cc
index 761362f..b2bdeed 100644
--- a/runtime/native/sun_misc_Unsafe.cc
+++ b/runtime/native/sun_misc_Unsafe.cc
@@ -67,10 +67,12 @@
   if (kUseReadBarrier) {
     // Need to make sure the reference stored in the field is a to-space one before attempting the
     // CAS or the CAS could fail incorrectly.
+    // Note that the read barrier load does NOT need to be volatile.
     mirror::HeapReference<mirror::Object>* field_addr =
         reinterpret_cast<mirror::HeapReference<mirror::Object>*>(
             reinterpret_cast<uint8_t*>(obj.Ptr()) + static_cast<size_t>(offset));
-    ReadBarrier::Barrier<mirror::Object, kWithReadBarrier, /* kAlwaysUpdateField */ true>(
+    ReadBarrier::Barrier<mirror::Object, /* kIsVolatile */ false, kWithReadBarrier,
+        /* kAlwaysUpdateField */ true>(
         obj.Ptr(),
         MemberOffset(offset),
         field_addr);
@@ -112,6 +114,7 @@
                                  jint newValue) {
   ScopedFastNativeObjectAccess soa(env);
   ObjPtr<mirror::Object> obj = soa.Decode<mirror::Object>(javaObj);
+  // TODO: A release store is likely to be faster on future processors.
   QuasiAtomic::ThreadFenceRelease();
   // JNI must use non transactional mode.
   obj->SetField32<false>(MemberOffset(offset), newValue);
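
The TODO above contrasts today's fence-then-plain-store sequence with a single release store. A sketch of the two shapes in std::atomic terms (the fence form is the stronger of the two; function names are illustrative):

#include <atomic>
#include <cstdint>

void OrderedPutIntFence(std::atomic<int32_t>* field, int32_t value) {
  // Current shape: a release fence, then a relaxed store.
  std::atomic_thread_fence(std::memory_order_release);
  field->store(value, std::memory_order_relaxed);
}

void OrderedPutIntRelease(std::atomic<int32_t>* field, int32_t value) {
  // Suggested alternative: one store with release semantics.
  field->store(value, std::memory_order_release);
}
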
diff --git a/runtime/oat.h b/runtime/oat.h
index c4a983e..1d79ed6 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -32,8 +32,8 @@
 class PACKED(4) OatHeader {
  public:
   static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' };
-  // Last oat version changed reason: MIPS Baker thunks.
-  static constexpr uint8_t kOatVersion[] = { '1', '3', '1', '\0' };
+  // Last oat version changed reason: Add dex section layout info to header.
+  static constexpr uint8_t kOatVersion[] = { '1', '3', '2', '\0' };
 
   static constexpr const char* kImageLocationKey = "image-location";
   static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";
diff --git a/runtime/oat_file.cc b/runtime/oat_file.cc
index 4033f8c..0af0622 100644
--- a/runtime/oat_file.cc
+++ b/runtime/oat_file.cc
@@ -544,6 +544,19 @@
       return false;
     }
 
+    uint32_t dex_layout_sections_offset;
+    if (UNLIKELY(!ReadOatDexFileData(*this, &oat, &dex_layout_sections_offset))) {
+      *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zd for '%s' truncated "
+                                    "after dex layout sections offset",
+                                GetLocation().c_str(),
+                                i,
+                                dex_file_location.c_str());
+      return false;
+    }
+    const DexLayoutSections* const dex_layout_sections = dex_layout_sections_offset != 0
+        ? reinterpret_cast<const DexLayoutSections*>(Begin() + dex_layout_sections_offset)
+        : nullptr;
+
     uint32_t method_bss_mapping_offset;
     if (UNLIKELY(!ReadOatDexFileData(*this, &oat, &method_bss_mapping_offset))) {
       *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zd for '%s' truncated "
@@ -635,7 +648,8 @@
                                               lookup_table_data,
                                               method_bss_mapping,
                                               class_offsets_pointer,
-                                              current_dex_cache_arrays);
+                                              current_dex_cache_arrays,
+                                              dex_layout_sections);
     oat_dex_files_storage_.push_back(oat_dex_file);
 
     // Add the location and canonical location (if different) to the oat_dex_files_ table.
@@ -1362,7 +1376,8 @@
                                 const uint8_t* lookup_table_data,
                                 const MethodBssMapping* method_bss_mapping_data,
                                 const uint32_t* oat_class_offsets_pointer,
-                                uint8_t* dex_cache_arrays)
+                                uint8_t* dex_cache_arrays,
+                                const DexLayoutSections* dex_layout_sections)
     : oat_file_(oat_file),
       dex_file_location_(dex_file_location),
       canonical_dex_file_location_(canonical_dex_file_location),
@@ -1371,7 +1386,8 @@
       lookup_table_data_(lookup_table_data),
       method_bss_mapping_(method_bss_mapping_data),
       oat_class_offsets_pointer_(oat_class_offsets_pointer),
-      dex_cache_arrays_(dex_cache_arrays) {
+      dex_cache_arrays_(dex_cache_arrays),
+      dex_layout_sections_(dex_layout_sections) {
   // Initialize TypeLookupTable.
   if (lookup_table_data_ != nullptr) {
     // Peek the number of classes from the DexFile.
@@ -1477,6 +1493,23 @@
   return nullptr;
 }
 
+// Madvise the dex file based on the state we are moving to.
+void OatDexFile::MadviseDexFile(const DexFile& dex_file, MadviseState state) {
+  if (state == MadviseState::kMadviseStateAtLoad) {
+    // Default every dex file to MADV_RANDOM when it is loaded.
+    MadviseLargestPageAlignedRegion(dex_file.Begin(),
+                                    dex_file.Begin() + dex_file.Size(),
+                                    MADV_RANDOM);
+  }
+  const OatFile::OatDexFile* oat_dex_file = dex_file.GetOatDexFile();
+  if (oat_dex_file != nullptr) {
+    // Should always be there.
+    const DexLayoutSections* const sections = oat_dex_file->GetDexLayoutSections();
+    CHECK(sections != nullptr);
+    sections->Madvise(&dex_file, state);
+  }
+}
+
 OatFile::OatClass::OatClass(const OatFile* oat_file,
                             mirror::Class::Status status,
                             OatClassType type,
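
A minimal sketch of what a page-aligned madvise like MadviseLargestPageAlignedRegion above amounts to: shrink the range inward to page boundaries and pass the advice through. The helper name and the 4 KiB page size are assumptions for illustration:

#include <sys/mman.h>
#include <cstdint>

static void MadviseAligned(const uint8_t* begin, const uint8_t* end, int advice) {
  const uintptr_t kPageMask = 4096 - 1;  // assume 4 KiB pages
  uintptr_t b = (reinterpret_cast<uintptr_t>(begin) + kPageMask) & ~kPageMask;
  uintptr_t e = reinterpret_cast<uintptr_t>(end) & ~kPageMask;
  if (b < e) {
    madvise(reinterpret_cast<void*>(b), e - b, advice);  // e.g. MADV_RANDOM
  }
}
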
diff --git a/runtime/oat_file.h b/runtime/oat_file.h
index be7d495..9a7fe51 100644
--- a/runtime/oat_file.h
+++ b/runtime/oat_file.h
@@ -26,6 +26,7 @@
 #include "base/stringpiece.h"
 #include "compiler_filter.h"
 #include "dex_file.h"
+#include "dex_file_layout.h"
 #include "method_bss_mapping.h"
 #include "mirror/class.h"
 #include "oat.h"
@@ -38,6 +39,7 @@
 
 class BitVector;
 class ElfFile;
+class DexLayoutSections;
 template <class MirrorType> class GcRoot;
 class MemMap;
 class OatDexFile;
@@ -442,6 +444,9 @@
                                                const char* descriptor,
                                                size_t hash);
 
+  // Madvise the dex file based on the state we are moving to.
+  static void MadviseDexFile(const DexFile& dex_file, MadviseState state);
+
   TypeLookupTable* GetTypeLookupTable() const {
     return lookup_table_.get();
   }
@@ -451,6 +456,11 @@
   // Create only with a type lookup table, used by the compiler to speed up compilation.
   explicit OatDexFile(std::unique_ptr<TypeLookupTable>&& lookup_table);
 
+  // Return the dex layout sections.
+  const DexLayoutSections* GetDexLayoutSections() const {
+    return dex_layout_sections_;
+  }
+
  private:
   OatDexFile(const OatFile* oat_file,
              const std::string& dex_file_location,
@@ -460,7 +470,8 @@
              const uint8_t* lookup_table_data,
              const MethodBssMapping* method_bss_mapping,
              const uint32_t* oat_class_offsets_pointer,
-             uint8_t* dex_cache_arrays);
+             uint8_t* dex_cache_arrays,
+             const DexLayoutSections* dex_layout_sections);
 
   static void AssertAotCompiler();
 
@@ -474,6 +485,7 @@
   const uint32_t* const oat_class_offsets_pointer_ = 0u;
   uint8_t* const dex_cache_arrays_ = nullptr;
   mutable std::unique_ptr<TypeLookupTable> lookup_table_;
+  const DexLayoutSections* const dex_layout_sections_ = nullptr;
 
   friend class OatFile;
   friend class OatFileBase;
diff --git a/runtime/oat_file_assistant.cc b/runtime/oat_file_assistant.cc
index dae41c1..3794f51 100644
--- a/runtime/oat_file_assistant.cc
+++ b/runtime/oat_file_assistant.cc
@@ -37,6 +37,7 @@
 #include "scoped_thread_state_change-inl.h"
 #include "utils.h"
 #include "vdex_file.h"
+#include "class_loader_context.h"
 
 namespace art {
 
@@ -225,13 +226,25 @@
 }
 
 OatFileAssistant::ResultOfAttemptToUpdate
-OatFileAssistant::MakeUpToDate(bool profile_changed, std::string* error_msg) {
+OatFileAssistant::MakeUpToDate(bool profile_changed,
+                               const std::string& class_loader_context,
+                               std::string* error_msg) {
   CompilerFilter::Filter target;
   if (!GetRuntimeCompilerFilterOption(&target, error_msg)) {
     return kUpdateNotAttempted;
   }
 
   OatFileInfo& info = GetBestInfo();
+  // TODO(calin, jeffhao): the context should really be passed to GetDexOptNeeded: b/62269291.
+  // This is actually not trivial in the current logic as it will interact with the collision
+  // check:
+  //   - currently, if the context does not match but we have no collisions, we still accept
+  //     the oat file.
+  //   - if GetDexOptNeeded would return kDex2OatFromScratch for a context mismatch and we make
+  //     the oat code up to date, the collision check becomes useless.
+  //   - however, MakeUpToDate will not always succeed (e.g. for primary apks, or for dex files
+  //     loaded in other processes). So it boils down to how far we want to complicate
+  //     the logic in order to enable the use of oat files. Maybe it is time to try to simplify it.
   switch (info.GetDexOptNeeded(target, profile_changed, /*downgrade*/ false)) {
     case kNoDexOptNeeded:
       return kUpdateSucceeded;
@@ -243,7 +256,7 @@
     case kDex2OatForBootImage:
     case kDex2OatForRelocation:
     case kDex2OatForFilter:
-      return GenerateOatFileNoChecks(info, target, error_msg);
+      return GenerateOatFileNoChecks(info, target, class_loader_context, error_msg);
   }
   UNREACHABLE();
 }
@@ -628,7 +641,10 @@
 }
 
 OatFileAssistant::ResultOfAttemptToUpdate OatFileAssistant::GenerateOatFileNoChecks(
-      OatFileAssistant::OatFileInfo& info, CompilerFilter::Filter filter, std::string* error_msg) {
+      OatFileAssistant::OatFileInfo& info,
+      CompilerFilter::Filter filter,
+      const std::string& class_loader_context,
+      std::string* error_msg) {
   CHECK(error_msg != nullptr);
 
   Runtime* runtime = Runtime::Current();
@@ -704,6 +720,7 @@
   args.push_back("--oat-fd=" + std::to_string(oat_file->Fd()));
   args.push_back("--oat-location=" + oat_file_name);
   args.push_back("--compiler-filter=" + CompilerFilter::NameOfFilter(filter));
+  args.push_back("--class-loader-context=" + class_loader_context);
 
   if (!Dex2Oat(args, error_msg)) {
     // Manually delete the oat and vdex files. This ensures there is no garbage
@@ -743,14 +760,6 @@
 
   std::vector<std::string> argv;
   argv.push_back(runtime->GetCompilerExecutable());
-  argv.push_back("--runtime-arg");
-  argv.push_back("-classpath");
-  argv.push_back("--runtime-arg");
-  std::string class_path = runtime->GetClassPathString();
-  if (class_path == "") {
-    class_path = OatFile::kSpecialSharedLibrary;
-  }
-  argv.push_back(class_path);
   if (runtime->IsJavaDebuggable()) {
     argv.push_back("--debuggable");
   }
diff --git a/runtime/oat_file_assistant.h b/runtime/oat_file_assistant.h
index 320aa4f..5eec943 100644
--- a/runtime/oat_file_assistant.h
+++ b/runtime/oat_file_assistant.h
@@ -185,12 +185,17 @@
   // profile_changed should be true to indicate the profile has recently
   // changed for this dex location.
   //
+  // If the dex files need to be made up to date, class_loader_context will be
+  // passed to dex2oat.
+  //
   // Returns the result of attempting to update the code.
   //
   // If the result is not kUpdateSucceeded, the value of error_msg will be set
   // to a string describing why there was a failure or the update was not
   // attempted. error_msg must not be null.
-  ResultOfAttemptToUpdate MakeUpToDate(bool profile_changed, std::string* error_msg);
+  ResultOfAttemptToUpdate MakeUpToDate(bool profile_changed,
+                                       const std::string& class_loader_context,
+                                       std::string* error_msg);
 
   // Returns an oat file that can be used for loading dex files.
   // Returns null if no suitable oat file was found.
@@ -389,7 +394,8 @@
   };
 
   // Generate the oat file for the given info from the dex file using the
-  // current runtime compiler options and the specified filter.
+  // current runtime compiler options, the specified filter and class loader
+  // context.
   // This does not check the current status before attempting to generate the
   // oat file.
   //
@@ -398,6 +404,7 @@
   // attempted. error_msg must not be null.
   ResultOfAttemptToUpdate GenerateOatFileNoChecks(OatFileInfo& info,
                                                   CompilerFilter::Filter target,
+                                                  const std::string& class_loader_context,
                                                   std::string* error_msg);
 
   // Return info for the best oat file.
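
A minimal usage sketch of the new signature, with illustrative paths; the context string uses the PCL[...] (PathClassLoader) spec that the tests below exercise, and the constructor arguments mirror those tests:

OatFileAssistant assistant("/data/app/example/base.apk",
                           kRuntimeISA,
                           /* load_executable */ false);
std::string error_msg;
// The context is forwarded to dex2oat as --class-loader-context.
assistant.MakeUpToDate(/* profile_changed */ false,
                       "PCL[/data/app/example/dep.apk]",
                       &error_msg);
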
diff --git a/runtime/oat_file_assistant_test.cc b/runtime/oat_file_assistant_test.cc
index c59dafc..e048177 100644
--- a/runtime/oat_file_assistant_test.cc
+++ b/runtime/oat_file_assistant_test.cc
@@ -27,8 +27,10 @@
 
 #include "art_field-inl.h"
 #include "class_linker-inl.h"
+#include "class_loader_context.h"
 #include "common_runtime_test.h"
 #include "dexopt_test.h"
+#include "oat_file.h"
 #include "oat_file_manager.h"
 #include "os.h"
 #include "scoped_thread_state_change-inl.h"
@@ -37,6 +39,8 @@
 
 namespace art {
 
+static const std::string kSpecialSharedLibrary = "&";
+
 class OatFileAssistantTest : public DexoptTest {};
 
 class OatFileAssistantNoDex2OatTest : public DexoptTest {
@@ -116,7 +120,8 @@
 
   // Trying to make the oat file up to date should not fail or crash.
   std::string error_msg;
-  EXPECT_EQ(OatFileAssistant::kUpdateSucceeded, oat_file_assistant.MakeUpToDate(false, &error_msg));
+  EXPECT_EQ(OatFileAssistant::kUpdateSucceeded,
+          oat_file_assistant.MakeUpToDate(false, kSpecialSharedLibrary, &error_msg));
 
   // Trying to get the best oat file should fail, but not crash.
   std::unique_ptr<OatFile> oat_file = oat_file_assistant.GetBestOatFile();
@@ -769,7 +774,8 @@
   std::string error_msg;
   Runtime::Current()->AddCompilerOption("--compiler-filter=speed");
   EXPECT_EQ(OatFileAssistant::kUpdateSucceeded,
-      oat_file_assistant.MakeUpToDate(false, &error_msg)) << error_msg;
+      oat_file_assistant.MakeUpToDate(false, kSpecialSharedLibrary, &error_msg)) <<
+          error_msg;
 
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
@@ -949,7 +955,7 @@
   // We should get kUpdateSucceeded from MakeUpToDate since there's nothing
   // that can be done in this situation.
   ASSERT_EQ(OatFileAssistant::kUpdateSucceeded,
-      oat_file_assistant.MakeUpToDate(false, &error_msg));
+      oat_file_assistant.MakeUpToDate(false, kSpecialSharedLibrary, &error_msg));
 
   // Verify it didn't create an oat in the default location (dalvik-cache).
   OatFileAssistant ofm(dex_location.c_str(), kRuntimeISA, false);
@@ -1028,7 +1034,7 @@
   std::string error_msg;
   Runtime::Current()->AddCompilerOption("--compiler-filter=speed");
   EXPECT_EQ(OatFileAssistant::kUpdateSucceeded,
-      oat_file_assistant.MakeUpToDate(false, &error_msg));
+      oat_file_assistant.MakeUpToDate(false, kSpecialSharedLibrary, &error_msg));
   EXPECT_TRUE(error_msg.empty());
 }
 
@@ -1175,7 +1181,8 @@
   std::string error_msg;
   Runtime::Current()->AddCompilerOption("--compiler-filter=quicken");
   EXPECT_EQ(OatFileAssistant::kUpdateSucceeded,
-      oat_file_assistant.MakeUpToDate(false, &error_msg)) << error_msg;
+      oat_file_assistant.MakeUpToDate(false, kSpecialSharedLibrary, &error_msg)) <<
+          error_msg;
   EXPECT_EQ(-OatFileAssistant::kNoDexOptNeeded,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kQuicken));
   EXPECT_EQ(-OatFileAssistant::kDex2OatForFilter,
@@ -1183,7 +1190,8 @@
 
   Runtime::Current()->AddCompilerOption("--compiler-filter=speed");
   EXPECT_EQ(OatFileAssistant::kUpdateSucceeded,
-      oat_file_assistant.MakeUpToDate(false, &error_msg)) << error_msg;
+      oat_file_assistant.MakeUpToDate(false, kSpecialSharedLibrary, &error_msg))
+          << error_msg;
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kQuicken));
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
@@ -1191,7 +1199,7 @@
 
   Runtime::Current()->AddCompilerOption("--compiler-filter=bogus");
   EXPECT_EQ(OatFileAssistant::kUpdateNotAttempted,
-      oat_file_assistant.MakeUpToDate(false, &error_msg));
+      oat_file_assistant.MakeUpToDate(false, kSpecialSharedLibrary, &error_msg));
 }
 
 TEST(OatFileAssistantUtilsTest, DexLocationToOdexFilename) {
@@ -1251,7 +1259,8 @@
       OatFileAssistant::kDefaultCompilerFilterForDexLoading;
   std::string error_msg;
   EXPECT_EQ(OatFileAssistant::kUpdateSucceeded,
-      oat_file_assistant.MakeUpToDate(false, &error_msg)) << error_msg;
+      oat_file_assistant.MakeUpToDate(false, kSpecialSharedLibrary, &error_msg)) <<
+          error_msg;
   EXPECT_EQ(-OatFileAssistant::kNoDexOptNeeded,
             oat_file_assistant.GetDexOptNeeded(default_filter));
   std::unique_ptr<OatFile> oat_file = oat_file_assistant.GetBestOatFile();
@@ -1259,6 +1268,50 @@
   EXPECT_EQ(default_filter, oat_file->GetCompilerFilter());
 }
 
+TEST_F(OatFileAssistantTest, MakeUpToDateWithSpecialSharedLibrary) {
+  std::string dex_location = GetScratchDir() + "/TestDex.jar";
+  Copy(GetDexSrc1(), dex_location);
+
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
+
+  const CompilerFilter::Filter default_filter =
+      OatFileAssistant::kDefaultCompilerFilterForDexLoading;
+  std::string error_msg;
+  int status = oat_file_assistant.MakeUpToDate(false, kSpecialSharedLibrary, &error_msg);
+  EXPECT_EQ(OatFileAssistant::kUpdateSucceeded, status) << error_msg;
+  EXPECT_EQ(-OatFileAssistant::kNoDexOptNeeded,
+      oat_file_assistant.GetDexOptNeeded(default_filter));
+  std::unique_ptr<OatFile> oat_file = oat_file_assistant.GetBestOatFile();
+  EXPECT_NE(nullptr, oat_file.get());
+  EXPECT_EQ(kSpecialSharedLibrary,
+            oat_file->GetOatHeader().GetStoreValueByKey(OatHeader::kClassPathKey));
+}
+
+TEST_F(OatFileAssistantTest, MakeUpToDateWithContext) {
+  std::string dex_location = GetScratchDir() + "/TestDex.jar";
+  std::string context_location = GetScratchDir() + "/ContextDex.jar";
+  Copy(GetDexSrc1(), dex_location);
+  Copy(GetDexSrc2(), context_location);
+
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
+
+  const CompilerFilter::Filter default_filter =
+      OatFileAssistant::kDefaultCompilerFilterForDexLoading;
+  std::string error_msg;
+  std::string context_str = "PCL[" + context_location + "]";
+  int status = oat_file_assistant.MakeUpToDate(false, context_str, &error_msg);
+  EXPECT_EQ(OatFileAssistant::kUpdateSucceeded, status) << error_msg;
+  EXPECT_EQ(-OatFileAssistant::kNoDexOptNeeded,
+      oat_file_assistant.GetDexOptNeeded(default_filter));
+  std::unique_ptr<OatFile> oat_file = oat_file_assistant.GetBestOatFile();
+  EXPECT_NE(nullptr, oat_file.get());
+  std::unique_ptr<ClassLoaderContext> context =
+      ClassLoaderContext::Create(context_str);
+  context->OpenDexFiles(kRuntimeISA, "");
+  EXPECT_EQ(context->EncodeContextForOatFile(""),
+      oat_file->GetOatHeader().GetStoreValueByKey(OatHeader::kClassPathKey));
+}
+
 // TODO: More Tests:
 //  * Test class linker falls back to unquickened dex for DexNoOat
 //  * Test class linker falls back to unquickened dex for MultiDexNoOat
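For reference, a minimal sketch of how such a context string round-trips, using only the ClassLoaderContext calls visible in the test above ("PCL" stands for PathClassLoader; the path is illustrative):

    // Sketch only: mirrors MakeUpToDateWithContext above.
    std::unique_ptr<ClassLoaderContext> ctx =
        ClassLoaderContext::Create("PCL[/data/app/ContextDex.jar]");
    if (ctx != nullptr) {
      ctx->OpenDexFiles(kRuntimeISA, /*base_dir*/ "");
      // The encoded form is what the test expects to find stored under
      // OatHeader::kClassPathKey in the generated oat file.
      std::string stored = ctx->EncodeContextForOatFile(/*base_dir*/ "");
    }
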
diff --git a/runtime/oat_file_manager.cc b/runtime/oat_file_manager.cc
index e950fca..de8f7ed 100644
--- a/runtime/oat_file_manager.cc
+++ b/runtime/oat_file_manager.cc
@@ -48,9 +48,12 @@
 
 using android::base::StringPrintf;
 
-// If true, then we attempt to load the application image if it exists.
+// If true, we attempt to load the application image if it exists.
 static constexpr bool kEnableAppImage = true;
 
+// If true, we advise the kernel about dex file mem map accesses.
+static constexpr bool kMadviseDexFileAccesses = false;
+
 const OatFile* OatFileManager::RegisterOatFile(std::unique_ptr<const OatFile> oat_file) {
   WriterMutexLock mu(Thread::Current(), *Locks::oat_file_manager_lock_);
   DCHECK(oat_file != nullptr);
@@ -329,12 +332,14 @@
 
 // Check for class-def collisions in dex files.
 //
-// This first walks the class loader chain, getting all the dex files from the class loader. If
-// the class loader is null or one of the class loaders in the chain is unsupported, we collect
-// dex files from all open non-boot oat files to be safe.
+// This first walks the class loader chain present in the given context, getting all the dex files
+// from the class loader.
 //
-// This first checks whether the shared libraries are in the expected order and the oat files
-// have the expected checksums. If so, we exit early. Otherwise, we do the collision check.
+// If the context is null (which means the initial class loader was null or unsupported),
+// this returns false. b/37777332.
+//
+// This first checks whether all class loaders in the context have the same type and
+// classpath. If so, we exit early. Otherwise, we do the collision check.
 //
 // The collision check works by maintaining a heap with one class from each dex file, sorted by the
 // class descriptor. Then a dex-file/class pair is continually removed from the heap and compared
@@ -342,23 +347,11 @@
 // the two elements agree on whether their dex file was from an already-loaded oat-file or the
 // new oat file. Any disagreement indicates a collision.
 bool OatFileManager::HasCollisions(const OatFile* oat_file,
-                                   jobject class_loader,
-                                   jobjectArray dex_elements,
+                                   const ClassLoaderContext* context,
                                    std::string* error_msg /*out*/) const {
   DCHECK(oat_file != nullptr);
   DCHECK(error_msg != nullptr);
 
-  // If the class_loader is null there's not much we can do. This happens if a dex files is loaded
-  // directly with DexFile APIs instead of using class loaders.
-  if (class_loader == nullptr) {
-    LOG(WARNING) << "Opening an oat file without a class loader. "
-        << "Are you using the deprecated DexFile APIs?";
-    return false;
-  }
-
-  std::unique_ptr<ClassLoaderContext> context =
-      ClassLoaderContext::CreateContextForClassLoader(class_loader, dex_elements);
-
   // The context might be null if there are unrecognized class loaders in the chain or they
   // don't meet sensible sanity conditions. In this case we assume that the app knows what it's
   // doing and accept the oat file.
@@ -406,6 +399,17 @@
   Locks::mutator_lock_->AssertNotHeld(self);
   Runtime* const runtime = Runtime::Current();
 
+  std::unique_ptr<ClassLoaderContext> context;
+  // If the class_loader is null, there's not much we can do. This happens if a dex file is loaded
+  // directly with DexFile APIs instead of using class loaders.
+  if (class_loader == nullptr) {
+    LOG(WARNING) << "Opening an oat file without a class loader. "
+                 << "Are you using the deprecated DexFile APIs?";
+    context = nullptr;
+  } else {
+    context = ClassLoaderContext::CreateContextForClassLoader(class_loader, dex_elements);
+  }
+
   OatFileAssistant oat_file_assistant(dex_location,
                                       kRuntimeISA,
                                       !runtime->IsAotCompiler());
@@ -425,7 +429,12 @@
     // Update the oat file on disk if we can, based on the --compiler-filter
     // option derived from the current runtime options.
     // This may fail, but that's okay. Best effort is all that matters here.
-    switch (oat_file_assistant.MakeUpToDate(/*profile_changed*/false, /*out*/ &error_msg)) {
+
+    const std::string& dex2oat_context = context == nullptr
+        ? OatFile::kSpecialSharedLibrary
+        : context->EncodeContextForDex2oat(/*base_dir*/ "");
+    switch (oat_file_assistant.MakeUpToDate(
+        /*profile_changed*/false, dex2oat_context, /*out*/ &error_msg)) {
       case OatFileAssistant::kUpdateFailed:
         LOG(WARNING) << error_msg;
         break;
@@ -451,7 +460,7 @@
   if ((class_loader != nullptr || dex_elements != nullptr) && oat_file != nullptr) {
     // Take the file only if it has no collisions, or we must take it because of preopting.
     bool accept_oat_file =
-        !HasCollisions(oat_file.get(), class_loader, dex_elements, /*out*/ &error_msg);
+        !HasCollisions(oat_file.get(), context.get(), /*out*/ &error_msg);
     if (!accept_oat_file) {
       // Failed the collision check. Print warning.
       if (Runtime::Current()->IsDexFileFallbackEnabled()) {
@@ -563,6 +572,11 @@
     }
     if (dex_files.empty()) {
       error_msgs->push_back("Failed to open dex files from " + source_oat_file->GetLocation());
+    } else if (kMadviseDexFileAccesses) {
+      // Opened dex files from an oat file, madvise them to their loaded state.
+      for (const std::unique_ptr<const DexFile>& dex_file : dex_files) {
+        OatDexFile::MadviseDexFile(*dex_file, MadviseState::kMadviseStateAtLoad);
+      }
     }
   }
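A condensed sketch of the fallback introduced above, assuming the EncodeContextForDex2oat call shown in the hunk (the wrapper function is illustrative):

    // Null context (legacy DexFile APIs) degrades to the special shared-library
    // marker "&"; otherwise the class loader chain is encoded for dex2oat.
    std::string ChooseDex2oatContext(ClassLoaderContext* context) {
      return context == nullptr
          ? OatFile::kSpecialSharedLibrary
          : context->EncodeContextForDex2oat(/*base_dir*/ "");
    }
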
 
diff --git a/runtime/oat_file_manager.h b/runtime/oat_file_manager.h
index 05a5f5b..4523494 100644
--- a/runtime/oat_file_manager.h
+++ b/runtime/oat_file_manager.h
@@ -35,6 +35,7 @@
 }  // namespace space
 }  // namespace gc
 
+class ClassLoaderContext;
 class DexFile;
 class OatFile;
 
@@ -105,15 +106,17 @@
   void DumpForSigQuit(std::ostream& os);
 
  private:
-  // Check that the shared libraries in the given oat file match those in the given class loader and
-  // dex elements. If the class loader is null or we do not support one of the class loaders in the
-  // chain, compare against all non-boot oat files instead. If the shared libraries are not ok,
-  // check for duplicate class definitions of the given oat file against the oat files (either from
-  // the class loader and dex elements if possible or all non-boot oat files otherwise).
+  // Check that the class loader context of the given oat file matches the given context.
+  // This will perform a check that all class loaders in the chain have the same type and
+  // classpath.
+  // If the context is null (which means the initial class loader was null or unsupported),
+  // this returns false.
+  // If the context does not validate, the method will check for duplicate class definitions of
+  // the given oat file against the oat files (either from the class loaders if possible or all
+  // non-boot oat files otherwise).
   // Return true if there are any class definition collisions in the oat_file.
   bool HasCollisions(const OatFile* oat_file,
-                     jobject class_loader,
-                     jobjectArray dex_elements,
+                     const ClassLoaderContext* context,
                      /*out*/ std::string* error_msg) const
       REQUIRES(!Locks::oat_file_manager_lock_);
 
diff --git a/runtime/quick_exception_handler.cc b/runtime/quick_exception_handler.cc
index db10103..b592247 100644
--- a/runtime/quick_exception_handler.cc
+++ b/runtime/quick_exception_handler.cc
@@ -533,21 +533,19 @@
 void QuickExceptionHandler::DeoptimizeSingleFrame(DeoptimizationKind kind) {
   DCHECK(is_deoptimization_);
 
-  if (VLOG_IS_ON(deopt) || kDebugExceptionDelivery) {
-    LOG(INFO) << "Single-frame deopting:";
-    DumpFramesWithType(self_, true);
-  }
-
   DeoptimizeStackVisitor visitor(self_, context_, this, true);
   visitor.WalkStack(true);
 
   // Compiled code made an explicit deoptimization.
   ArtMethod* deopt_method = visitor.GetSingleFrameDeoptMethod();
   DCHECK(deopt_method != nullptr);
-  LOG(INFO) << "Deoptimizing "
-            << deopt_method->PrettyMethod()
-            << " due to "
-            << GetDeoptimizationKindName(kind);
+  if (VLOG_IS_ON(deopt) || kDebugExceptionDelivery) {
+    LOG(INFO) << "Single-frame deopting: "
+              << deopt_method->PrettyMethod()
+              << " due to "
+              << GetDeoptimizationKindName(kind);
+    DumpFramesWithType(self_, /* details */ true);
+  }
   if (Runtime::Current()->UseJitCompilation()) {
     Runtime::Current()->GetJit()->GetCodeCache()->InvalidateCompiledCodeFor(
         deopt_method, visitor.GetSingleFrameDeoptQuickMethodHeader());
diff --git a/runtime/read_barrier-inl.h b/runtime/read_barrier-inl.h
index b0935c0..6424599 100644
--- a/runtime/read_barrier-inl.h
+++ b/runtime/read_barrier-inl.h
@@ -33,7 +33,8 @@
 // Disabled for performance reasons.
 static constexpr bool kCheckDebugDisallowReadBarrierCount = false;
 
-template <typename MirrorType, ReadBarrierOption kReadBarrierOption, bool kAlwaysUpdateField>
+template <typename MirrorType, bool kIsVolatile, ReadBarrierOption kReadBarrierOption,
+          bool kAlwaysUpdateField>
 inline MirrorType* ReadBarrier::Barrier(
     mirror::Object* obj, MemberOffset offset, mirror::HeapReference<MirrorType>* ref_addr) {
   constexpr bool with_read_barrier = kReadBarrierOption == kWithReadBarrier;
@@ -55,7 +56,7 @@
       }
       ref_addr = reinterpret_cast<mirror::HeapReference<MirrorType>*>(
           fake_address_dependency | reinterpret_cast<uintptr_t>(ref_addr));
-      MirrorType* ref = ref_addr->AsMirrorPtr();
+      MirrorType* ref = ref_addr->template AsMirrorPtr<kIsVolatile>();
       MirrorType* old_ref = ref;
       if (is_gray) {
         // Slow-path.
@@ -71,9 +72,9 @@
       return ref;
     } else if (kUseBrooksReadBarrier) {
       // To be implemented.
-      return ref_addr->AsMirrorPtr();
+      return ref_addr->template AsMirrorPtr<kIsVolatile>();
     } else if (kUseTableLookupReadBarrier) {
-      MirrorType* ref = ref_addr->AsMirrorPtr();
+      MirrorType* ref = ref_addr->template AsMirrorPtr<kIsVolatile>();
       MirrorType* old_ref = ref;
       // The heap or the collector can be null at startup. TODO: avoid the need for this null check.
       gc::Heap* heap = Runtime::Current()->GetHeap();
@@ -93,7 +94,7 @@
     }
   } else {
     // No read barrier.
-    return ref_addr->AsMirrorPtr();
+    return ref_addr->template AsMirrorPtr<kIsVolatile>();
   }
 }
 
diff --git a/runtime/read_barrier.h b/runtime/read_barrier.h
index d36acbc..8a106aa 100644
--- a/runtime/read_barrier.h
+++ b/runtime/read_barrier.h
@@ -46,9 +46,13 @@
   // fast-debug environment.
   DECLARE_RUNTIME_DEBUG_FLAG(kEnableReadBarrierInvariantChecks);
 
+  // Return the reference at ref_addr, invoking the read barrier as appropriate.
+  // ref_addr is an address within obj.
   // It's up to the implementation whether the given field gets updated whereas the return value
   // must be an updated reference unless kAlwaysUpdateField is true.
-  template <typename MirrorType, ReadBarrierOption kReadBarrierOption = kWithReadBarrier,
+  template <typename MirrorType,
+            bool kIsVolatile,
+            ReadBarrierOption kReadBarrierOption = kWithReadBarrier,
             bool kAlwaysUpdateField = false>
   ALWAYS_INLINE static MirrorType* Barrier(
       mirror::Object* obj, MemberOffset offset, mirror::HeapReference<MirrorType>* ref_addr)
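The "->template AsMirrorPtr<kIsVolatile>()" spelling in read_barrier-inl.h is required C++ syntax, not an ART convention: when a member template is invoked on an expression whose type depends on a template parameter, the "template" keyword disambiguates the '<'. A standalone sketch with illustrative types (not ART's HeapReference):

    template <typename T>
    struct Holder {
      template <bool kVolatile>
      T* Get() { return nullptr; }
    };

    template <typename T, bool kIsVolatile>
    T* Read(Holder<T>* h) {
      // Without "template", the compiler parses "h->Get < kIsVolatile" as a comparison.
      return h->template Get<kIsVolatile>();
    }
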
diff --git a/runtime/runtime-inl.h b/runtime/runtime-inl.h
index 609f0d6..4584351 100644
--- a/runtime/runtime-inl.h
+++ b/runtime/runtime-inl.h
@@ -49,7 +49,9 @@
   } else if (method == GetCalleeSaveMethodUnchecked(CalleeSaveType::kSaveRefsOnly)) {
     return GetCalleeSaveMethodFrameInfo(CalleeSaveType::kSaveRefsOnly);
   } else {
-    DCHECK_EQ(method, GetCalleeSaveMethodUnchecked(CalleeSaveType::kSaveEverything));
+    DCHECK(method == GetCalleeSaveMethodUnchecked(CalleeSaveType::kSaveEverything) ||
+           method == GetCalleeSaveMethodUnchecked(CalleeSaveType::kSaveEverythingForClinit) ||
+           method == GetCalleeSaveMethodUnchecked(CalleeSaveType::kSaveEverythingForSuspendCheck));
     return GetCalleeSaveMethodFrameInfo(CalleeSaveType::kSaveEverything);
   }
 }
diff --git a/runtime/runtime.h b/runtime/runtime.h
index 0c1344e..4e84468 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -714,7 +714,7 @@
   static constexpr int kProfileForground = 0;
   static constexpr int kProfileBackground = 1;
 
-  static constexpr uint32_t kCalleeSaveSize = 4u;
+  static constexpr uint32_t kCalleeSaveSize = 6u;
 
   // 64 bit so that we can share the same asm offsets for both 32 and 64 bits.
   uint64_t callee_save_methods_[kCalleeSaveSize];
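The bump from 4u to 6u matches the two new kSaveEverything variants checked in runtime-inl.h above. A sketch of the implied enumeration; the entries marked "assumed" do not appear in this patch and are illustrative:

    enum class CalleeSaveTypeSketch : uint32_t {
      kSaveAllCalleeSaves,             // assumed
      kSaveRefsOnly,
      kSaveRefsAndArgs,                // assumed
      kSaveEverything,
      kSaveEverythingForClinit,
      kSaveEverythingForSuspendCheck,
      kLastCalleeSaveType,             // sketch-local sentinel
    };
    static_assert(static_cast<uint32_t>(CalleeSaveTypeSketch::kLastCalleeSaveType) == 6u,
                  "must match kCalleeSaveSize");
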
diff --git a/runtime/stack.cc b/runtime/stack.cc
index 19df0d2..2e06536 100644
--- a/runtime/stack.cc
+++ b/runtime/stack.cc
@@ -634,7 +634,7 @@
 void StackVisitor::SanityCheckFrame() const {
   if (kIsDebugBuild) {
     ArtMethod* method = GetMethod();
-    auto* declaring_class = method->GetDeclaringClass();
+    mirror::Class* declaring_class = method->GetDeclaringClass();
     // Runtime methods have null declaring class.
     if (!method->IsRuntimeMethod()) {
       CHECK(declaring_class != nullptr);
@@ -647,11 +647,14 @@
     LinearAlloc* const linear_alloc = runtime->GetLinearAlloc();
     if (!linear_alloc->Contains(method)) {
       // Check class linker linear allocs.
-      mirror::Class* klass = method->GetDeclaringClass();
+      // We get the canonical method, as copied methods may have a declaring
+      // class from another class loader.
+      ArtMethod* canonical = method->GetCanonicalMethod();
+      mirror::Class* klass = canonical->GetDeclaringClass();
       LinearAlloc* const class_linear_alloc = (klass != nullptr)
           ? runtime->GetClassLinker()->GetAllocatorForClassLoader(klass->GetClassLoader())
           : linear_alloc;
-      if (!class_linear_alloc->Contains(method)) {
+      if (!class_linear_alloc->Contains(canonical)) {
         // Check image space.
         bool in_image = false;
         for (auto& space : runtime->GetHeap()->GetContinuousSpaces()) {
@@ -660,14 +663,14 @@
             const auto& header = image_space->GetImageHeader();
             const ImageSection& methods = header.GetMethodsSection();
             const ImageSection& runtime_methods = header.GetRuntimeMethodsSection();
-            const size_t offset =  reinterpret_cast<const uint8_t*>(method) - image_space->Begin();
+            const size_t offset = reinterpret_cast<const uint8_t*>(canonical) - image_space->Begin();
             if (methods.Contains(offset) || runtime_methods.Contains(offset)) {
               in_image = true;
               break;
             }
           }
         }
-        CHECK(in_image) << method->PrettyMethod() << " not in linear alloc or image";
+        CHECK(in_image) << canonical->PrettyMethod() << " not in linear alloc or image";
       }
     }
     if (cur_quick_frame_ != nullptr) {
diff --git a/runtime/thread.h b/runtime/thread.h
index 776096a..7540fd2 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -1705,8 +1705,13 @@
 
 class SCOPED_CAPABILITY ScopedAssertNoThreadSuspension {
  public:
-  ALWAYS_INLINE explicit ScopedAssertNoThreadSuspension(const char* cause)
-      ACQUIRE(Roles::uninterruptible_) {
+  ALWAYS_INLINE explicit ScopedAssertNoThreadSuspension(const char* cause,
+                                                        bool enabled = true)
+      ACQUIRE(Roles::uninterruptible_)
+      : enabled_(enabled) {
+    if (!enabled_) {
+      return;
+    }
     if (kIsDebugBuild) {
       self_ = Thread::Current();
       old_cause_ = self_->StartAssertNoThreadSuspension(cause);
@@ -1715,6 +1720,9 @@
     }
   }
   ALWAYS_INLINE ~ScopedAssertNoThreadSuspension() RELEASE(Roles::uninterruptible_) {
+    if (!enabled_) {
+      return;
+    }
     if (kIsDebugBuild) {
       self_->EndAssertNoThreadSuspension(old_cause_);
     } else {
@@ -1724,6 +1732,7 @@
 
  private:
   Thread* self_;
+  const bool enabled_;
   const char* old_cause_;
 };
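The enabled flag turns the guard into a conditional RAII helper; constructor and destructor must take the same early-out so the release side never touches state the acquire side skipped. A minimal sketch of the pattern, without ART's thread machinery:

    class ScopedGuardSketch {
     public:
      explicit ScopedGuardSketch(bool enabled) : enabled_(enabled) {
        if (!enabled_) {
          return;  // skip acquisition entirely
        }
        // ... acquire resource / start assertion ...
      }
      ~ScopedGuardSketch() {
        if (!enabled_) {
          return;  // must mirror the constructor's early-out
        }
        // ... release resource / end assertion ...
      }
     private:
      const bool enabled_;
    };
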
 
diff --git a/runtime/transaction.cc b/runtime/transaction.cc
index 50deb1f..e923aff 100644
--- a/runtime/transaction.cc
+++ b/runtime/transaction.cc
@@ -119,6 +119,27 @@
   return abort_message_;
 }
 
+bool Transaction::WriteConstraint(mirror::Object* obj, ArtField* field) {
+  MutexLock mu(Thread::Current(), log_lock_);
+  if (strict_  // no constraint for boot image
+      && field->IsStatic()  // no constraint for instance field updates
+      && obj != root_) {  // modifying other classes' static field, fail
+    return true;
+  }
+  return false;
+}
+
+bool Transaction::ReadConstraint(mirror::Object* obj, ArtField* field) {
+  DCHECK(field->IsStatic());
+  DCHECK(obj->IsClass());
+  MutexLock mu(Thread::Current(), log_lock_);
+  if (!strict_ ||  // no constraint for boot image
+      obj == root_) {  // self-updating, pass
+    return false;
+  }
+  return true;
+}
+
 void Transaction::RecordWriteFieldBoolean(mirror::Object* obj,
                                           MemberOffset field_offset,
                                           uint8_t value,
diff --git a/runtime/transaction.h b/runtime/transaction.h
index 64349de..4e9cde5 100644
--- a/runtime/transaction.h
+++ b/runtime/transaction.h
@@ -135,6 +135,14 @@
       REQUIRES(!log_lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
+  bool ReadConstraint(mirror::Object* obj, ArtField* field)
+      REQUIRES(!log_lock_)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
+  bool WriteConstraint(mirror::Object* obj, ArtField* field)
+      REQUIRES(!log_lock_)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
  private:
   class ObjectLog : public ValueObject {
    public:
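Both predicates reduce to one rule: in a strict (app image) transaction, only the class being initialized may touch static state. A sketch of the write-side decision, with root standing for the transaction's root_ class (the free function is illustrative):

    // Returns true when a static-field write must be rejected.
    bool WouldRejectStaticWrite(bool strict, bool is_static_field,
                                const void* holder, const void* root) {
      // Boot-image (non-strict) transactions and instance-field writes are
      // always allowed; only cross-class static writes fail.
      return strict && is_static_field && holder != root;
    }
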
diff --git a/runtime/utils.cc b/runtime/utils.cc
index ffa9d45..3fe18c7 100644
--- a/runtime/utils.cc
+++ b/runtime/utils.cc
@@ -18,6 +18,7 @@
 
 #include <inttypes.h>
 #include <pthread.h>
+#include <sys/mman.h>  // For madvise
 #include <sys/stat.h>
 #include <sys/syscall.h>
 #include <sys/types.h>
@@ -940,4 +941,18 @@
   }
 }
 
+int MadviseLargestPageAlignedRegion(const uint8_t* begin, const uint8_t* end, int advice) {
+  DCHECK_LE(begin, end);
+  begin = AlignUp(begin, kPageSize);
+  end = AlignDown(end, kPageSize);
+  if (begin < end) {
+    int result = madvise(const_cast<uint8_t*>(begin), end - begin, advice);
+    if (result != 0) {
+      PLOG(WARNING) << "madvise failed " << result;
+    }
+    return result;
+  }
+  return 0;
+}
+
 }  // namespace art
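Because the helper clamps to page boundaries, callers can pass arbitrary byte ranges and sub-page ranges simply become a no-op. A usage sketch with a standard advice flag (the wrapper function is illustrative):

    #include <sys/mman.h>

    void PrefetchRange(const uint8_t* begin, const uint8_t* end) {
      // MADV_WILLNEED asks the kernel to read ahead; only the fully
      // page-aligned interior of [begin, end) is actually advised.
      MadviseLargestPageAlignedRegion(begin, end, MADV_WILLNEED);
    }
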
diff --git a/runtime/utils.h b/runtime/utils.h
index f1f5576..739681d 100644
--- a/runtime/utils.h
+++ b/runtime/utils.h
@@ -338,6 +338,9 @@
   return (opnd < 0) ? -1 : ((opnd == 0) ? 0 : 1);
 }
 
+// Madvise the largest page-aligned region within [begin, end).
+int MadviseLargestPageAlignedRegion(const uint8_t* begin, const uint8_t* end, int advice);
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_UTILS_H_
diff --git a/runtime/vdex_file.cc b/runtime/vdex_file.cc
index e8f947c..b955220 100644
--- a/runtime/vdex_file.cc
+++ b/runtime/vdex_file.cc
@@ -20,6 +20,7 @@
 
 #include <memory>
 
+#include "base/bit_utils.h"
 #include "base/logging.h"
 #include "base/stl_util.h"
 #include "base/unix_file/fd_file.h"
@@ -134,6 +135,9 @@
   } else {
     // Fetch the next dex file. Return null if there is none.
     const uint8_t* data = cursor + reinterpret_cast<const DexFile::Header*>(cursor)->file_size_;
+    // Dex files are required to be 4-byte aligned; the OatWriter makes sure they are, see
+    // OatWriter::SeekToDexFiles.
+    data = AlignUp(data, 4);
     return (data == DexEnd()) ? nullptr : data;
   }
 }
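The AlignUp call assumes the usual power-of-two rounding contract from base/bit_utils.h. A sketch of that contract for pointers:

    // Round p up to the next multiple of n (n must be a power of two).
    const uint8_t* AlignUpSketch(const uint8_t* p, uintptr_t n) {
      uintptr_t v = reinterpret_cast<uintptr_t>(p);
      return reinterpret_cast<const uint8_t*>((v + n - 1) & ~(n - 1));
    }
    // AlignUpSketch(data, 4) leaves already 4-byte-aligned pointers unchanged.
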
diff --git a/runtime/vdex_file.h b/runtime/vdex_file.h
index 0351fd3..63058cf 100644
--- a/runtime/vdex_file.h
+++ b/runtime/vdex_file.h
@@ -72,8 +72,8 @@
 
    private:
     static constexpr uint8_t kVdexMagic[] = { 'v', 'd', 'e', 'x' };
-    // Last update: Change method lookup.
-    static constexpr uint8_t kVdexVersion[] = { '0', '0', '9', '\0' };
+    // Last update: Use set for unverified_classes_.
+    static constexpr uint8_t kVdexVersion[] = { '0', '1', '0', '\0' };
 
     uint8_t magic_[4];
     uint8_t version_[4];
diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc
index 0c460db..312d8df 100644
--- a/runtime/verifier/method_verifier.cc
+++ b/runtime/verifier/method_verifier.cc
@@ -3861,10 +3861,20 @@
   // TODO: Maybe we should not record dependency if the invoke type does not match the lookup type.
   VerifierDeps::MaybeRecordMethodResolution(*dex_file_, dex_method_idx, res_method);
 
+  bool must_fail = false;
+  // This is traditional and helps with screwy bytecode. It will tell you that, yes, a method
+  // exists, but that it is being called incorrectly. This significantly helps debugging, as the
+  // difference is otherwise hard to see locally.
+  // If we don't have a res_method here we must fail, so use this bool to enforce that with a
+  // DCHECK below.
   if (res_method == nullptr) {
+    must_fail = true;
     // Try to find the method also with the other type for better error reporting below
     // but do not store such bogus lookup result in the DexCache or VerifierDeps.
     if (klass->IsInterface()) {
+      // NB This is normally not allowed, but we want to find any static or private Object
+      // methods for error message purposes. This result is never actually returned.
+      // TODO We might want to change the verifier to not require this.
       res_method = klass->FindClassMethod(dex_cache_.Get(), dex_method_idx, pointer_size);
     } else {
       // If there was an interface method with the same signature,
@@ -3922,6 +3932,20 @@
     }
   }
 
+  // Check specifically for non-public object methods being provided for interface dispatch. This
+  // can occur if we failed to find a method with FindInterfaceMethod but later find one with
+  // FindClassMethod for error message use.
+  if (method_type == METHOD_INTERFACE &&
+      res_method->GetDeclaringClass()->IsObjectClass() &&
+      !res_method->IsPublic()) {
+    Fail(VERIFY_ERROR_NO_METHOD) << "invoke-interface " << klass->PrettyDescriptor() << "."
+                                 << dex_file_->GetMethodName(method_id) << " "
+                                 << dex_file_->GetMethodSignature(method_id) << " resolved to "
+                                 << "non-public object method " << res_method->PrettyMethod() << " "
+                                 << "but non-public Object methods are excluded from interface "
+                                 << "method resolution.";
+    return nullptr;
+  }
   // Check if access is allowed.
   if (!referrer.CanAccessMember(res_method->GetDeclaringClass(), res_method->GetAccessFlags())) {
     Fail(VERIFY_ERROR_ACCESS_METHOD) << "illegal method access (call "
@@ -3950,6 +3974,13 @@
                                        "type of " << res_method->PrettyMethod();
     return nullptr;
   }
+  // Make sure we weren't expecting to fail.
+  DCHECK(!must_fail) << "invoke type (" << method_type << ") "
+                     << klass->PrettyDescriptor() << "."
+                     << dex_file_->GetMethodName(method_id) << " "
+                     << dex_file_->GetMethodSignature(method_id) << " unexpectedly resolved to "
+                     << res_method->PrettyMethod() << " without error. Initially this method was "
+                     << "not found so we were expecting to fail for some reason.";
   return res_method;
 }
 
diff --git a/runtime/verifier/verifier_deps.cc b/runtime/verifier/verifier_deps.cc
index 112eec8..0481f24 100644
--- a/runtime/verifier/verifier_deps.cc
+++ b/runtime/verifier/verifier_deps.cc
@@ -33,7 +33,8 @@
 namespace art {
 namespace verifier {
 
-VerifierDeps::VerifierDeps(const std::vector<const DexFile*>& dex_files) {
+VerifierDeps::VerifierDeps(const std::vector<const DexFile*>& dex_files, bool output_only)
+    : output_only_(output_only) {
   for (const DexFile* dex_file : dex_files) {
     DCHECK(GetDexFileDeps(*dex_file) == nullptr);
     std::unique_ptr<DexFileDeps> deps(new DexFileDeps());
@@ -41,6 +42,9 @@
   }
 }
 
+VerifierDeps::VerifierDeps(const std::vector<const DexFile*>& dex_files)
+    : VerifierDeps(dex_files, /*output_only*/ true) {}
+
 void VerifierDeps::MergeWith(const VerifierDeps& other,
                              const std::vector<const DexFile*>& dex_files) {
   DCHECK(dex_deps_.size() == other.dex_deps_.size());
@@ -55,9 +59,7 @@
     MergeSets(my_deps->classes_, other_deps.classes_);
     MergeSets(my_deps->fields_, other_deps.fields_);
     MergeSets(my_deps->methods_, other_deps.methods_);
-    for (dex::TypeIndex entry : other_deps.unverified_classes_) {
-      my_deps->unverified_classes_.push_back(entry);
-    }
+    MergeSets(my_deps->unverified_classes_, other_deps.unverified_classes_);
   }
 }
 
@@ -503,7 +505,7 @@
   VerifierDeps* thread_deps = GetThreadLocalVerifierDeps();
   if (thread_deps != nullptr) {
     DexFileDeps* dex_deps = thread_deps->GetDexFileDeps(dex_file);
-    dex_deps->unverified_classes_.push_back(type_idx);
+    dex_deps->unverified_classes_.insert(type_idx);
   }
 }
 
@@ -582,6 +584,16 @@
   return dex::StringIndex(in);
 }
 
+// TODO: Clean this up; using a template arg here confuses the compiler.
+static inline void EncodeTuple(std::vector<uint8_t>* out, const dex::TypeIndex& t) {
+  EncodeUnsignedLeb128(out, Encode(t));
+}
+
+// TODO: Clean this up; using a template arg here confuses the compiler.
+static inline void DecodeTuple(const uint8_t** in, const uint8_t* end, dex::TypeIndex* t) {
+  *t = Decode<dex::TypeIndex>(DecodeUint32WithOverflowCheck(in, end));
+}
+
 template<typename T1, typename T2>
 static inline void EncodeTuple(std::vector<uint8_t>* out, const std::tuple<T1, T2>& t) {
   EncodeUnsignedLeb128(out, Encode(std::get<0>(t)));
@@ -688,13 +700,13 @@
     EncodeSet(buffer, deps.classes_);
     EncodeSet(buffer, deps.fields_);
     EncodeSet(buffer, deps.methods_);
-    EncodeUint16Vector(buffer, deps.unverified_classes_);
+    EncodeSet(buffer, deps.unverified_classes_);
   }
 }
 
 VerifierDeps::VerifierDeps(const std::vector<const DexFile*>& dex_files,
                            ArrayRef<const uint8_t> data)
-    : VerifierDeps(dex_files) {
+    : VerifierDeps(dex_files, /*output_only*/ false) {
   if (data.empty()) {
     // Return eagerly, as the first thing we expect from VerifierDeps data is
     // the number of created strings, even if there is no dependency.
@@ -711,7 +723,7 @@
     DecodeSet(&data_start, data_end, &deps->classes_);
     DecodeSet(&data_start, data_end, &deps->fields_);
     DecodeSet(&data_start, data_end, &deps->methods_);
-    DecodeUint16Vector(&data_start, data_end, &deps->unverified_classes_);
+    DecodeSet(&data_start, data_end, &deps->unverified_classes_);
   }
   CHECK_LE(data_start, data_end);
 }
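The Encode/Decode helpers above bottom out in unsigned LEB128. A sketch of the encoder's contract (ART's real helper lives in base/leb128.h; this mirrors it and is not the same code):

    static void EncodeUnsignedLeb128Sketch(std::vector<uint8_t>* out, uint32_t value) {
      while (value >= 0x80) {
        // Emit the low 7 bits with the continuation bit set.
        out->push_back(static_cast<uint8_t>((value & 0x7f) | 0x80));
        value >>= 7;
      }
      out->push_back(static_cast<uint8_t>(value));  // final byte, high bit clear
    }
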
diff --git a/runtime/verifier/verifier_deps.h b/runtime/verifier/verifier_deps.h
index b883a9e..4069a11 100644
--- a/runtime/verifier/verifier_deps.h
+++ b/runtime/verifier/verifier_deps.h
@@ -117,10 +117,14 @@
   bool ValidateDependencies(Handle<mirror::ClassLoader> class_loader, Thread* self) const
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  const std::vector<dex::TypeIndex>& GetUnverifiedClasses(const DexFile& dex_file) const {
+  const std::set<dex::TypeIndex>& GetUnverifiedClasses(const DexFile& dex_file) const {
     return GetDexFileDeps(dex_file)->unverified_classes_;
   }
 
+  bool OutputOnly() const {
+    return output_only_;
+  }
+
  private:
   static constexpr uint16_t kUnresolvedMarker = static_cast<uint16_t>(-1);
 
@@ -193,11 +197,13 @@
     std::set<MethodResolution> methods_;
 
-    // List of classes that were not fully verified in that dex file.
-    std::vector<dex::TypeIndex> unverified_classes_;
+    // Set of classes that were not fully verified in that dex file.
+    std::set<dex::TypeIndex> unverified_classes_;
 
     bool Equals(const DexFileDeps& rhs) const;
   };
 
+  VerifierDeps(const std::vector<const DexFile*>& dex_files, bool output_only);
+
   // Finds the DexFileDep instance associated with `dex_file`, or nullptr if
   // `dex_file` is not reported as being compiled.
   DexFileDeps* GetDexFileDeps(const DexFile& dex_file);
@@ -321,6 +327,9 @@
   // Map from DexFiles into dependencies collected from verification of their methods.
   std::map<const DexFile*, std::unique_ptr<DexFileDeps>> dex_deps_;
 
+  // If true, these verifier deps are only being generated; if false, they were loaded and are used to verify.
+  const bool output_only_;
+
   friend class VerifierDepsTest;
   ART_FRIEND_TEST(VerifierDepsTest, StringToId);
   ART_FRIEND_TEST(VerifierDepsTest, EncodeDecode);
diff --git a/test/075-verification-error/expected.txt b/test/075-verification-error/expected.txt
index 6e4f584..7ccc32c 100644
--- a/test/075-verification-error/expected.txt
+++ b/test/075-verification-error/expected.txt
@@ -10,3 +10,4 @@
 Got expected IllegalAccessError (meth-class)
 Got expected IllegalAccessError (field-class)
 Got expected IllegalAccessError (meth-meth)
+Got expected IncompatibleClassChangeError (interface)
diff --git a/test/075-verification-error/src/BadIfaceImpl.java b/test/075-verification-error/src/BadIfaceImpl.java
new file mode 100644
index 0000000..fa2a970
--- /dev/null
+++ b/test/075-verification-error/src/BadIfaceImpl.java
@@ -0,0 +1,17 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class BadIfaceImpl implements BadInterface { }
diff --git a/test/075-verification-error/src/BadInterface.java b/test/075-verification-error/src/BadInterface.java
new file mode 100644
index 0000000..439aba4
--- /dev/null
+++ b/test/075-verification-error/src/BadInterface.java
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public interface BadInterface {
+  public default Object internalClone() {
+    throw new Error("Should not be called");
+  }
+}
diff --git a/test/075-verification-error/src/Main.java b/test/075-verification-error/src/Main.java
index 3f2881e..13aeaee 100644
--- a/test/075-verification-error/src/Main.java
+++ b/test/075-verification-error/src/Main.java
@@ -28,6 +28,20 @@
         testClassNewInstance();
         testMissingStuff();
         testBadAccess();
+        testBadInterfaceMethod();
+    }
+
+    /**
+     * Try to create and invoke a non-existent interface method.
+     */
+    static void testBadInterfaceMethod() {
+        BadInterface badiface = new BadIfaceImpl();
+        try {
+            badiface.internalClone();
+        } catch (IncompatibleClassChangeError icce) {
+            // TODO b/64274113 This should really be an NSME
+            System.out.println("Got expected IncompatibleClassChangeError (interface)");
+            if (VERBOSE) System.out.println("--- " + icce);
+        }
     }
 
     /**
diff --git a/test/075-verification-error/src2/BadInterface.java b/test/075-verification-error/src2/BadInterface.java
new file mode 100644
index 0000000..5d939cb
--- /dev/null
+++ b/test/075-verification-error/src2/BadInterface.java
@@ -0,0 +1,17 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+public interface BadInterface { }
+
diff --git a/test/088-monitor-verification/src/Main.java b/test/088-monitor-verification/src/Main.java
index bca3df6..13a96c7 100644
--- a/test/088-monitor-verification/src/Main.java
+++ b/test/088-monitor-verification/src/Main.java
@@ -34,6 +34,12 @@
             disableStackFrameAsserts();
         }
 
+        ensureJitCompiled(Main.class, "recursiveSync");
+        ensureJitCompiled(Main.class, "nestedMayThrow");
+        ensureJitCompiled(Main.class, "constantLock");
+        ensureJitCompiled(Main.class, "notExcessiveNesting");
+        ensureJitCompiled(Main.class, "notNested");
+
         Main m = new Main();
 
         m.recursiveSync(0);
@@ -273,4 +279,5 @@
     public static native boolean runtimeIsSoftFail();
     public static native boolean isInterpreted();
     public static native void disableStackFrameAsserts();
+    private static native void ensureJitCompiled(Class<?> itf, String method_name);
 }
diff --git a/test/1338-gc-no-los/expected.txt b/test/1338-gc-no-los/expected.txt
new file mode 100644
index 0000000..36bec43
--- /dev/null
+++ b/test/1338-gc-no-los/expected.txt
@@ -0,0 +1 @@
+131072 200 true
diff --git a/test/1338-gc-no-los/info.txt b/test/1338-gc-no-los/info.txt
new file mode 100644
index 0000000..2e27357
--- /dev/null
+++ b/test/1338-gc-no-los/info.txt
@@ -0,0 +1 @@
+Test that the GC works with no large object space. Regression test for b/64393515.
\ No newline at end of file
diff --git a/test/1338-gc-no-los/run b/test/1338-gc-no-los/run
new file mode 100755
index 0000000..f3c43fb
--- /dev/null
+++ b/test/1338-gc-no-los/run
@@ -0,0 +1,16 @@
+#!/bin/bash
+#
+# Copyright 2017 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+./default-run "$@" --runtime-option -XX:LargeObjectSpace=disabled
diff --git a/test/1338-gc-no-los/src-art/Main.java b/test/1338-gc-no-los/src-art/Main.java
new file mode 100644
index 0000000..662fa7e
--- /dev/null
+++ b/test/1338-gc-no-los/src-art/Main.java
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import dalvik.system.VMRuntime;
+import java.lang.ref.WeakReference;
+import java.lang.reflect.Method;
+
+public class Main {
+    public static void main(String[] args) {
+        try {
+            // Allocate a large object.
+            byte[] arr = new byte[128 * 1024];
+            // Allocate a non movable object.
+            byte[] arr2 = (byte[])VMRuntime.getRuntime().newNonMovableArray(Byte.TYPE, 200);
+            // Put the array in a weak reference so that IsMarked is called by the GC.
+            WeakReference weakRef = new WeakReference(arr2);
+            // Do a GC.
+            Runtime.getRuntime().gc();
+            arr[0] = 1;
+            arr2[0] = 1;
+            System.out.println(arr.length + " " + arr2.length + " " + (weakRef.get() != null));
+        } catch (Exception e) {
+            System.out.println(e);
+        }
+    }
+}
diff --git a/test/162-method-resolution/expected.txt b/test/162-method-resolution/expected.txt
index 1bf39c9..9b48a4c 100644
--- a/test/162-method-resolution/expected.txt
+++ b/test/162-method-resolution/expected.txt
@@ -41,3 +41,6 @@
 Calling Test9User2.test():
 Caught java.lang.reflect.InvocationTargetException
   caused by java.lang.IncompatibleClassChangeError
+Calling Test10User.test():
+Caught java.lang.reflect.InvocationTargetException
+  caused by java.lang.IncompatibleClassChangeError
diff --git a/test/162-method-resolution/jasmin/Test10Base.j b/test/162-method-resolution/jasmin/Test10Base.j
new file mode 100644
index 0000000..628f38d
--- /dev/null
+++ b/test/162-method-resolution/jasmin/Test10Base.j
@@ -0,0 +1,25 @@
+; Copyright (C) 2017 The Android Open Source Project
+;
+; Licensed under the Apache License, Version 2.0 (the "License");
+; you may not use this file except in compliance with the License.
+; You may obtain a copy of the License at
+;
+;      http://www.apache.org/licenses/LICENSE-2.0
+;
+; Unless required by applicable law or agreed to in writing, software
+; distributed under the License is distributed on an "AS IS" BASIS,
+; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+; See the License for the specific language governing permissions and
+; limitations under the License.
+
+.class                   public Test10Base
+.super                   java/lang/Object
+.implements              Test10Interface
+
+.method                  public <init>()V
+   .limit stack          1
+   .limit locals         1
+   aload_0
+   invokespecial         java/lang/Object/<init>()V
+   return
+.end method
diff --git a/test/162-method-resolution/jasmin/Test10User.j b/test/162-method-resolution/jasmin/Test10User.j
new file mode 100644
index 0000000..6beadab
--- /dev/null
+++ b/test/162-method-resolution/jasmin/Test10User.j
@@ -0,0 +1,36 @@
+; Copyright (C) 2017 The Android Open Source Project
+;
+; Licensed under the Apache License, Version 2.0 (the "License");
+; you may not use this file except in compliance with the License.
+; You may obtain a copy of the License at
+;
+;      http://www.apache.org/licenses/LICENSE-2.0
+;
+; Unless required by applicable law or agreed to in writing, software
+; distributed under the License is distributed on an "AS IS" BASIS,
+; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+; See the License for the specific language governing permissions and
+; limitations under the License.
+
+.class public Test10User
+.super java/lang/Object
+
+.method public static test()V
+    .limit stack 3
+    .limit locals 3
+    new Test10Base
+    dup
+    invokespecial Test10Base.<init>()V
+    invokestatic  Test10User.doInvoke(LTest10Interface;)V
+    return
+.end method
+
+.method public static doInvoke(LTest10Interface;)V
+    .limit stack 3
+    .limit locals 3
+    aload_0
+    invokeinterface Test10Interface.clone()Ljava.lang.Object; 1
+    pop
+    return
+.end method
+
diff --git a/test/162-method-resolution/multidex.jpp b/test/162-method-resolution/multidex.jpp
index 22e3aee..5722f7f 100644
--- a/test/162-method-resolution/multidex.jpp
+++ b/test/162-method-resolution/multidex.jpp
@@ -112,6 +112,16 @@
   @@com.android.jack.annotations.ForceInMainDex
   class Test9User2
 
+Test10Base:
+  @@com.android.jack.annotations.ForceInMainDex
+  class Test10Base
+Test10Interface:
+  @@com.android.jack.annotations.ForceInMainDex
+  class Test10Interface
+Test10User:
+  @@com.android.jack.annotations.ForceInMainDex
+  class Test10User
+
 Main:
   @@com.android.jack.annotations.ForceInMainDex
   class Main
diff --git a/test/162-method-resolution/src/Main.java b/test/162-method-resolution/src/Main.java
index fa95aa7..864c878 100644
--- a/test/162-method-resolution/src/Main.java
+++ b/test/162-method-resolution/src/Main.java
@@ -36,6 +36,7 @@
             test7();
             test8();
             test9();
+            test10();
 
             // TODO: How to test that interface method resolution returns the unique
             // maximally-specific non-abstract superinterface method if there is one?
@@ -376,6 +377,31 @@
         invokeUserTest("Test9User2");
     }
 
+    /*
+     * Test10
+     * -----
+     * Tested function:
+     *     public class Test10Base implements Test10Interface { }
+     *     public interface Test10Interface { }
+     * Tested invokes:
+     *     invoke-interface Test10Interface.clone()Ljava/lang/Object; from Test10Caller in first dex
+     *         TODO b/64274113 This should throw a NSME (JLS 13.4.12) but actually throws an ICCE.
+     *         expected: Throws NoSuchMethodError (JLS 13.4.12)
+     *         actual: Throws IncompatibleClassChangeError
+     *
+     * This test is simulating compiling Test10Interface with "public Object clone()" method, along
+     * with every other class. Then we delete "clone" from Test10Interface only, which under JLS
+     * 13.4.12 is expected to be binary incompatible and throw a NoSuchMethodError.
+     *
+     * Files:
+     *   jasmin/Test10Base.j          - implements Test10Interface
+     *   src/Test10Interface.java     - defines empty interface
+     *   jasmin/Test10User.j          - invokeinterface Test10Interface.clone()Ljava/lang/Object;
+     */
+    private static void test10() throws Exception {
+        invokeUserTest("Test10User");
+    }
+
     private static void invokeUserTest(String userName) throws Exception {
         System.out.println("Calling " + userName + ".test():");
         try {
diff --git a/test/162-method-resolution/src/Test10Interface.java b/test/162-method-resolution/src/Test10Interface.java
new file mode 100644
index 0000000..3c75ea5
--- /dev/null
+++ b/test/162-method-resolution/src/Test10Interface.java
@@ -0,0 +1,18 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public interface Test10Interface { }
+
diff --git a/test/1917-get-stack-frame/expected.txt b/test/1917-get-stack-frame/expected.txt
index 26217e6..4c9efcf 100644
--- a/test/1917-get-stack-frame/expected.txt
+++ b/test/1917-get-stack-frame/expected.txt
@@ -1,33 +1,33 @@
 Recurring 5 times
 'private static native art.StackTrace$StackFrameData[] art.StackTrace.nativeGetStackTrace(java.lang.Thread)' line: -1
 'public static art.StackTrace$StackFrameData[] art.StackTrace.GetStackTrace(java.lang.Thread)' line: 60
-'public void art.Test1917$StackTraceGenerator.run()' line: 81
-'public void art.Test1917$RecurCount.doRecur(int)' line: 103
-'public void art.Test1917$RecurCount.doRecur(int)' line: 101
-'public void art.Test1917$RecurCount.doRecur(int)' line: 101
-'public void art.Test1917$RecurCount.doRecur(int)' line: 101
-'public void art.Test1917$RecurCount.doRecur(int)' line: 101
-'public void art.Test1917$RecurCount.doRecur(int)' line: 101
-'public void art.Test1917$RecurCount.run()' line: 96
-'public static void art.Test1917.run() throws java.lang.Exception' line: 132
+'public void art.Test1917$StackTraceGenerator.run()' line: 82
+'public void art.Test1917$RecurCount.doRecur(int)' line: 104
+'public void art.Test1917$RecurCount.doRecur(int)' line: 102
+'public void art.Test1917$RecurCount.doRecur(int)' line: 102
+'public void art.Test1917$RecurCount.doRecur(int)' line: 102
+'public void art.Test1917$RecurCount.doRecur(int)' line: 102
+'public void art.Test1917$RecurCount.doRecur(int)' line: 102
+'public void art.Test1917$RecurCount.run()' line: 97
+'public static void art.Test1917.run() throws java.lang.Exception' line: 133
 Recurring 5 times on another thread
 'private static native art.StackTrace$StackFrameData[] art.StackTrace.nativeGetStackTrace(java.lang.Thread)' line: -1
 'public static art.StackTrace$StackFrameData[] art.StackTrace.GetStackTrace(java.lang.Thread)' line: 60
-'public void art.Test1917$StackTraceGenerator.run()' line: 81
-'public void art.Test1917$RecurCount.doRecur(int)' line: 103
-'public void art.Test1917$RecurCount.doRecur(int)' line: 101
-'public void art.Test1917$RecurCount.doRecur(int)' line: 101
-'public void art.Test1917$RecurCount.doRecur(int)' line: 101
-'public void art.Test1917$RecurCount.doRecur(int)' line: 101
-'public void art.Test1917$RecurCount.doRecur(int)' line: 101
-'public void art.Test1917$RecurCount.run()' line: 96
+'public void art.Test1917$StackTraceGenerator.run()' line: 82
+'public void art.Test1917$RecurCount.doRecur(int)' line: 104
+'public void art.Test1917$RecurCount.doRecur(int)' line: 102
+'public void art.Test1917$RecurCount.doRecur(int)' line: 102
+'public void art.Test1917$RecurCount.doRecur(int)' line: 102
+'public void art.Test1917$RecurCount.doRecur(int)' line: 102
+'public void art.Test1917$RecurCount.doRecur(int)' line: 102
+'public void art.Test1917$RecurCount.run()' line: 97
 Recurring 5 times on another thread. Stack trace from main thread!
 'public void java.util.concurrent.Semaphore.acquire() throws java.lang.InterruptedException' line: <NOT-DETERMINISTIC>
 'public void art.Test1917$ThreadPauser.run()' line: 46
-'public void art.Test1917$RecurCount.doRecur(int)' line: 103
-'public void art.Test1917$RecurCount.doRecur(int)' line: 101
-'public void art.Test1917$RecurCount.doRecur(int)' line: 101
-'public void art.Test1917$RecurCount.doRecur(int)' line: 101
-'public void art.Test1917$RecurCount.doRecur(int)' line: 101
-'public void art.Test1917$RecurCount.doRecur(int)' line: 101
-'public void art.Test1917$RecurCount.run()' line: 96
+'public void art.Test1917$RecurCount.doRecur(int)' line: 104
+'public void art.Test1917$RecurCount.doRecur(int)' line: 102
+'public void art.Test1917$RecurCount.doRecur(int)' line: 102
+'public void art.Test1917$RecurCount.doRecur(int)' line: 102
+'public void art.Test1917$RecurCount.doRecur(int)' line: 102
+'public void art.Test1917$RecurCount.doRecur(int)' line: 102
+'public void art.Test1917$RecurCount.run()' line: 97
diff --git a/test/1917-get-stack-frame/src/art/Test1917.java b/test/1917-get-stack-frame/src/art/Test1917.java
index 1057235..def7530 100644
--- a/test/1917-get-stack-frame/src/art/Test1917.java
+++ b/test/1917-get-stack-frame/src/art/Test1917.java
@@ -51,6 +51,7 @@
 
     public void waitForOtherThreadToPause() throws Exception {
       sem_wakeup_main.acquire();
+      while (!sem_wait.hasQueuedThreads()) {}
     }
 
     public void wakeupOtherThread() throws Exception {
diff --git a/test/449-checker-bce/src/Main.java b/test/449-checker-bce/src/Main.java
index 5103540..f466eea 100644
--- a/test/449-checker-bce/src/Main.java
+++ b/test/449-checker-bce/src/Main.java
@@ -876,6 +876,91 @@
     return true;
   }
 
+  /// CHECK-START: void Main.modArrayIndex1(int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArraySet
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArraySet
+
+  /// CHECK-START: void Main.modArrayIndex1(int[]) BCE (after)
+  /// CHECK-NOT: Deoptimize
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArraySet
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-DAG: ArraySet
+  public static void modArrayIndex1(int[] array) {
+    for(int i = 0; i < 100; i++) {
+      // Cannot statically eliminate, for example, when array.length == 5.
+      // Currently dynamic BCE isn't applied for this case.
+      array[i % 10] = i;
+      // Can be eliminated by BCE.
+      array[i % array.length] = i;
+    }
+  }
+
+  /// CHECK-START: void Main.modArrayIndex2(int[], int) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArraySet
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArraySet
+
+  /// CHECK-START: void Main.modArrayIndex2(int[], int) BCE (after)
+  /// CHECK-NOT: Deoptimize
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArraySet
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArraySet
+  public static void modArrayIndex2(int array[], int index) {
+    for(int i = 0; i < 100; i++) {
+      // Both bounds checks cannot be statically eliminated, because index can be < 0.
+      // Currently dynamic BCE isn't applied for this case.
+      array[(index+i) % 10] = i;
+      array[(index+i) % array.length] = i;
+    }
+  }
+
+  static final int[] staticArray = new int[10];
+
+  /// CHECK-START: void Main.modArrayIndex3() BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArraySet
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArraySet
+
+  /// CHECK-START: void Main.modArrayIndex3() BCE (after)
+  /// CHECK-NOT: Deoptimize
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArraySet
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-DAG: ArraySet
+  public static void modArrayIndex3() {
+    for(int i = 0; i < 100; i++) {
+      // Currently dynamic BCE isn't applied for this case.
+      staticArray[i % 10] = i;
+      // Can be eliminated by BCE.
+      staticArray[i % staticArray.length] = i;
+    }
+  }
+
+  /// CHECK-START: void Main.modArrayIndex4() BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArraySet
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArraySet
+
+  /// CHECK-START: void Main.modArrayIndex4() BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-DAG: ArraySet
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-DAG: ArraySet
+  public static void modArrayIndex4() {
+    int[] array = new int[20];
+    for(int i = 0; i < 100; i++) {
+      // The local array length is statically known. Both can be eliminated by BCE.
+      array[i % 10] = i;
+      array[i % array.length] = i;
+    }
+  }
 
   /// CHECK-START: void Main.bubbleSort(int[]) GVN (before)
   /// CHECK: BoundsCheck
diff --git a/test/550-checker-multiply-accumulate/src/Main.java b/test/550-checker-multiply-accumulate/src/Main.java
index 6fd9cdd..9e6fd3d 100644
--- a/test/550-checker-multiply-accumulate/src/Main.java
+++ b/test/550-checker-multiply-accumulate/src/Main.java
@@ -424,16 +424,29 @@
     return - (left * right);
   }
 
-  /// CHECK-START-ARM64: void Main.SimdMulAdd(int[], int[]) instruction_simplifier_arm64 (before)
+  /// CHECK-START-ARM64: void Main.SimdMulAdd(int[], int[]) instruction_simplifier$after_bce (before)
   /// CHECK-DAG:     Phi                            loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG:     VecMul                         loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:     VecAdd                         loop:<<Loop>>      outer_loop:none
 
-  /// CHECK-START-ARM64: void Main.SimdMulAdd(int[], int[]) instruction_simplifier_arm64 (after)
+  /// CHECK-START-ARM64: void Main.SimdMulAdd(int[], int[]) instruction_simplifier$after_bce (after)
   /// CHECK-DAG:     Phi                            loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG:     VecMultiplyAccumulate kind:Add loop:<<Loop>>      outer_loop:none
 
-  /// CHECK-START-ARM64: void Main.SimdMulAdd(int[], int[]) instruction_simplifier_arm64 (after)
+  /// CHECK-START-ARM64: void Main.SimdMulAdd(int[], int[]) instruction_simplifier$after_bce (after)
+  /// CHECK-NOT:     VecMul
+  /// CHECK-NOT:     VecAdd
+
+  /// CHECK-START-MIPS64: void Main.SimdMulAdd(int[], int[]) instruction_simplifier$after_bce (before)
+  /// CHECK-DAG:     Phi                            loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG:     VecMul                         loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:     VecAdd                         loop:<<Loop>>      outer_loop:none
+
+  /// CHECK-START-MIPS64: void Main.SimdMulAdd(int[], int[]) instruction_simplifier$after_bce (after)
+  /// CHECK-DAG:     Phi                            loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG:     VecMultiplyAccumulate kind:Add loop:<<Loop>>      outer_loop:none
+
+  /// CHECK-START-MIPS64: void Main.SimdMulAdd(int[], int[]) instruction_simplifier$after_bce (after)
   /// CHECK-NOT:     VecMul
   /// CHECK-NOT:     VecAdd
   public static void SimdMulAdd(int[] array1, int[] array2) {
@@ -442,16 +455,47 @@
     }
   }
 
-  /// CHECK-START-ARM64: void Main.SimdMulSub(int[], int[]) instruction_simplifier_arm64 (before)
+  /// CHECK-START-MIPS64: void Main.SimdMulAddLong(long[], long[]) instruction_simplifier$after_bce (before)
+  /// CHECK-DAG:     Phi                            loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG:     VecMul                         loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:     VecAdd                         loop:<<Loop>>      outer_loop:none
+
+  /// CHECK-START-MIPS64: void Main.SimdMulAddLong(long[], long[]) instruction_simplifier$after_bce (after)
+  /// CHECK-DAG:     Phi                            loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG:     VecMultiplyAccumulate kind:Add loop:<<Loop>>      outer_loop:none
+
+  /// CHECK-START-MIPS64: void Main.SimdMulAddLong(long[], long[]) instruction_simplifier$after_bce (after)
+  /// CHECK-NOT:     VecMul
+  /// CHECK-NOT:     VecAdd
+  public static void SimdMulAddLong(long[] array1, long[] array2) {
+    for (int j = 0; j < 100; j++) {
+      array2[j] += 12345 * array1[j];
+    }
+  }
+
+  /// CHECK-START-ARM64: void Main.SimdMulSub(int[], int[]) instruction_simplifier$after_bce (before)
   /// CHECK-DAG:     Phi                            loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG:     VecMul                         loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:     VecSub                         loop:<<Loop>>      outer_loop:none
 
-  /// CHECK-START-ARM64: void Main.SimdMulSub(int[], int[]) instruction_simplifier_arm64 (after)
+  /// CHECK-START-ARM64: void Main.SimdMulSub(int[], int[]) instruction_simplifier$after_bce (after)
   /// CHECK-DAG:     Phi                            loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG:     VecMultiplyAccumulate kind:Sub loop:<<Loop>>      outer_loop:none
 
-  /// CHECK-START-ARM64: void Main.SimdMulSub(int[], int[]) instruction_simplifier_arm64 (after)
+  /// CHECK-START-ARM64: void Main.SimdMulSub(int[], int[]) instruction_simplifier$after_bce (after)
+  /// CHECK-NOT:     VecMul
+  /// CHECK-NOT:     VecSub
+
+  /// CHECK-START-MIPS64: void Main.SimdMulSub(int[], int[]) instruction_simplifier$after_bce (before)
+  /// CHECK-DAG:     Phi                            loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG:     VecMul                         loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:     VecSub                         loop:<<Loop>>      outer_loop:none
+
+  /// CHECK-START-MIPS64: void Main.SimdMulSub(int[], int[]) instruction_simplifier$after_bce (after)
+  /// CHECK-DAG:     Phi                            loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG:     VecMultiplyAccumulate kind:Sub loop:<<Loop>>      outer_loop:none
+
+  /// CHECK-START-MIPS64: void Main.SimdMulSub(int[], int[]) instruction_simplifier$after_bce (after)
   /// CHECK-NOT:     VecMul
   /// CHECK-NOT:     VecSub
   public static void SimdMulSub(int[] array1, int[] array2) {
@@ -460,12 +504,38 @@
     }
   }
 
-  /// CHECK-START-ARM64: void Main.SimdMulMultipleUses(int[], int[]) instruction_simplifier_arm64 (before)
+  /// CHECK-START-MIPS64: void Main.SimdMulSubLong(long[], long[]) instruction_simplifier$after_bce (before)
   /// CHECK-DAG:     Phi                            loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG:     VecMul                         loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:     VecSub                         loop:<<Loop>>      outer_loop:none
 
-  /// CHECK-START-ARM64: void Main.SimdMulMultipleUses(int[], int[]) instruction_simplifier_arm64 (after)
+  /// CHECK-START-MIPS64: void Main.SimdMulSubLong(long[], long[]) instruction_simplifier$after_bce (after)
+  /// CHECK-DAG:     Phi                            loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG:     VecMultiplyAccumulate kind:Sub loop:<<Loop>>      outer_loop:none
+
+  /// CHECK-START-MIPS64: void Main.SimdMulSubLong(long[], long[]) instruction_simplifier$after_bce (after)
+  /// CHECK-NOT:     VecMul
+  /// CHECK-NOT:     VecSub
+  public static void SimdMulSubLong(long[] array1, long[] array2) {
+    for (int j = 0; j < 100; j++) {
+      array2[j] -= 12345 * array1[j];
+    }
+  }
+
+  /// CHECK-START-ARM64: void Main.SimdMulMultipleUses(int[], int[]) instruction_simplifier$after_bce (before)
+  /// CHECK-DAG:     Phi                            loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG:     VecMul                         loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:     VecSub                         loop:<<Loop>>      outer_loop:none
+
+  /// CHECK-START-ARM64: void Main.SimdMulMultipleUses(int[], int[]) instruction_simplifier$after_bce (after)
+  /// CHECK-NOT: VecMultiplyAccumulate
+
+  /// CHECK-START-MIPS64: void Main.SimdMulMultipleUses(int[], int[]) instruction_simplifier$after_bce (before)
+  /// CHECK-DAG:     Phi                            loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG:     VecMul                         loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:     VecSub                         loop:<<Loop>>      outer_loop:none
+
+  /// CHECK-START-MIPS64: void Main.SimdMulMultipleUses(int[], int[]) instruction_simplifier$after_bce (after)
   /// CHECK-NOT: VecMultiplyAccumulate
   public static void SimdMulMultipleUses(int[] array1, int[] array2) {
     for (int j = 0; j < 100; j++) {
@@ -475,6 +545,21 @@
     }
   }
 
+  /// CHECK-START-MIPS64: void Main.SimdMulMultipleUsesLong(long[], long[]) instruction_simplifier$after_bce (before)
+  /// CHECK-DAG:     Phi                            loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG:     VecMul                         loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:     VecSub                         loop:<<Loop>>      outer_loop:none
+
+  /// CHECK-START-MIPS64: void Main.SimdMulMultipleUsesLong(long[], long[]) instruction_simplifier$after_bce (after)
+  /// CHECK-NOT: VecMultiplyAccumulate
+  public static void SimdMulMultipleUsesLong(long[] array1, long[] array2) {
+    for (int j = 0; j < 100; j++) {
+      long temp = 12345 * array1[j];
+      array2[j] -= temp;
+      array1[j] = temp;
+    }
+  }
+
   public static final int ARRAY_SIZE = 1000;
 
   public static void initArray(int[] array) {
@@ -483,6 +568,12 @@
     }
   }
 
+  public static void initArrayLong(long[] array) {
+    for (int i = 0; i < ARRAY_SIZE; i++) {
+      array[i] = i;
+    }
+  }
+
   public static int calcArraySum(int[] array) {
     int sum = 0;
     for (int i = 0; i < ARRAY_SIZE; i++) {
@@ -491,19 +582,39 @@
     return sum;
   }
 
+  public static long calcArraySumLong(long[] array) {
+    long sum = 0;
+    for (int i = 0; i < ARRAY_SIZE; i++) {
+      sum += array[i];
+    }
+    return sum;
+  }
+
   public static void testSimdMultiplyAccumulate() {
     int[] array1 = new int[ARRAY_SIZE];
     int[] array2 = new int[ARRAY_SIZE];
+    long[] array3 = new long[ARRAY_SIZE];
+    long[] array4 = new long[ARRAY_SIZE];
 
     initArray(array1);
     initArray(array2);
     SimdMulSub(array1, array2);
     assertIntEquals(-60608250, calcArraySum(array2));
 
+    initArrayLong(array3);
+    initArrayLong(array4);
+    SimdMulSubLong(array3, array4);
+    assertLongEquals(-60608250, calcArraySumLong(array4));
+
     initArray(array1);
     initArray(array2);
     SimdMulAdd(array1, array2);
     assertIntEquals(61607250, calcArraySum(array2));
+
+    initArrayLong(array3);
+    initArrayLong(array4);
+    SimdMulAddLong(array3, array4);
+    assertLongEquals(61607250, calcArraySumLong(array4));
   }
 
   public static void main(String[] args) {
diff --git a/test/592-checker-regression-bool-input/smali/TestCase.smali b/test/592-checker-regression-bool-input/smali/TestCase.smali
index 56c499d..ad4e902 100644
--- a/test/592-checker-regression-bool-input/smali/TestCase.smali
+++ b/test/592-checker-regression-bool-input/smali/TestCase.smali
@@ -16,8 +16,15 @@
 
 .super Ljava/lang/Object;
 
+## CHECK-START: boolean TestCase.testCase() select_generator (after)
+## CHECK-DAG:     <<Select:i\d+>>          Select
+## CHECK-DAG:                              Return [<<Select>>]
+
 ## CHECK-START: boolean TestCase.testCase() load_store_elimination (after)
-## CHECK-DAG:     If [{{b\d+}}]
+## CHECK-DAG:     <<Or:i\d+>>              Or
+## CHECK-DAG:     <<TypeConversion:b\d+>>  TypeConversion
+## CHECK-DAG:                              StaticFieldSet
+## CHECK-DAG:                              Return [<<TypeConversion>>]
 
 .method public static testCase()Z
     .registers 6
@@ -31,7 +38,8 @@
     # LSE will replace this sget with the type conversion above...
     sget-boolean v2, LMain;->field2:Z
 
-    # ... and generate an If with a byte-typed condition.
+    # ... and select generation will replace this part with a select
+    # that simplifies to just returning the stored boolean.
     if-eqz v2, :else
     const v0, 0x1
     return v0
diff --git a/test/595-profile-saving/run b/test/595-profile-saving/run
index 055035b..851be09 100644
--- a/test/595-profile-saving/run
+++ b/test/595-profile-saving/run
@@ -19,9 +19,11 @@
 # and to make sure the test is not compiled  when loaded (by PathClassLoader)
 # -Xjitsaveprofilinginfo to enable profile saving
 # -Xusejit:false to disable jit and only test profiles.
+# -Xjitinitialsize:32M to prevent profiling info creation failure.
 exec ${RUN} \
   -Xcompiler-option --compiler-filter=quicken \
   --runtime-option '-Xcompiler-option --compiler-filter=quicken' \
+  --runtime-option -Xjitinitialsize:32M \
   --runtime-option -Xjitsaveprofilinginfo \
   --runtime-option -Xusejit:false \
   --runtime-option -Xps-profile-boot-class-path \
diff --git a/test/623-checker-loop-regressions/src/Main.java b/test/623-checker-loop-regressions/src/Main.java
index aca997e..0e85612 100644
--- a/test/623-checker-loop-regressions/src/Main.java
+++ b/test/623-checker-loop-regressions/src/Main.java
@@ -426,6 +426,44 @@
     }
   }
 
+  // The environment of an instruction removed during SimplifyInduction should be adjusted.
+  //
+  /// CHECK-START: void Main.inductionMax(int[]) loop_optimization (before)
+  /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: Phi loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START: void Main.inductionMax(int[]) loop_optimization (after)
+  /// CHECK-NOT: Phi
+  private static void inductionMax(int[] a) {
+    int s = 0;
+    for (int i = 0; i < 10; i++) {
+      s = Math.max(s, 5);
+    }
+  }
+
+  /// CHECK-START: int Main.feedsIntoDeopt(int[]) loop_optimization (before)
+  /// CHECK-DAG: Phi loop:<<Loop1:B\d+>> outer_loop:none
+  /// CHECK-DAG: Phi loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: Phi loop:<<Loop2:B\d+>> outer_loop:none
+  //
+  /// CHECK-EVAL: "<<Loop1>>" != "<<Loop2>>"
+  //
+  /// CHECK-START: int Main.feedsIntoDeopt(int[]) loop_optimization (after)
+  /// CHECK-DAG: Phi loop:{{B\d+}} outer_loop:none
+  /// CHECK-NOT: Phi
+  static int feedsIntoDeopt(int[] a) {
+    // Reduction should be removed.
+    int r = 0;
+    for (int i = 0; i < 100; i++) {
+      r += 10;
+    }
+    // Even though the reduction's uses feed into deopts generated by BCE.
+    for (int i = 1; i < 100; i++) {
+      a[i] = a[i - 1];
+    }
+    return r;
+  }
+
   public static void main(String[] args) {
     expectEquals(10, earlyExitFirst(-1));
     for (int i = 0; i <= 10; i++) {
@@ -539,6 +577,15 @@
       expectEquals((byte)(i + 1), b1[i]);
     }
 
+    inductionMax(yy);
+
+    int[] f = new int[100];
+    f[0] = 11;
+    expectEquals(1000, feedsIntoDeopt(f));
+    for (int i = 0; i < 100; i++) {
+      expectEquals(11, f[i]);
+    }
+
     System.out.println("passed");
   }
 
diff --git a/test/660-clinit/expected.txt b/test/660-clinit/expected.txt
index e103a2c..9eb4941 100644
--- a/test/660-clinit/expected.txt
+++ b/test/660-clinit/expected.txt
@@ -1,4 +1,8 @@
 JNI_OnLoad called
+A.a: 5
+A.a: 10
+B.b: 10
+C.c: 10
 X: 4950
 Y: 5730
 str: Hello World!
diff --git a/test/660-clinit/profile b/test/660-clinit/profile
new file mode 100644
index 0000000..0239f22
--- /dev/null
+++ b/test/660-clinit/profile
@@ -0,0 +1,10 @@
+LMain;
+LClInit;
+LDay;
+LA;
+LB;
+LC;
+LG;
+LGs;
+LObjectRef;
+
diff --git a/test/660-clinit/run b/test/660-clinit/run
new file mode 100644
index 0000000..d24ef42
--- /dev/null
+++ b/test/660-clinit/run
@@ -0,0 +1,17 @@
+#!/bin/bash
+#
+# Copyright (C) 2017 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+exec ${RUN} "$@" --profile
diff --git a/test/660-clinit/src/Main.java b/test/660-clinit/src/Main.java
index f547692..f9b068e 100644
--- a/test/660-clinit/src/Main.java
+++ b/test/660-clinit/src/Main.java
@@ -26,7 +26,26 @@
     }
 
     expectNotPreInit(Day.class);
-    expectNotPreInit(ClInit.class);
+    expectNotPreInit(ClInit.class); // should pass
+    expectNotPreInit(A.class); // should pass
+    expectNotPreInit(B.class); // should fail
+    expectNotPreInit(C.class); // should fail
+    expectNotPreInit(G.class); // should fail
+    expectNotPreInit(Gs.class); // should fail
+    expectNotPreInit(Gss.class); // should fail
+
+    expectNotPreInit(Add.class);
+    expectNotPreInit(Mul.class);
+    expectNotPreInit(ObjectRef.class);
+
+    A x = new A();
+    System.out.println("A.a: " + A.a);
+
+    B y = new B();
+    C z = new C();
+    System.out.println("A.a: " + A.a);
+    System.out.println("B.b: " + B.b);
+    System.out.println("C.c: " + C.c);
 
     ClInit c = new ClInit();
     int aa = c.a;
@@ -113,3 +132,59 @@
   }
 }
 
+class A {
+  public static int a = 2;
+  static {
+    a = 5;  // self-updating, pass
+  }
+}
+
+class B {
+  public static int b;
+  static {
+    A.a = 10;  // write other's static field, fail
+    b = A.a;   // read other's static field, fail
+  }
+}
+
+class C {
+  public static int c;
+  static {
+    c = A.a; // read other's static field, fail
+  }
+}
+
+class G {
+  static G g;
+  static int i;
+  static {
+    g = new Gss(); // fail because of recursive dependency
+    i = A.a;  // read other's static field, fail
+  }
+}
+
+// Gs will be successfully initialized, since G's status is "initializing" at that point; G's
+// initialization is later aborted, but Gs' transaction is already committed.
+// Instantiation of Gs will fail because we try to invoke G's <init> while G's status is
+// StatusVerified, and INVOKE_DIRECT will not initialize the class.
+class Gs extends G {}  // fail because the superclass can't be initialized
+class Gss extends Gs {}
+
+// pruned because it holds references to non-image classes
+class ObjectRef {
+  static Class<?>[] klazz = new Class<?>[] {Add.class, Mul.class};
+}
+
+// non-image
+class Add {
+  static int exec(int a, int b) {
+    return a + b;
+  }
+}
+
+// non-image
+class Mul {
+  static int exec(int a, int b) {
+    return a * b;
+  }
+}
diff --git a/test/661-checker-simd-reduc/expected.txt b/test/661-checker-simd-reduc/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/661-checker-simd-reduc/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/661-checker-simd-reduc/info.txt b/test/661-checker-simd-reduc/info.txt
new file mode 100644
index 0000000..4d071fb
--- /dev/null
+++ b/test/661-checker-simd-reduc/info.txt
@@ -0,0 +1 @@
+Functional tests on vectorization of the most basic reductions.
diff --git a/test/661-checker-simd-reduc/src/Main.java b/test/661-checker-simd-reduc/src/Main.java
new file mode 100644
index 0000000..741b5fa
--- /dev/null
+++ b/test/661-checker-simd-reduc/src/Main.java
@@ -0,0 +1,292 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Tests for simple integral reductions: same type for accumulator and data.
+ */
+public class Main {
+
+  static final int N = 500;
+
+  //
+  // Basic reductions in loops.
+  //
+
+  // TODO: vectorize these (second step of b/64091002 plan)
+
+  private static byte reductionByte(byte[] x) {
+    byte sum = 0;
+    for (int i = 0; i < x.length; i++) {
+      sum += x[i];
+    }
+    return sum;
+  }
+
+  private static short reductionShort(short[] x) {
+    short sum = 0;
+    for (int i = 0; i < x.length; i++) {
+      sum += x[i];
+    }
+    return sum;
+  }
+
+  private static char reductionChar(char[] x) {
+    char sum = 0;
+    for (int i = 0; i < x.length; i++) {
+      sum += x[i];
+    }
+    return sum;
+  }
+
+  private static int reductionInt(int[] x) {
+    int sum = 0;
+    for (int i = 0; i < x.length; i++) {
+      sum += x[i];
+    }
+    return sum;
+  }
+
+  private static long reductionLong(long[] x) {
+    long sum = 0;
+    for (int i = 0; i < x.length; i++) {
+      sum += x[i];
+    }
+    return sum;
+  }
+
+  private static byte reductionMinusByte(byte[] x) {
+    byte sum = 0;
+    for (int i = 0; i < x.length; i++) {
+      sum -= x[i];
+    }
+    return sum;
+  }
+
+  private static short reductionMinusShort(short[] x) {
+    short sum = 0;
+    for (int i = 0; i < x.length; i++) {
+      sum -= x[i];
+    }
+    return sum;
+  }
+
+  private static char reductionMinusChar(char[] x) {
+    char sum = 0;
+    for (int i = 0; i < x.length; i++) {
+      sum -= x[i];
+    }
+    return sum;
+  }
+
+  private static int reductionMinusInt(int[] x) {
+    int sum = 0;
+    for (int i = 0; i < x.length; i++) {
+      sum -= x[i];
+    }
+    return sum;
+  }
+
+  private static long reductionMinusLong(long[] x) {
+    long sum = 0;
+    for (int i = 0; i < x.length; i++) {
+      sum -= x[i];
+    }
+    return sum;
+  }
+
+  private static byte reductionMinByte(byte[] x) {
+    byte min = Byte.MAX_VALUE;
+    for (int i = 0; i < x.length; i++) {
+      min = (byte) Math.min(min, x[i]);
+    }
+    return min;
+  }
+
+  private static short reductionMinShort(short[] x) {
+    short min = Short.MAX_VALUE;
+    for (int i = 0; i < x.length; i++) {
+      min = (short) Math.min(min, x[i]);
+    }
+    return min;
+  }
+
+  private static char reductionMinChar(char[] x) {
+    char min = Character.MAX_VALUE;
+    for (int i = 0; i < x.length; i++) {
+      min = (char) Math.min(min, x[i]);
+    }
+    return min;
+  }
+
+  private static int reductionMinInt(int[] x) {
+    int min = Integer.MAX_VALUE;
+    for (int i = 0; i < x.length; i++) {
+      min = Math.min(min, x[i]);
+    }
+    return min;
+  }
+
+  private static long reductionMinLong(long[] x) {
+    long min = Long.MAX_VALUE;
+    for (int i = 0; i < x.length; i++) {
+      min = Math.min(min, x[i]);
+    }
+    return min;
+  }
+
+  private static byte reductionMaxByte(byte[] x) {
+    byte max = Byte.MIN_VALUE;
+    for (int i = 0; i < x.length; i++) {
+      max = (byte) Math.max(max, x[i]);
+    }
+    return max;
+  }
+
+  private static short reductionMaxShort(short[] x) {
+    short max = Short.MIN_VALUE;
+    for (int i = 0; i < x.length; i++) {
+      max = (short) Math.max(max, x[i]);
+    }
+    return max;
+  }
+
+  private static char reductionMaxChar(char[] x) {
+    char max = Character.MIN_VALUE;
+    for (int i = 0; i < x.length; i++) {
+      max = (char) Math.max(max, x[i]);
+    }
+    return max;
+  }
+
+  private static int reductionMaxInt(int[] x) {
+    int max = Integer.MIN_VALUE;
+    for (int i = 0; i < x.length; i++) {
+      max = Math.max(max, x[i]);
+    }
+    return max;
+  }
+
+  private static long reductionMaxLong(long[] x) {
+    long max = Long.MIN_VALUE;
+    for (int i = 0; i < x.length; i++) {
+      max = Math.max(max, x[i]);
+    }
+    return max;
+  }
+
+  //
+  // A few special cases.
+  //
+
+  // TODO: consider unrolling
+
+  private static int reductionInt10(int[] x) {
+    int sum = 0;
+    // Amenable to complete unrolling.
+    for (int i = 10; i <= 10; i++) {
+      sum += x[i];
+    }
+    return sum;
+  }
+
+  private static int reductionMinusInt10(int[] x) {
+    int sum = 0;
+    // Amenable to complete unrolling.
+    for (int i = 10; i <= 10; i++) {
+      sum -= x[i];
+    }
+    return sum;
+  }
+
+  private static int reductionMinInt10(int[] x) {
+    int min = Integer.MAX_VALUE;
+    // Amenable to complete unrolling.
+    for (int i = 10; i <= 10; i++) {
+      min = Math.min(min, x[i]);
+    }
+    return min;
+  }
+
+  private static int reductionMaxInt10(int[] x) {
+    int max = Integer.MIN_VALUE;
+    // Amenable to complete unrolling.
+    for (int i = 10; i <= 10; i++) {
+      max = Math.max(max, x[i]);
+    }
+    return max;
+  }
+
+  //
+  // Main driver.
+  //
+
+  public static void main(String[] args) {
+    byte[] xb = new byte[N];
+    short[] xs = new short[N];
+    char[] xc = new char[N];
+    int[] xi = new int[N];
+    long[] xl = new long[N];
+    for (int i = 0, k = -17; i < N; i++, k += 3) {
+      xb[i] = (byte) k;
+      xs[i] = (short) k;
+      xc[i] = (char) k;
+      xi[i] = k;
+      xl[i] = k;
+    }
+
+    // Test various reductions in loops.
+    expectEquals(-74, reductionByte(xb));
+    expectEquals(-27466, reductionShort(xs));
+    expectEquals(38070, reductionChar(xc));
+    expectEquals(365750, reductionInt(xi));
+    expectEquals(365750L, reductionLong(xl));
+    expectEquals(74, reductionMinusByte(xb));
+    expectEquals(27466, reductionMinusShort(xs));
+    expectEquals(27466, reductionMinusChar(xc));
+    expectEquals(-365750, reductionMinusInt(xi));
+    expectEquals(-365750L, reductionMinusLong(xl));
+    expectEquals(-128, reductionMinByte(xb));
+    expectEquals(-17, reductionMinShort(xs));
+    expectEquals(1, reductionMinChar(xc));
+    expectEquals(-17, reductionMinInt(xi));
+    expectEquals(-17L, reductionMinLong(xl));
+    expectEquals(127, reductionMaxByte(xb));
+    expectEquals(1480, reductionMaxShort(xs));
+    expectEquals(65534, reductionMaxChar(xc));
+    expectEquals(1480, reductionMaxInt(xi));
+    expectEquals(1480L, reductionMaxLong(xl));
+
+    // Test special cases.
+    expectEquals(13, reductionInt10(xi));
+    expectEquals(-13, reductionMinusInt10(xi));
+    expectEquals(13, reductionMinInt10(xi));
+    expectEquals(13, reductionMaxInt10(xi));
+
+    System.out.println("passed");
+  }
+
+  private static void expectEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  private static void expectEquals(long expected, long result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+}
diff --git a/test/661-classloader-allocator/expected.txt b/test/661-classloader-allocator/expected.txt
new file mode 100644
index 0000000..6a5618e
--- /dev/null
+++ b/test/661-classloader-allocator/expected.txt
@@ -0,0 +1 @@
+JNI_OnLoad called
diff --git a/test/661-classloader-allocator/info.txt b/test/661-classloader-allocator/info.txt
new file mode 100644
index 0000000..872b5e0
--- /dev/null
+++ b/test/661-classloader-allocator/info.txt
@@ -0,0 +1,3 @@
+Regression test for copied methods, which used to not be (re-)allocated
+with the right class loader allocator, crashing the JIT profiler.
diff --git a/test/661-classloader-allocator/src-ex/OtherClass.java b/test/661-classloader-allocator/src-ex/OtherClass.java
new file mode 100644
index 0000000..e59cb95
--- /dev/null
+++ b/test/661-classloader-allocator/src-ex/OtherClass.java
@@ -0,0 +1,28 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package p1;
+
+interface Itf {
+  public default int defaultMethod() {
+    return 42;
+  }
+}
+
+public class OtherClass implements Itf {
+  public void foo() {
+  }
+}
diff --git a/test/661-classloader-allocator/src/Main.java b/test/661-classloader-allocator/src/Main.java
new file mode 100644
index 0000000..40f8f7a
--- /dev/null
+++ b/test/661-classloader-allocator/src/Main.java
@@ -0,0 +1,62 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Constructor;
+
+public class Main {
+  static final String DEX_FILE =
+      System.getenv("DEX_LOCATION") + "/661-classloader-allocator-ex.jar";
+  static final String LIBRARY_SEARCH_PATH = System.getProperty("java.library.path");
+
+  private static void doUnloading() {
+    // Stop the JIT to ensure its threads and work queue are not keeping classes
+    // artifically alive.
+    stopJit();
+    // Do multiple GCs to prevent rare flakiness if some other thread is keeping the
+    // classloader live.
+    for (int i = 0; i < 5; ++i) {
+      Runtime.getRuntime().gc();
+    }
+    startJit();
+  }
+
+  public static void main(String[] args) throws Exception {
+    System.loadLibrary(args[0]);
+    loadClass();
+    doUnloading();
+    // fetchProfiles iterates over the ProfilingInfo objects; we used to crash in
+    // the presence of unloaded copied methods.
+    fetchProfiles();
+  }
+
+  public static void loadClass() throws Exception {
+    Class<?> pathClassLoader = Class.forName("dalvik.system.PathClassLoader");
+    if (pathClassLoader == null) {
+      throw new AssertionError("Couldn't find path class loader class");
+    }
+    Constructor<?> constructor =
+      pathClassLoader.getDeclaredConstructor(String.class, String.class, ClassLoader.class);
+    ClassLoader loader = (ClassLoader) constructor.newInstance(
+      DEX_FILE, LIBRARY_SEARCH_PATH, ClassLoader.getSystemClassLoader());
+    Class<?> otherClass = loader.loadClass("p1.OtherClass");
+    ensureJitCompiled(otherClass, "foo");
+  }
+
+  public static native void ensureJitCompiled(Class<?> cls, String methodName);
+  public static native void fetchProfiles();
+  public static native void stopJit();
+  public static native void startJit();
+}
diff --git a/test/662-regression-alias/expected.txt b/test/662-regression-alias/expected.txt
new file mode 100644
index 0000000..7250211
--- /dev/null
+++ b/test/662-regression-alias/expected.txt
@@ -0,0 +1 @@
+passed 127 4
diff --git a/test/662-regression-alias/info.txt b/test/662-regression-alias/info.txt
new file mode 100644
index 0000000..584d3ee
--- /dev/null
+++ b/test/662-regression-alias/info.txt
@@ -0,0 +1 @@
+Regression test on missed array element aliasing.
diff --git a/test/662-regression-alias/src/Main.java b/test/662-regression-alias/src/Main.java
new file mode 100644
index 0000000..aad61e2
--- /dev/null
+++ b/test/662-regression-alias/src/Main.java
@@ -0,0 +1,80 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Regression test on ARM-scheduling/array-aliasing bug (b/64018485).
+ */
+public class Main {
+
+  //
+  // Mimic original bug.
+  //
+
+  static void setFields(int[] fields) {
+    if (fields == null || fields.length < 6)
+      fields = new int[6];  // creates phi
+    fields[5] = 127;
+  }
+
+  static void processFieldValues(int field0, int field1, int field2,
+                                 int field3, int field4, int field5) {
+    if (field5 != 127) {
+      throw new Error("field = " + field5);
+    } else if (field0 != 0) {
+      processFieldValues(0, 0, 0, 0, 0, 0);  // disable inlining
+    }
+  }
+
+  static int doit(int pass) {
+    int[] fields = new int[6];
+    for (; ; pass++) {
+      setFields(fields);
+      processFieldValues(fields[0], fields[1], fields[2],
+                         fields[3], fields[4], fields[5]);
+      if (pass == 0)
+        break;
+    }
+    return fields[5];
+  }
+
+  //
+  // Similar situation.
+  //
+
+  private static int aliasing(boolean f) {
+    int[] array = new int[6];
+    int[] array2 = null;
+    int s = 0;
+    for (int i = 0; i < 1; i++) {
+      if (f) {
+        array2 = array;
+      }
+      array2[1] = 4;
+      s = array[1];
+    }
+    return s;
+  }
+
+  //
+  // Main driver.
+  //
+
+  public static void main(String[] args) {
+    int r = doit(0);
+    int s = aliasing(true);
+    System.out.println("passed " + r + " " + s);
+  }
+}
diff --git a/runtime/openjdkjvmti/MODULE_LICENSE_GPL_WITH_CLASSPATH_EXCEPTION b/test/663-checker-select-generator/expected.txt
similarity index 100%
copy from runtime/openjdkjvmti/MODULE_LICENSE_GPL_WITH_CLASSPATH_EXCEPTION
copy to test/663-checker-select-generator/expected.txt
diff --git a/test/663-checker-select-generator/info.txt b/test/663-checker-select-generator/info.txt
new file mode 100644
index 0000000..792779f
--- /dev/null
+++ b/test/663-checker-select-generator/info.txt
@@ -0,0 +1,14 @@
+Test for select generation for conditional returns.
+
+Tests the rewriting from:
+
+             If [ Condition ]
+               /          \
+     false branch        true branch
+     return FalseValue   return TrueValue
+
+to:
+
+     true branch
+     false branch
+     return Select [FalseValue, TrueValue, Condition]
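
For intuition, the rewrite targets the Java-level shape below (an illustrative
sketch only; the actual test is written in smali so the optimization sees the
exact bytecode):

class Example {
  // Before: an If whose two branches each return a constant.
  static boolean testCase(boolean condition) {
    if (condition) {
      return true;
    }
    return false;
  }
  // After select generation, the optimizer's IR merges both returns into a
  // single "return Select(false, true, condition)".
}
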
diff --git a/test/663-checker-select-generator/smali/TestCase.smali b/test/663-checker-select-generator/smali/TestCase.smali
new file mode 100644
index 0000000..844a9cf
--- /dev/null
+++ b/test/663-checker-select-generator/smali/TestCase.smali
@@ -0,0 +1,72 @@
+# Copyright (C) 2017 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LTestCase;
+
+.super Ljava/lang/Object;
+
+## CHECK-START: boolean TestCase.testCase(boolean) select_generator (before)
+## CHECK-DAG:     <<Param:z\d+>>           ParameterValue
+## CHECK-DAG:     <<Int0:i\d+>>            IntConstant 0
+## CHECK-DAG:     <<Int1:i\d+>>            IntConstant 1
+## CHECK-DAG:                              If [<<Param>>]
+## CHECK-DAG:                              Return [<<Int0>>]
+## CHECK-DAG:                              Return [<<Int1>>]
+
+## CHECK-START: boolean TestCase.testCase(boolean) select_generator (after)
+## CHECK-DAG:     <<Param:z\d+>>           ParameterValue
+## CHECK-DAG:     <<Int0:i\d+>>            IntConstant 0
+## CHECK-DAG:     <<Int1:i\d+>>            IntConstant 1
+## CHECK-DAG:     <<Select:i\d+>>          Select [<<Int0>>,<<Int1>>,<<Param>>]
+## CHECK-DAG:                              Return [<<Select>>]
+
+.method public static testCase(Z)Z
+    .registers 1
+
+    # The select generation will replace this with a select
+    # instruction and a return.
+    if-eqz v0, :else
+    const v0, 0x1
+    return v0
+
+    :else
+    const v0, 0x0
+    return v0
+.end method
+
+
+## CHECK-START: java.lang.Object TestCase.referenceTypeTestCase(Main$Sub1, Main$Sub2, boolean) select_generator (before)
+## CHECK-DAG:     <<Param0:l\d+>>          ParameterValue
+## CHECK-DAG:     <<Param1:l\d+>>          ParameterValue
+## CHECK-DAG:     <<Param2:z\d+>>          ParameterValue
+## CHECK-DAG:                              If [<<Param2>>]
+## CHECK-DAG:                              Return [<<Param1>>]
+## CHECK-DAG:                              Return [<<Param0>>]
+
+## CHECK-START: java.lang.Object TestCase.referenceTypeTestCase(Main$Sub1, Main$Sub2, boolean) select_generator (after)
+## CHECK-DAG:     <<Param0:l\d+>>          ParameterValue
+## CHECK-DAG:     <<Param1:l\d+>>          ParameterValue
+## CHECK-DAG:     <<Param2:z\d+>>          ParameterValue
+## CHECK-DAG:     <<Select:l\d+>>          Select [<<Param1>>,<<Param0>>,<<Param2>>]
+## CHECK-DAG:                              Return [<<Select>>]
+
+.method public static referenceTypeTestCase(LMain$Sub1;LMain$Sub2;Z)Ljava/lang/Object;
+    .registers 3
+
+    if-eqz v2, :else
+    return-object v0
+
+    :else
+    return-object v1
+.end method
diff --git a/test/663-checker-select-generator/src/Main.java b/test/663-checker-select-generator/src/Main.java
new file mode 100644
index 0000000..c5c7a43
--- /dev/null
+++ b/test/663-checker-select-generator/src/Main.java
@@ -0,0 +1,62 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Method;
+
+public class Main {
+  public static class Super {}
+  public static class Sub1 {}
+  public static class Sub2 {}
+
+  public static void assertTrue(boolean result) {
+    if (!result) {
+      throw new Error("Expected true");
+    }
+  }
+
+  public static void assertFalse(boolean result) {
+    if (result) {
+      throw new Error("Expected false");
+    }
+  }
+
+  public static void assertInstanceOfSub1(Object result) {
+    if (!(result instanceof Sub1)) {
+      throw new Error("Expected instance of Sub1");
+    }
+  }
+
+  public static void assertInstanceOfSub2(Object result) {
+    if (!(result instanceof Sub2)) {
+      throw new Error("Expected instance of Sub2");
+    }
+  }
+
+  public static void main(String[] args) throws Throwable {
+    Class<?> c = Class.forName("TestCase");
+    Method m = c.getMethod("testCase", boolean.class);
+    Method m2 = c.getMethod("referenceTypeTestCase", Sub1.class, Sub2.class, boolean.class);
+
+    try {
+      assertTrue((Boolean) m.invoke(null, true));
+      assertFalse((Boolean) m.invoke(null, false));
+      assertInstanceOfSub1(m2.invoke(null, new Sub1(), new Sub2(), true));
+      assertInstanceOfSub2(m2.invoke(null, new Sub1(), new Sub2(), false));
+    } catch (Exception e) {
+      throw new Error(e);
+    }
+  }
+}
diff --git a/test/663-odd-dex-size/classes.dex b/test/663-odd-dex-size/classes.dex
new file mode 100644
index 0000000..633e3a2
--- /dev/null
+++ b/test/663-odd-dex-size/classes.dex
Binary files differ
diff --git a/test/663-odd-dex-size/expected.txt b/test/663-odd-dex-size/expected.txt
new file mode 100644
index 0000000..3da1ec2
--- /dev/null
+++ b/test/663-odd-dex-size/expected.txt
@@ -0,0 +1 @@
+HelloWorld
diff --git a/test/663-odd-dex-size/info.txt b/test/663-odd-dex-size/info.txt
new file mode 100644
index 0000000..11a50e0
--- /dev/null
+++ b/test/663-odd-dex-size/info.txt
@@ -0,0 +1,14 @@
+Test for a dex file with an odd size in a vdex file.
+
+The code in the file is:
+
+class Main {
+  public static void main(String[] args) {
+    System.out.println("HelloWorld");
+  }
+}
+
+The generated dex file was then manually edited to:
+1) Add 1 to the size value in the dex header.
+2) Add 1 byte to the file.
+3) Change the checksum in the dex header.
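
These edits can also be scripted rather than done by hand. Below is a rough
Java sketch, assuming the standard dex header layout (the Adler32 checksum is
stored at offset 8 and covers everything from offset 12 onward; file_size is
stored at offset 32); the class name and command-line usage are hypothetical:

import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.zip.Adler32;

class OddDexPatcher {
  public static void main(String[] args) throws Exception {
    byte[] dex = Files.readAllBytes(Paths.get(args[0]));
    // 2) Add 1 byte to the file.
    byte[] odd = new byte[dex.length + 1];
    System.arraycopy(dex, 0, odd, 0, dex.length);
    ByteBuffer header = ByteBuffer.wrap(odd).order(ByteOrder.LITTLE_ENDIAN);
    // 1) Add 1 to the size value (file_size) in the dex header.
    header.putInt(32, odd.length);
    // 3) Recompute the Adler32 checksum over everything after the checksum field.
    Adler32 adler = new Adler32();
    adler.update(odd, 12, odd.length - 12);
    header.putInt(8, (int) adler.getValue());
    Files.write(Paths.get(args[0]), odd);
  }
}
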
diff --git a/test/663-odd-dex-size2/663-odd-dex-size2.jar b/test/663-odd-dex-size2/663-odd-dex-size2.jar
new file mode 100644
index 0000000..a29224e
--- /dev/null
+++ b/test/663-odd-dex-size2/663-odd-dex-size2.jar
Binary files differ
diff --git a/test/663-odd-dex-size2/build b/test/663-odd-dex-size2/build
new file mode 100644
index 0000000..5636558
--- /dev/null
+++ b/test/663-odd-dex-size2/build
@@ -0,0 +1,17 @@
+#!/bin/bash
+#
+# Copyright (C) 2017 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Nothing to do
diff --git a/test/663-odd-dex-size2/expected.txt b/test/663-odd-dex-size2/expected.txt
new file mode 100644
index 0000000..3da1ec2
--- /dev/null
+++ b/test/663-odd-dex-size2/expected.txt
@@ -0,0 +1 @@
+HelloWorld
diff --git a/test/663-odd-dex-size2/info.txt b/test/663-odd-dex-size2/info.txt
new file mode 100644
index 0000000..900394d
--- /dev/null
+++ b/test/663-odd-dex-size2/info.txt
@@ -0,0 +1,15 @@
+Test for two dex files with an odd size in a vdex file.
+
+The code in both files is:
+
+class Main {
+  public static void main(String[] args) {
+    System.out.println("HelloWorld");
+  }
+}
+
+The generated dex file was then manually edited to:
+1) Add 1 to the size value in the dex header.
+2) Add 1 byte to the file.
+3) Change the checksum in the dex header.
+
diff --git a/test/663-odd-dex-size3/663-odd-dex-size3.jar b/test/663-odd-dex-size3/663-odd-dex-size3.jar
new file mode 100644
index 0000000..d23ed57
--- /dev/null
+++ b/test/663-odd-dex-size3/663-odd-dex-size3.jar
Binary files differ
diff --git a/test/663-odd-dex-size3/build b/test/663-odd-dex-size3/build
new file mode 100644
index 0000000..5636558
--- /dev/null
+++ b/test/663-odd-dex-size3/build
@@ -0,0 +1,17 @@
+#!/bin/bash
+#
+# Copyright (C) 2017 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Nothing to do
diff --git a/test/663-odd-dex-size3/expected.txt b/test/663-odd-dex-size3/expected.txt
new file mode 100644
index 0000000..3da1ec2
--- /dev/null
+++ b/test/663-odd-dex-size3/expected.txt
@@ -0,0 +1 @@
+HelloWorld
diff --git a/test/663-odd-dex-size3/info.txt b/test/663-odd-dex-size3/info.txt
new file mode 100644
index 0000000..256c77d
--- /dev/null
+++ b/test/663-odd-dex-size3/info.txt
@@ -0,0 +1,19 @@
+Test for a dex file with an odd size followed by an aligned dex file.
+
+The code in classes.dex is:
+
+class Main {
+  public static void main(String[] args) {
+    System.out.println("HelloWorld");
+  }
+}
+
+The generated dex file was then manually edited to:
+1) Add 1 to the size value in the dex header.
+2) Add 1 byte to the file.
+3) Change the checksum in the dex header.
+
+The code in classes2.dex is:
+
+class Foo {
+}
diff --git a/test/663-odd-dex-size4/663-odd-dex-size4.jar b/test/663-odd-dex-size4/663-odd-dex-size4.jar
new file mode 100644
index 0000000..d229663
--- /dev/null
+++ b/test/663-odd-dex-size4/663-odd-dex-size4.jar
Binary files differ
diff --git a/test/663-odd-dex-size4/build b/test/663-odd-dex-size4/build
new file mode 100644
index 0000000..5636558
--- /dev/null
+++ b/test/663-odd-dex-size4/build
@@ -0,0 +1,17 @@
+#!/bin/bash
+#
+# Copyright (C) 2017 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Nothing to do
diff --git a/test/663-odd-dex-size4/expected.txt b/test/663-odd-dex-size4/expected.txt
new file mode 100644
index 0000000..3da1ec2
--- /dev/null
+++ b/test/663-odd-dex-size4/expected.txt
@@ -0,0 +1 @@
+HelloWorld
diff --git a/test/663-odd-dex-size4/info.txt b/test/663-odd-dex-size4/info.txt
new file mode 100644
index 0000000..2c34557
--- /dev/null
+++ b/test/663-odd-dex-size4/info.txt
@@ -0,0 +1,19 @@
+Test for an aligned dex file followed by a dex file with an odd size.
+
+The code in classes.dex is:
+
+class Foo {
+}
+
+The code in classes2.dex is:
+
+class Main {
+  public static void main(String[] args) {
+    System.out.println("HelloWorld");
+  }
+}
+
+The generated dex file was then manually edited to:
+1) Add 1 to the size value in the dex header.
+2) Add 1 byte to the file.
+3) Change the checksum in the dex header.
diff --git a/test/664-aget-verifier/aget-verifier.cc b/test/664-aget-verifier/aget-verifier.cc
new file mode 100644
index 0000000..41372ad
--- /dev/null
+++ b/test/664-aget-verifier/aget-verifier.cc
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dex_file.h"
+
+#include "art_method-inl.h"
+#include "jni.h"
+#include "method_reference.h"
+#include "mirror/class-inl.h"
+#include "mirror/executable.h"
+#include "scoped_thread_state_change-inl.h"
+#include "thread.h"
+
+namespace art {
+namespace {
+
+extern "C" JNIEXPORT jboolean JNICALL Java_Main_testCompiled(JNIEnv* env,
+                                                             jclass,
+                                                             jobject method) {
+  CHECK(method != nullptr);
+  ScopedObjectAccess soa(env);
+  ObjPtr<mirror::Executable> exec = soa.Decode<mirror::Executable>(method);
+  ArtMethod* art_method = exec->GetArtMethod();
+  return art_method->HasAnyCompiledCode();
+}
+
+}  // namespace
+}  // namespace art
diff --git a/test/664-aget-verifier/expected.txt b/test/664-aget-verifier/expected.txt
new file mode 100644
index 0000000..50e2e94
--- /dev/null
+++ b/test/664-aget-verifier/expected.txt
@@ -0,0 +1,2 @@
+JNI_OnLoad called
+test method successfully verified/compiled.
diff --git a/test/664-aget-verifier/info.txt b/test/664-aget-verifier/info.txt
new file mode 100644
index 0000000..b59cacb
--- /dev/null
+++ b/test/664-aget-verifier/info.txt
@@ -0,0 +1,6 @@
+Tests how the verifier handles aget on an array that was initially null.
+
+The verifier will flag aget instructions as have_pending_runtime_throw_failure_
+if the array register is potentially null, even if the aget is guarded by null
+checks and the array is never actually null at runtime. This fails compile-time
+verification, preventing an otherwise good method from being compiled.
diff --git a/test/664-aget-verifier/src/Main.java b/test/664-aget-verifier/src/Main.java
new file mode 100644
index 0000000..7a92b17
--- /dev/null
+++ b/test/664-aget-verifier/src/Main.java
@@ -0,0 +1,49 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Method;
+
+public class Main {
+
+  public static void main(String[] args) {
+    System.loadLibrary(args[0]);
+
+    test();
+
+    try {
+      if (testCompiled(Main.class.getDeclaredMethod("test"))) {
+        System.out.println("test method successfully verified/compiled.");
+      } else {
+        System.out.println("test method failed to verify/compile.");
+      }
+    } catch (Exception e) {
+      System.out.println("Got unexpected exception: " + e);
+    }
+  }
+
+  public static void test() {
+    int[] maybe_null_array = null;
+    for (int i = 0; i < 2; i++) {
+      int[] non_null_array = new int[1];
+      if (maybe_null_array != null) {
+        i = maybe_null_array[0] + 1;
+      }
+      maybe_null_array = non_null_array;
+    }
+  }
+
+  public static native boolean testCompiled(Method method);
+}
diff --git a/test/706-checker-scheduler/run b/test/706-checker-scheduler/run
new file mode 100644
index 0000000..5ffc303
--- /dev/null
+++ b/test/706-checker-scheduler/run
@@ -0,0 +1,18 @@
+#!/bin/bash
+#
+# Copyright (C) 2017 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Use the --secondary switch to add the secondary dex file to the class path.
+exec ${RUN} "${@}" --secondary
diff --git a/test/706-checker-scheduler/src-dex2oat-unresolved/UnresolvedClass.java b/test/706-checker-scheduler/src-dex2oat-unresolved/UnresolvedClass.java
new file mode 100644
index 0000000..4faa12a
--- /dev/null
+++ b/test/706-checker-scheduler/src-dex2oat-unresolved/UnresolvedClass.java
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class UnresolvedClass {
+  public static int staticInt;
+  public int instanceInt;
+}
+
diff --git a/test/706-checker-scheduler/src/Main.java b/test/706-checker-scheduler/src/Main.java
index a68565b..08a23a7 100644
--- a/test/706-checker-scheduler/src/Main.java
+++ b/test/706-checker-scheduler/src/Main.java
@@ -31,6 +31,7 @@
   public ExampleObj my_obj;
   public static int number1;
   public static int number2;
+  public static volatile int number3;
 
   /// CHECK-START-ARM64: int Main.arrayAccess() scheduler (before)
   /// CHECK:    <<Const1:i\d+>>       IntConstant 1
@@ -340,6 +341,87 @@
     }
   }
 
+  /// CHECK-START-ARM: void Main.accessFieldsVolatile() scheduler (before)
+  /// CHECK-START-ARM64: void Main.accessFieldsVolatile() scheduler (before)
+  /// CHECK:            InstanceFieldGet
+  /// CHECK:            Add
+  /// CHECK:            InstanceFieldSet
+  /// CHECK:            InstanceFieldGet
+  /// CHECK:            Add
+  /// CHECK:            InstanceFieldSet
+  /// CHECK:            StaticFieldGet
+  /// CHECK:            Add
+  /// CHECK:            StaticFieldSet
+  /// CHECK:            StaticFieldGet
+  /// CHECK:            Add
+  /// CHECK:            StaticFieldSet
+
+  /// CHECK-START-ARM: void Main.accessFieldsVolatile() scheduler (after)
+  /// CHECK-START-ARM64: void Main.accessFieldsVolatile() scheduler (after)
+  /// CHECK:            InstanceFieldGet
+  /// CHECK:            Add
+  /// CHECK:            InstanceFieldSet
+  /// CHECK:            InstanceFieldGet
+  /// CHECK:            Add
+  /// CHECK:            InstanceFieldSet
+  /// CHECK:            StaticFieldGet
+  /// CHECK:            Add
+  /// CHECK:            StaticFieldSet
+  /// CHECK:            StaticFieldGet
+  /// CHECK:            Add
+  /// CHECK:            StaticFieldSet
+
+  public void accessFieldsVolatile() {
+    my_obj = new ExampleObj(1, 2);
+    for (int i = 0; i < 10; i++) {
+      my_obj.n1++;
+      my_obj.n2++;
+      number1++;
+      number3++;
+    }
+  }
+
+  /// CHECK-START-ARM: void Main.accessFieldsUnresolved() scheduler (before)
+  /// CHECK-START-ARM64: void Main.accessFieldsUnresolved() scheduler (before)
+  /// CHECK:            InstanceFieldGet
+  /// CHECK:            Add
+  /// CHECK:            InstanceFieldSet
+  /// CHECK:            InstanceFieldGet
+  /// CHECK:            Add
+  /// CHECK:            InstanceFieldSet
+  /// CHECK:            UnresolvedInstanceFieldGet
+  /// CHECK:            Add
+  /// CHECK:            UnresolvedInstanceFieldSet
+  /// CHECK:            UnresolvedStaticFieldGet
+  /// CHECK:            Add
+  /// CHECK:            UnresolvedStaticFieldSet
+
+  /// CHECK-START-ARM: void Main.accessFieldsUnresolved() scheduler (after)
+  /// CHECK-START-ARM64: void Main.accessFieldsUnresolved() scheduler (after)
+  /// CHECK:            InstanceFieldGet
+  /// CHECK:            Add
+  /// CHECK:            InstanceFieldSet
+  /// CHECK:            InstanceFieldGet
+  /// CHECK:            Add
+  /// CHECK:            InstanceFieldSet
+  /// CHECK:            UnresolvedInstanceFieldGet
+  /// CHECK:            Add
+  /// CHECK:            UnresolvedInstanceFieldSet
+  /// CHECK:            UnresolvedStaticFieldGet
+  /// CHECK:            Add
+  /// CHECK:            UnresolvedStaticFieldSet
+
+  public void accessFieldsUnresolved() {
+    my_obj = new ExampleObj(1, 2);
+    UnresolvedClass unresolved_obj = new UnresolvedClass();
+    for (int i = 0; i < 10; i++) {
+      my_obj.n1++;
+      my_obj.n2++;
+      unresolved_obj.instanceInt++;
+      UnresolvedClass.staticInt++;
+    }
+  }
+
   /// CHECK-START-ARM64: int Main.intDiv(int) scheduler (before)
   /// CHECK:               Sub
   /// CHECK:               DivZeroCheck
diff --git a/test/Android.bp b/test/Android.bp
index fab664a..7413ee5 100644
--- a/test/Android.bp
+++ b/test/Android.bp
@@ -411,6 +411,7 @@
         "642-fp-callees/fp_callees.cc",
         "647-jni-get-field-id/get_field_id.cc",
         "656-annotation-lookup-generic-jni/test.cc",
+        "664-aget-verifier/aget-verifier.cc",
         "708-jit-cache-churn/jit.cc"
     ],
     shared_libs: [
diff --git a/test/common/runtime_state.cc b/test/common/runtime_state.cc
index 7c0ed69..60c7315 100644
--- a/test/common/runtime_state.cc
+++ b/test/common/runtime_state.cc
@@ -254,4 +254,17 @@
   return Runtime::Current()->GetNumberOfDeoptimizations();
 }
 
+extern "C" JNIEXPORT void JNICALL Java_Main_fetchProfiles(JNIEnv*, jclass) {
+  jit::Jit* jit = GetJitIfEnabled();
+  if (jit == nullptr) {
+    return;
+  }
+  jit::JitCodeCache* code_cache = jit->GetCodeCache();
+  std::vector<ProfileMethodInfo> unused_vector;
+  std::set<std::string> unused_locations;
+  unused_locations.insert("fake_location");
+  ScopedObjectAccess soa(Thread::Current());
+  code_cache->GetProfiledMethods(unused_locations, unused_vector);
+}
+
 }  // namespace art
diff --git a/test/knownfailures.json b/test/knownfailures.json
index 5a67fbc..20cfc34 100644
--- a/test/knownfailures.json
+++ b/test/knownfailures.json
@@ -54,11 +54,6 @@
                         "doesn't (and isn't meant to) work with --prebuild."]
     },
     {
-        "tests": ["529-checker-unresolved"],
-        "variant": "no-prebuild",
-        "bug": "http://b/27784033"
-    },
-    {
         "tests": ["117-nopatchoat",
                   "147-stripped-dex-fallback",
                   "608-checker-unresolved-lse"],
@@ -505,6 +500,7 @@
             "641-checker-arraycopy",
             "643-checker-bogus-ic",
             "645-checker-abs-simd",
+            "663-checker-select-generator",
             "706-checker-scheduler"],
         "description": ["Checker tests are not compatible with jvmti."],
         "variant": "jvmti-stress | redefine-stress | trace-stress | field-stress | step-stress"
@@ -711,5 +707,17 @@
         "variant": "no-image | no-dex2oat | no-prebuild",
         "description": ["Tests <clinit> for app images, which --no-image, --no-prebuild and",
                         "--no-dex2oat do not create"]
+    },
+    {
+        "tests": ["961-default-iface-resolution-gen",
+                  "964-default-iface-init-gen",
+                  "968-default-partial-compile-gen"],
+        "env_vars": {"SANITIZE_HOST": "address"},
+        "description": ["Test hits dex2oat watchdog timeout (60sec) on art-asan"]
+    },
+    {
+        "tests": "664-aget-verifier",
+        "description": ["Aget on potentially null array fails verification."],
+        "bug": "b/64683522"
     }
 ]
diff --git a/test/run-test b/test/run-test
index 486b465..e6196a0 100755
--- a/test/run-test
+++ b/test/run-test
@@ -45,7 +45,7 @@
 export RUN="${progdir}/etc/run-test-jar"
 export DEX_LOCATION=/data/run-test/${test_dir}
 export NEED_DEX="true"
-export USE_JACK="true"
+export USE_JACK="false"
 export USE_DESUGAR="true"
 export SMALI_ARGS=""
 
@@ -926,6 +926,11 @@
             tail -n 3000 "$tmp_dir/$strace_output"
             echo '####################'
         fi
+        if [ "x$target_mode" = "xno" -a "x$SANITIZE_HOST" = "xaddress" ]; then
+            # Run the stack script to symbolize any ASAN aborts on the host for SANITIZE_HOST. The
+            # tools used by the given ABI work for both x86 and x86-64.
+            echo "ABI: 'x86_64'" | cat - "$output" | $ANDROID_BUILD_TOP/development/scripts/stack | tail -n 3000
+        fi
         echo ' '
     fi
 
diff --git a/test/testrunner/env.py b/test/testrunner/env.py
index b996b04..d45d009 100644
--- a/test/testrunner/env.py
+++ b/test/testrunner/env.py
@@ -33,7 +33,8 @@
                         'TARGET_ARCH',
                         'HOST_PREFER_32_BIT',
                         'HOST_OUT_EXECUTABLES',
-                        'ANDROID_JAVA_TOOLCHAIN']
+                        'ANDROID_JAVA_TOOLCHAIN',
+                        'ANDROID_COMPILE_WITH_JACK']
 _DUMP_MANY_VARS = None  # To be set to a dictionary with above list being the keys,
                         # and the build variable being the value.
 def _dump_many_vars(var_name):
@@ -78,6 +79,15 @@
 def _get_build_var(var_name):
   return _dump_many_vars(var_name)
 
+def _get_build_var_boolean(var, default):
+  val = _get_build_var(var)
+  if val:
+    if val == "True" or val == "true":
+      return True
+    if val == "False" or val == "false":
+      return False
+  return default
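+
+# Example (illustrative): _get_build_var_boolean('ANDROID_COMPILE_WITH_JACK',
+# 'default') returns True or False when the build system reports "true" or
+# "false", and falls back to the 'default' sentinel otherwise.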
+
 def get_env(key):
   return _env.get(key)
 
@@ -97,7 +107,7 @@
 ANDROID_BUILD_TOP = _get_android_build_top()
 
 # Compiling with jack? Possible values in (True, False, 'default')
-ANDROID_COMPILE_WITH_JACK = _getEnvBoolean('ANDROID_COMPILE_WITH_JACK', 'default')
+ANDROID_COMPILE_WITH_JACK = _get_build_var_boolean('ANDROID_COMPILE_WITH_JACK', 'default')
 
 # Directory used for temporary test files on the host.
 ART_HOST_TEST_DIR = tempfile.mkdtemp(prefix = 'test-art-')
diff --git a/test/ti-agent/jvmti_helper.cc b/test/ti-agent/jvmti_helper.cc
index 0d5cb39..7280102 100644
--- a/test/ti-agent/jvmti_helper.cc
+++ b/test/ti-agent/jvmti_helper.cc
@@ -58,7 +58,7 @@
     .can_get_line_numbers                            = 1,
     .can_get_source_debug_extension                  = 1,
     .can_access_local_variables                      = 0,
-    .can_maintain_original_method_order              = 0,
+    .can_maintain_original_method_order              = 1,
     .can_generate_single_step_events                 = 1,
     .can_generate_exception_events                   = 0,
     .can_generate_frame_pop_events                   = 0,
diff --git a/test/ti-agent/trace_helper.cc b/test/ti-agent/trace_helper.cc
index 7a9d1e0..1f8ceff 100644
--- a/test/ti-agent/trace_helper.cc
+++ b/test/ti-agent/trace_helper.cc
@@ -388,10 +388,12 @@
   data->single_step = single_step != nullptr ? env->FromReflectedMethod(single_step) : nullptr;
   data->in_callback = false;
 
-  void* old_data = nullptr;
-  if (JvmtiErrorToException(env, jvmti_env, jvmti_env->GetEnvironmentLocalStorage(&old_data))) {
+  TraceData* old_data = nullptr;
+  if (JvmtiErrorToException(env, jvmti_env,
+                            jvmti_env->GetEnvironmentLocalStorage(
+                                reinterpret_cast<void**>(&old_data)))) {
     return;
-  } else if (old_data != nullptr) {
+  } else if (old_data != nullptr && old_data->test_klass != nullptr) {
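+    // A non-null test_klass means a previous enable is still active;
+    // disableTracing clears test_klass so the storage can be reused.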
     ScopedLocalRef<jclass> rt_exception(env, env->FindClass("java/lang/RuntimeException"));
     env->ThrowNew(rt_exception.get(), "Environment already has local storage set!");
     return;
@@ -455,6 +457,21 @@
 
 extern "C" JNIEXPORT void JNICALL Java_art_Trace_disableTracing(
     JNIEnv* env, jclass klass ATTRIBUTE_UNUSED, jthread thr) {
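+  // Fetch the TraceData stashed in JVMTI environment-local storage when
+  // tracing was enabled.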
+  TraceData* data = nullptr;
+  if (JvmtiErrorToException(
+      env, jvmti_env, jvmti_env->GetEnvironmentLocalStorage(reinterpret_cast<void**>(&data)))) {
+    return;
+  }
+  // If data is null, or test_klass was already cleared, tracing was never
+  // enabled (or has already been disabled), so there is nothing to do.
+  if (data == nullptr || data->test_klass == nullptr) {
+    return;
+  }
+  env->DeleteGlobalRef(data->test_klass);
+  if (env->ExceptionCheck()) {
+    return;
+  }
+  // Clear test_klass so we know this TraceData is no longer in use.
+  data->test_klass = nullptr;
   if (JvmtiErrorToException(env, jvmti_env,
                             jvmti_env->SetEventNotificationMode(JVMTI_DISABLE,
                                                                 JVMTI_EVENT_FIELD_ACCESS,
diff --git a/tools/Android.mk b/tools/Android.mk
index bc2fd8c..9ecf0cd 100644
--- a/tools/Android.mk
+++ b/tools/Android.mk
@@ -20,13 +20,15 @@
 include $(CLEAR_VARS)
 LOCAL_IS_HOST_MODULE := true
 LOCAL_MODULE_CLASS := EXECUTABLES
-LOCAL_MODULE := art
+LOCAL_MODULE := art-script
 LOCAL_SRC_FILES := art
+LOCAL_MODULE_STEM := art
 include $(BUILD_PREBUILT)
 
 # Copy the art shell script to the target's bin directory
 include $(CLEAR_VARS)
 LOCAL_MODULE_CLASS := EXECUTABLES
-LOCAL_MODULE := art
+LOCAL_MODULE := art-script
 LOCAL_SRC_FILES := art
+LOCAL_MODULE_STEM := art
 include $(BUILD_PREBUILT)
diff --git a/tools/art b/tools/art
index bc0c85e..15993dd 100644
--- a/tools/art
+++ b/tools/art
@@ -17,13 +17,13 @@
 # Android (e.g. mksh).
 
 # Globals
-ARCHS={arm,arm64,mips,mips64,x86,x86_64}
 ART_BINARY=dalvikvm
 DELETE_ANDROID_DATA="no"
 LAUNCH_WRAPPER=
 LIBART=libart.so
 JIT_PROFILE="no"
 VERBOSE="no"
+CLEAN_OAT_FILES="yes"
 EXTRA_OPTIONS=()
 
 # Follow all sym links to get the program name.
@@ -88,6 +88,7 @@
                            report upon completion.
   --profile                Run with profiling, then run using profile data.
   --verbose                Run script verbosely.
+  --no-clean               Don't clean up oat directories.
 
 The ART_OPTIONS are passed directly to the Android Runtime.
 
@@ -120,26 +121,98 @@
   env "$@"
 }
 
+# Parse a colon-separated list into an array (e.g. "foo.dex:bar.dex" -> (foo.dex bar.dex))
+PARSE_CLASSPATH_RESULT=()  # Return value will be here due to shell limitations.
+parse_classpath() {
+  local cp="$1"
+  local oldifs=$IFS
+
+  local cp_array
+  cp_array=()
+
+  IFS=":"
+  for part in $cp; do
+    cp_array+=("$part")
+  done
+  IFS=$oldifs
+
+  PARSE_CLASSPATH_RESULT=("${cp_array[@]}")
+}
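+
+# Usage sketch (paths are illustrative):
+#   parse_classpath "foo/classes.dex:bar/classes.dex"
+#   # PARSE_CLASSPATH_RESULT is now (foo/classes.dex bar/classes.dex)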
+
+# Sets 'PARSE_CLASSPATH_RESULT' to an array of class path dex files.
+# e.g. (-cp foo/classes.dex:bar/classes.dex) -> (foo/classes.dex bar/classes.dex)
+find_cp_in_args() {
+  while [[ $# -gt 0 ]]; do
+    case "$1" in
+      -cp|-classpath)
+        parse_classpath "$2"
+        # Sets 'PARSE_CLASSPATH_RESULT'; a later -cp/-classpath overwrites the
+        # result of an earlier one.
+        shift
+        ;;
+    esac
+    shift
+  done
+}
+
+# Delete the 'oat' directories relative to the classpath's dex files.
+# e.g. (foo/classes.dex bar/classes.dex) would delete (foo/oat bar/oat) directories.
+cleanup_oat_directory() {
+  local classpath
+  classpath=("$@")
+
+  local dirpath
+
+  for path in "${classpath[@]}"; do
+    dirpath="$(dirname "$path")"
+    [[ -d "$dirpath" ]] && verbose_run rm -rf "$dirpath/oat"
+  done
+}
+
+# Parse -cp <CP>, -classpath <CP>, and $CLASSPATH to find the dex files.
+# Each dex file's directory may contain an 'oat' directory; delete it.
+# Input: Command line arguments to the art script.
+# e.g. -cp foo/classes.dex:bar/classes.dex would delete (foo/oat bar/oat) directories.
+cleanup_oat_directory_for_classpath() {
+  if [ "$CLEAN_OAT_FILES" = "yes" ]; then
+    # First try: Use $CLASSPATH environment variable.
+    parse_classpath "$CLASSPATH"
+    # Second try: Look for latest -cp or -classpath arg which will take precedence.
+    find_cp_in_args "$@"
+
+    cleanup_oat_directory "${PARSE_CLASSPATH_RESULT[@]}"
+  fi
+}
+
+# Attempt to find '$ANDROID_ROOT/framework/<isa>/core.art' without knowing what <isa> is.
+function check_if_boot_image_file_exists() {
+  local image_location_dir="$1"
+  local image_location_name="$2"
+
+  # Expand image_files to a list of existing image files on the disk.
+  # If no such files exist, it expands to a single element 'dir/*/file' with a
+  # literal '*'.
+  local image_files
+  image_files=("$image_location_dir"/*/"$image_location_name")  # Unquoted * so it globs.
+
+  # Array always has at least 1 element. Test explicitly whether the file exists.
+  [[ -e "${image_files[0]}" ]]
+}
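+
+# Usage sketch: succeeds when a file such as
+# "$ANDROID_ROOT/framework/x86_64/core.art" exists for any <isa>:
+#   check_if_boot_image_file_exists "$ANDROID_ROOT/framework" "core.art"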
+
 # Automatically find the boot image location. It uses core.art by default.
 # On a real device, it might only have a boot.art, so use that instead when core.art does not exist.
 function detect_boot_image_location() {
   local image_location_dir="$ANDROID_ROOT/framework"
   local image_location_name="core.art"
 
-  local maybe_arch
-  local core_image_exists="false"
-
-  # Parse ARCHS={a,b,c,d} syntax.
-  local array
-  IFS=, read -a array <<< "${ARCHS:1:(-1)}";
-  for maybe_arch in "${array[@]}"; do
-    if [[ -e "$image_location_dir/$maybe_arch/$image_location_name" ]]; then
-      core_image_exists="true"
-      break
-    fi
-  done
-
-  if [[ "$core_image_exists" == "false" ]]; then
+  # If no core.art exists, try boot.art instead.
+  # If there is no boot.art either, leave the name as-is and assume -Ximage is
+  # passed explicitly; otherwise let dalvikvm report the invalid image file.
+  if ! check_if_boot_image_file_exists "$image_location_dir" "core.art" && \
+       check_if_boot_image_file_exists "$image_location_dir" "boot.art"; then
     image_location_name="boot.art"
   fi
 
@@ -147,9 +220,15 @@
   echo "$image_location"
 }
 
+# Runs dalvikvm and returns its exit code.
+# (Oat directories are cleaned up before and after each run.)
 function run_art() {
   local image_location="$(detect_boot_image_location)"
+  local ret
 
+  # First clean up any left-over 'oat' directories from the last time dalvikvm was run.
+  cleanup_oat_directory_for_classpath "$@"
+  # Run dalvikvm.
   verbose_run ANDROID_DATA=$ANDROID_DATA               \
               ANDROID_ROOT=$ANDROID_ROOT               \
               LD_LIBRARY_PATH=$LD_LIBRARY_PATH         \
@@ -160,6 +239,13 @@
               -Xnorelocate                             \
               -Ximage:"$image_location"                \
               "$@"
+  ret=$?
+
+  # Avoid polluting the disk with 'oat' directories after dalvikvm has finished.
+  cleanup_oat_directory_for_classpath "$@"
+
+  # Forward exit code of dalvikvm.
+  return $ret
 }
 
 while [[ "$1" = "-"* ]]; do
@@ -209,6 +295,9 @@
   --verbose)
     VERBOSE="yes"
     ;;
+  --no-clean)
+    CLEAN_OAT_FILES="no"
+    ;;
   --*)
     echo "unknown option: $1" 1>&2
     usage
@@ -251,7 +340,7 @@
     # by default.
     ANDROID_DATA="$ANDROID_DATA/local/tmp/android-data$$"
   fi
-  mkdir -p $ANDROID_DATA/dalvik-cache/$ARCHS
+  mkdir -p "$ANDROID_DATA"
   DELETE_ANDROID_DATA="yes"
 fi
 
@@ -264,7 +353,7 @@
   # Create the profile. The runtime expects profiles to be created before
   # execution.
   PROFILE_PATH="$ANDROID_DATA/primary.prof"
-  touch $PROFILE_PATH
+  touch "$PROFILE_PATH"
 
   # Replace the compiler filter with quicken so that we
   # can capture the profile.
@@ -282,13 +371,15 @@
   EXIT_STATUS=$?
 
   if [ $EXIT_STATUS != 0 ]; then
-    cat "$ANDROID_DATA/profile_gen.log"
+    echo "Profile run failed: " >&2
+    cat "$ANDROID_DATA/profile_gen.log" >&2
     clean_android_data
     exit $EXIT_STATUS
   fi
 
-  # Wipe dalvik-cache to prepare it for the next invocation.
-  rm -rf $ANDROID_DATA/dalvik-cache/$ARCHS/*
+  # Wipe dalvik-cache so that a subsequent run_art must regenerate it.
+  # Leave $ANDROID_DATA intact since it contains our profile file.
+  rm -rf "$ANDROID_DATA/dalvik-cache"
 
   # Append arguments so next invocation of run_art uses the profile.
   EXTRA_OPTIONS+=(-Xcompiler-option --profile-file="$PROFILE_PATH")
diff --git a/tools/buildbot-build.sh b/tools/buildbot-build.sh
index 75694c3..4f99ac3 100755
--- a/tools/buildbot-build.sh
+++ b/tools/buildbot-build.sh
@@ -19,6 +19,8 @@
   exit 1
 fi
 
+source build/envsetup.sh >&/dev/null # for get_build_var
+
 # Logic for setting out_dir from build/make/core/envsetup.mk:
 if [[ -z $OUT_DIR ]]; then
   if [[ -z $OUT_DIR_COMMON_BASE ]]; then
@@ -30,10 +32,7 @@
   out_dir=${OUT_DIR}
 fi
 
-using_jack=true
-if [[ $ANDROID_COMPILE_WITH_JACK == false ]]; then
-  using_jack=false
-fi
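+# get_build_var echoes the given make variable; ANDROID_COMPILE_WITH_JACK is
+# expected to be "true" or "false" here.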
+using_jack=$(get_build_var ANDROID_COMPILE_WITH_JACK)
 
 java_libraries_dir=${out_dir}/target/common/obj/JAVA_LIBRARIES
 common_targets="vogar core-tests apache-harmony-jdwp-tests-hostdex jsr166-tests mockito-target"
@@ -63,7 +62,7 @@
   fi
 done
 
-if $using_jack; then
+if [[ $using_jack == "true" ]]; then
   common_targets="$common_targets ${out_dir}/host/linux-x86/bin/jack"
 fi
 
diff --git a/tools/cpp-define-generator/offset_runtime.def b/tools/cpp-define-generator/offset_runtime.def
index 41e7e40..1d5ce7d 100644
--- a/tools/cpp-define-generator/offset_runtime.def
+++ b/tools/cpp-define-generator/offset_runtime.def
@@ -40,6 +40,10 @@
 DEFINE_RUNTIME_CALLEE_SAVE_OFFSET(SAVE_REFS_AND_ARGS, art::CalleeSaveType::kSaveRefsAndArgs)
 // Offset of field Runtime::callee_save_methods_[kSaveEverything]
 DEFINE_RUNTIME_CALLEE_SAVE_OFFSET(SAVE_EVERYTHING, art::CalleeSaveType::kSaveEverything)
+// Offset of field Runtime::callee_save_methods_[kSaveEverythingForClinit]
+DEFINE_RUNTIME_CALLEE_SAVE_OFFSET(SAVE_EVERYTHING_FOR_CLINIT, art::CalleeSaveType::kSaveEverythingForClinit)
+// Offset of field Runtime::callee_save_methods_[kSaveEverythingForSuspendCheck]
+DEFINE_RUNTIME_CALLEE_SAVE_OFFSET(SAVE_EVERYTHING_FOR_SUSPEND_CHECK, art::CalleeSaveType::kSaveEverythingForSuspendCheck)
 
 #undef DEFINE_RUNTIME_CALLEE_SAVE_OFFSET
 #include "common_undef.def"  // undef DEFINE_OFFSET_EXPR
diff --git a/tools/cpplint_presubmit.py b/tools/cpplint_presubmit.py
index 4781517..b42a691 100755
--- a/tools/cpplint_presubmit.py
+++ b/tools/cpplint_presubmit.py
@@ -21,7 +21,7 @@
 import subprocess
 import sys
 
-IGNORED_FILES = {"runtime/elf.h", "runtime/openjdkjvmti/include/jvmti.h"}
+IGNORED_FILES = {"runtime/elf.h", "openjdkjvmti/include/jvmti.h"}
 
 INTERESTING_SUFFIXES = {".h", ".cc"}
 
diff --git a/tools/dexfuzz/README b/tools/dexfuzz/README
index 1f74262..fff5473 100644
--- a/tools/dexfuzz/README
+++ b/tools/dexfuzz/README
@@ -139,6 +139,7 @@
 InstructionSwapper 80
 InvokeChanger 30
 NewArrayLengthChanger 50
+NewInstanceChanger 10
 NewMethodCaller 10
 NonsenseStringPrinter 10
 OppositeBranchChanger 40
diff --git a/tools/dexfuzz/src/dexfuzz/DexFuzz.java b/tools/dexfuzz/src/dexfuzz/DexFuzz.java
index 2b3b8e7..feb5a13 100644
--- a/tools/dexfuzz/src/dexfuzz/DexFuzz.java
+++ b/tools/dexfuzz/src/dexfuzz/DexFuzz.java
@@ -33,9 +33,9 @@
  * Entrypoint class for dexfuzz.
  */
 public class DexFuzz {
-  // Last version update 1.7: changed the likelihood of RegisterClobber.
+  // Last version update 1.9: fixed a bug in InvokeChanger.
   private static int majorVersion = 1;
-  private static int minorVersion = 7;
+  private static int minorVersion = 9;
   private static int seedChangeVersion = 0;
 
   /**
diff --git a/tools/dexfuzz/src/dexfuzz/executors/Executor.java b/tools/dexfuzz/src/dexfuzz/executors/Executor.java
index 074672d..0367a83 100644
--- a/tools/dexfuzz/src/dexfuzz/executors/Executor.java
+++ b/tools/dexfuzz/src/dexfuzz/executors/Executor.java
@@ -114,8 +114,6 @@
 
     commandBuilder.append("--oat-file=output.oat ");
     commandBuilder.append("--android-root=").append(device.getAndroidHostOut()).append(" ");
-    commandBuilder.append("--runtime-arg -classpath ");
-    commandBuilder.append("--runtime-arg ").append(programName).append(" ");
     commandBuilder.append("--dex-file=").append(programName).append(" ");
     commandBuilder.append("--compiler-filter=quicken --runtime-arg -Xnorelocate ");
 
diff --git a/tools/dexfuzz/src/dexfuzz/program/Program.java b/tools/dexfuzz/src/dexfuzz/program/Program.java
index bb2f4c0..c6fa6c4 100644
--- a/tools/dexfuzz/src/dexfuzz/program/Program.java
+++ b/tools/dexfuzz/src/dexfuzz/program/Program.java
@@ -32,6 +32,7 @@
 import dexfuzz.program.mutators.InstructionSwapper;
 import dexfuzz.program.mutators.InvokeChanger;
 import dexfuzz.program.mutators.NewArrayLengthChanger;
+import dexfuzz.program.mutators.NewInstanceChanger;
 import dexfuzz.program.mutators.NewMethodCaller;
 import dexfuzz.program.mutators.NonsenseStringPrinter;
 import dexfuzz.program.mutators.OppositeBranchChanger;
@@ -54,6 +55,7 @@
 import dexfuzz.rawdex.ProtoIdItem;
 import dexfuzz.rawdex.RawDexFile;
 import dexfuzz.rawdex.TypeIdItem;
+import dexfuzz.rawdex.TypeList;
 import dexfuzz.rawdex.formats.ContainsPoolIndex.PoolIndexKind;
 
 import java.io.BufferedReader;
@@ -204,6 +206,7 @@
     registerMutator(new InstructionSwapper(rng, mutationStats, mutations));
     registerMutator(new InvokeChanger(rng, mutationStats, mutations));
     registerMutator(new NewArrayLengthChanger(rng, mutationStats, mutations));
+    registerMutator(new NewInstanceChanger(rng, mutationStats, mutations));
     registerMutator(new NewMethodCaller(rng, mutationStats, mutations));
     registerMutator(new NonsenseStringPrinter(rng, mutationStats, mutations));
     registerMutator(new OppositeBranchChanger(rng, mutationStats, mutations));
@@ -609,4 +612,45 @@
         fieldIdx));
     return null;
   }
-}
+
+  /**
+   * Converts a type index into its descriptor string.
+   * @param typeIdx index into the dex file's type_ids list.
+   * @return the descriptor string of the type.
+   */
+  public String getTypeString(int typeIdx) {
+    TypeIdItem typeIdItem = rawDexFile.typeIds.get(typeIdx);
+    return rawDexFile.stringDatas.get(typeIdItem.descriptorIdx).getString();
+  }
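+  // For example (index illustrative), getTypeString(5) could return a dex
+  // type descriptor such as "Ljava/lang/Object;".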
+
+  /**
+   * Converts a method index into the method's name string.
+   * @param methodIdx index into the dex file's method_ids list.
+   * @return the name string of the method.
+   */
+  public String getMethodString(int methodIdx) {
+    MethodIdItem methodIdItem = rawDexFile.methodIds.get(methodIdx);
+    return rawDexFile.stringDatas.get(methodIdItem.nameIdx).getString();
+  }
+
+  /**
+   * Converts a method index into a string form of the method's prototype.
+   * @param methodIdx index into the dex file's method_ids list.
+   * @return the prototype string, "(<parameter types>)<return type>".
+   */
+  public String getMethodProto(int methodIdx) {
+    MethodIdItem methodIdItem = rawDexFile.methodIds.get(methodIdx);
+    ProtoIdItem protoIdItem = rawDexFile.protoIds.get(methodIdItem.protoIdx);
+
+    if (!protoIdItem.parametersOff.pointsToSomething()) {
+      return "()" + getTypeString(protoIdItem.returnTypeIdx);
+    }
+
+    TypeList typeList = (TypeList) protoIdItem.parametersOff.getPointedToItem();
+    String typeItem = "(";
+    for (int i = 0; i < typeList.size; i++) {
+      typeItem = typeItem + typeList.list[i];
+    }
+    return typeItem + ")" + getTypeString(protoIdItem.returnTypeIdx);
+  }
+}
diff --git a/tools/dexfuzz/src/dexfuzz/program/mutators/InvokeChanger.java b/tools/dexfuzz/src/dexfuzz/program/mutators/InvokeChanger.java
index 8750fc6..f0ed83a 100644
--- a/tools/dexfuzz/src/dexfuzz/program/mutators/InvokeChanger.java
+++ b/tools/dexfuzz/src/dexfuzz/program/mutators/InvokeChanger.java
@@ -167,7 +167,7 @@
   }
 
   private boolean isRangeInvokeInst(Opcode opcode){
-    return Opcode.isBetween(opcode, Opcode.INVOKE_VIRTUAL, Opcode.INVOKE_INTERFACE);
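+    // True for the invoke-{virtual,super,direct,static,interface}/range opcodes.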
+    return Opcode.isBetween(opcode, Opcode.INVOKE_VIRTUAL_RANGE, Opcode.INVOKE_INTERFACE_RANGE);
 
   }
 
diff --git a/tools/dexfuzz/src/dexfuzz/program/mutators/NewInstanceChanger.java b/tools/dexfuzz/src/dexfuzz/program/mutators/NewInstanceChanger.java
new file mode 100644
index 0000000..cbf79e3
--- /dev/null
+++ b/tools/dexfuzz/src/dexfuzz/program/mutators/NewInstanceChanger.java
@@ -0,0 +1,218 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package dexfuzz.program.mutators;
+
+import dexfuzz.Log;
+import dexfuzz.MutationStats;
+import dexfuzz.program.MInsn;
+import dexfuzz.program.MutatableCode;
+import dexfuzz.program.Mutation;
+import dexfuzz.rawdex.Opcode;
+import dexfuzz.rawdex.formats.ContainsPoolIndex;
+import dexfuzz.rawdex.formats.ContainsPoolIndex.PoolIndexKind;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Random;
+
+/**
+ * Mutator NewInstanceChanger changes the type operand of a new-instance
+ * instruction in a method to a random type from the pool.
+ */
+public class NewInstanceChanger extends CodeMutator {
+
+  /**
+   * Every CodeMutator has an AssociatedMutation, representing the
+   * mutation that this CodeMutator can perform, to allow separate
+   * generateMutation() and applyMutation() phases, allowing serialization.
+   */
+  public static class AssociatedMutation extends Mutation {
+    public int newInstanceToChangeIdx;
+    public int newInstanceTypeIdx;
+
+    @Override
+    public String getString() {
+      StringBuilder builder = new StringBuilder();
+      builder.append(newInstanceToChangeIdx).append(" ");
+      builder.append(newInstanceTypeIdx);
+      return builder.toString();
+    }
+
+    @Override
+    public void parseString(String[] elements) {
+      newInstanceToChangeIdx = Integer.parseInt(elements[2]);
+      newInstanceTypeIdx = Integer.parseInt(elements[3]);
+    }
+  }
+
+  // The following two methods are here for the benefit of MutationSerializer,
+  // so it can create a CodeMutator and get the correct associated Mutation, as it
+  // reads in mutations from a dump of mutations.
+  @Override
+  public Mutation getNewMutation() {
+    return new AssociatedMutation();
+  }
+
+  public NewInstanceChanger() {}
+
+  public NewInstanceChanger(Random rng, MutationStats stats, List<Mutation> mutations) {
+    super(rng, stats, mutations);
+    likelihood = 10;
+  }
+
+  // A cache that should only exist between generateMutation() and applyMutation(),
+  // or be created at the start of applyMutation(), if we're reading in mutations from
+  // a file.
+  private List<MInsn> newInstanceCachedInsns = null;
+
+  private void generateCachedNewInstanceInsns(MutatableCode mutatableCode) {
+    if (newInstanceCachedInsns != null) {
+      return;
+    }
+
+    newInstanceCachedInsns = new ArrayList<MInsn>();
+
+    for (MInsn mInsn : mutatableCode.getInstructions()) {
+      if (mInsn.insn.info.opcode == Opcode.NEW_INSTANCE) {
+        newInstanceCachedInsns.add(mInsn);
+      }
+    }
+  }
+
+  @Override
+  protected boolean canMutate(MutatableCode mutatableCode) {
+    // Cannot change the pool index with only one type.
+    if (mutatableCode.program.getTotalPoolIndicesByKind(PoolIndexKind.Type) < 2) {
+      Log.debug("Cannot mutate, only one type, skipping...");
+      return false;
+    }
+
+    for (MInsn mInsn : mutatableCode.getInstructions()) {
+      if (mInsn.insn.info.opcode == Opcode.NEW_INSTANCE) {
+        return true;
+      }
+    }
+    Log.debug("No New Instance in method, skipping...");
+    return false;
+  }
+
+  @Override
+  protected Mutation generateMutation(MutatableCode mutatableCode) {
+    generateCachedNewInstanceInsns(mutatableCode);
+
+    int newInstanceIdxInCache = rng.nextInt(newInstanceCachedInsns.size());
+    MInsn newInstanceInsn = newInstanceCachedInsns.get(newInstanceIdxInCache);
+    int oldTypeIdx = (int) newInstanceInsn.insn.vregB;
+    int newTypeIdx = 0;
+    int totalPoolIndices = mutatableCode.program.getTotalPoolIndicesByKind(PoolIndexKind.Type);
+    if (totalPoolIndices < 2) {
+      Log.errorAndQuit("Less than two types present, quitting...");
+    }
+
+    while (newTypeIdx == oldTypeIdx) {
+      newTypeIdx = rng.nextInt(totalPoolIndices);
+    }
+
+    AssociatedMutation mutation = new AssociatedMutation();
+    mutation.setup(this.getClass(), mutatableCode);
+    mutation.newInstanceToChangeIdx = newInstanceIdxInCache;
+    mutation.newInstanceTypeIdx = newTypeIdx;
+    return mutation;
+  }
+
+  @Override
+  protected void applyMutation(Mutation uncastMutation) {
+    // Cast the Mutation to our AssociatedMutation, so we can access its fields.
+    AssociatedMutation mutation = (AssociatedMutation) uncastMutation;
+    MutatableCode mutatableCode = mutation.mutatableCode;
+
+    generateCachedNewInstanceInsns(mutatableCode);
+
+    MInsn newInstanceInsn = newInstanceCachedInsns.get(mutation.newInstanceToChangeIdx);
+
+    ContainsPoolIndex poolIndex = ((ContainsPoolIndex)newInstanceInsn.insn.info.format);
+
+    poolIndex.setPoolIndex(newInstanceInsn.insn, mutation.newInstanceTypeIdx);
+
+    Log.info("Changed the type of " + newInstanceInsn.toString() +
+        " to " + mutation.newInstanceTypeIdx);
+
+    int foundNewInstanceInsnIdx = foundInsnIdx(mutatableCode, newInstanceInsn);
+
+    changeInvokeDirect(foundNewInstanceInsnIdx, mutation);
+
+    stats.incrementStat("Changed new instance.");
+
+    // Clear cache.
+    newInstanceCachedInsns = null;
+  }
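+
+  // Sketch of the intended rewrite (pool indices are hypothetical):
+  //   new-instance v0, type@5        ->  new-instance v0, type@9
+  //   invoke-direct {v0}, method@12  ->  invoke-direct {v0}, method@<ctor of type@9>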
+
+  /**
+   * Try to find the invoke-direct/invoke-direct-range instruction that follows
+   * the new-instance instruction and change its method ID.
+   * @param foundInsnIdx index of the new-instance instruction, or -1 if absent.
+   * @param uncastMutation the mutation being applied.
+   */
+  protected void changeInvokeDirect(int foundInsnIdx, Mutation uncastMutation) {
+    AssociatedMutation mutation = (AssociatedMutation) uncastMutation;
+    MutatableCode mutatableCode = mutation.mutatableCode;
+    if (foundInsnIdx == -1 ||
+        foundInsnIdx + 1 == mutatableCode.getInstructionCount()) {
+      return;
+    }
+
+    MInsn insn = mutatableCode.getInstructionAt(foundInsnIdx + 1);
+    if (isInvokeInst(insn)) {
+      ContainsPoolIndex poolIndex = ((ContainsPoolIndex) insn.insn.info.format);
+      long oldMethodIdx = poolIndex.getPoolIndex(insn.insn);
+      String className = mutatableCode.program.getTypeString(mutation.newInstanceTypeIdx);
+      String methodName = mutatableCode.program.getMethodString((int) oldMethodIdx);
+      String shorty = mutatableCode.program.getMethodProto((int) oldMethodIdx);
+
+      // Match the type of the invoke with the randomly changed type of the
+      // preceding new-instance. This may cause many verification failures, but
+      // often still yields a runnable program.
+      // TODO: Work on generating a program which finds a valid type.
+      int methodId = mutatableCode.program.getNewItemCreator().
+          findOrCreateMethodId(className, methodName, shorty);
+
+      poolIndex.setPoolIndex(insn.insn, mutation.newInstanceTypeIdx);
+
+      insn.insn.vregB = methodId;
+
+      Log.info("Changed " + oldMethodIdx + " to " + methodId);
+    }
+  }
+
+  protected boolean isInvokeInst(MInsn mInsn) {
+    return (mInsn.insn.info.opcode == Opcode.INVOKE_DIRECT ||
+        mInsn.insn.info.opcode == Opcode.INVOKE_DIRECT_RANGE);
+  }
+
+  // Returns the index of the given new-instance instruction in the method,
+  // or -1 if it cannot be found.
+  protected int foundInsnIdx(MutatableCode mutatableCode, MInsn newInstanceInsn) {
+    int i = 0;
+    for (MInsn mInsn : mutatableCode.getInstructions()) {
+      if (mInsn == newInstanceInsn) {
+        return i;
+      }
+      i++;
+    }
+    return -1;
+  }
+}
diff --git a/tools/libcore_failures.txt b/tools/libcore_failures.txt
index c6553f8..ea26b0e 100644
--- a/tools/libcore_failures.txt
+++ b/tools/libcore_failures.txt
@@ -211,13 +211,6 @@
           "libcore.java.lang.ProcessBuilderTest#testRedirect_nullStreams"]
 },
 {
-  description: "Test is timing sensitive",
-  result: EXEC_FAILED,
-  bug: 62528691,
-  modes: [device],
-  names: ["libcore.java.util.TimeZoneTest#testSetDefaultRace"]
-},
-{
   description: "Repeated annotations do not work in javac (OpenJDK8), fixed in OpenJDK9.
                 Blacklisted to support javac/dx build (b/36902714)",
   result: EXEC_FAILED,
diff --git a/tools/libcore_gcstress_debug_failures.txt b/tools/libcore_gcstress_debug_failures.txt
index b4c6f2b..5806b61 100644
--- a/tools/libcore_gcstress_debug_failures.txt
+++ b/tools/libcore_gcstress_debug_failures.txt
@@ -8,8 +8,11 @@
   description: "Timeouts on target with gcstress and debug.",
   result: EXEC_FAILED,
   modes: [device],
-  names: ["libcore.icu.TransliteratorTest#testAll",
+  names: ["jsr166.CompletableFutureTest#testCompleteOnTimeout_completed",
+          "libcore.icu.TransliteratorTest#testAll",
+          "libcore.icu.RelativeDateTimeFormatterTest#test_bug25821045",
           "libcore.java.lang.ref.ReferenceQueueTest#testRemoveWithDelayedResultAndTimeout",
+          "libcore.java.lang.ref.ReferenceQueueTest#testRemoveWithDelayedResultAndNoTimeout",
           "libcore.java.util.TimeZoneTest#testSetDefaultDeadlock",
           "org.apache.harmony.tests.java.util.TimerTest#testThrowingTaskKillsTimerThread"]
 }
diff --git a/tools/run-jdwp-tests.sh b/tools/run-jdwp-tests.sh
index 17c84b4..355646b 100755
--- a/tools/run-jdwp-tests.sh
+++ b/tools/run-jdwp-tests.sh
@@ -19,24 +19,19 @@
   exit 1
 fi
 
-if [ -z "$ANDROID_JAVA_TOOLCHAIN" ] ; then
-  source build/envsetup.sh
-  setpaths # include platform prebuilt java, javac, etc in $PATH.
-fi
+source build/envsetup.sh >&/dev/null # for get_build_var, setpaths
+setpaths # include platform prebuilt java, javac, etc in $PATH.
 
 if [ -z "$ANDROID_HOST_OUT" ] ; then
   ANDROID_HOST_OUT=${OUT_DIR-$ANDROID_BUILD_TOP/out}/host/linux-x86
 fi
 
-using_jack=true
-if [[ $ANDROID_COMPILE_WITH_JACK == false ]]; then
-  using_jack=false
-fi
+using_jack=$(get_build_var ANDROID_COMPILE_WITH_JACK)
 
 function jlib_suffix {
   local str=$1
   local suffix="jar"
-  if $using_jack; then
+  if [[ $using_jack == "true" ]]; then
     suffix="jack"
   fi
   echo "$str.$suffix"
@@ -127,6 +122,9 @@
   fi
 done
 
+# Make sure the debuggee doesn't clean up what the debugger has generated.
+art_debugee="$art_debugee --no-clean"
+
 # For the host:
 #
 # If, on the other hand, there is a variant set, use it to modify the art_debugee parameter to
@@ -166,7 +164,7 @@
   art_debugee="$art_debugee -verbose:jdwp"
 fi
 
-if $using_jack; then
+if [[ $using_jack == "true" ]]; then
   toolchain_args="--toolchain jack --language JN --jack-arg -g"
 else
   toolchain_args="--toolchain jdk --language CUR"
diff --git a/tools/run-libcore-tests.sh b/tools/run-libcore-tests.sh
index d549098..eecdd2f 100755
--- a/tools/run-libcore-tests.sh
+++ b/tools/run-libcore-tests.sh
@@ -19,10 +19,8 @@
   exit 1
 fi
 
-if [ -z "$ANDROID_JAVA_TOOLCHAIN" ] ; then
-  source build/envsetup.sh
-  setpaths # include platform prebuilt java, javac, etc in $PATH.
-fi
+source build/envsetup.sh >&/dev/null # for get_build_var, setpaths
+setpaths # include platform prebuilt java, javac, etc in $PATH.
 
 if [ -z "$ANDROID_PRODUCT_OUT" ] ; then
   JAVA_LIBRARIES=out/target/common/obj/JAVA_LIBRARIES
@@ -30,16 +28,13 @@
   JAVA_LIBRARIES=${ANDROID_PRODUCT_OUT}/../../common/obj/JAVA_LIBRARIES
 fi
 
-using_jack=true
-if [[ $ANDROID_COMPILE_WITH_JACK == false ]]; then
-  using_jack=false
-fi
+using_jack=$(get_build_var ANDROID_COMPILE_WITH_JACK)
 
 function classes_jar_path {
   local var="$1"
   local suffix="jar"
 
-  if $using_jack; then
+  if [[ $using_jack == "true" ]]; then
     suffix="jack"
   fi
 
@@ -151,7 +146,7 @@
 vogar_args="$vogar_args --timeout 480"
 
 # Switch between using jack or javac+desugar+dx
-if $using_jack; then
+if [[ $using_jack == "true" ]]; then
   vogar_args="$vogar_args --toolchain jack --language JO"
 else
   vogar_args="$vogar_args --toolchain jdk --language CUR"