Merge "Revert "ART: Blacklist a libcore test for investigation""
diff --git a/Android.mk b/Android.mk
index 25796a0..bb1334a 100644
--- a/Android.mk
+++ b/Android.mk
@@ -93,6 +93,7 @@
 include $(art_path)/tools/dexfuzz/Android.mk
 include $(art_path)/tools/dmtracedump/Android.mk
 include $(art_path)/sigchainlib/Android.mk
+include $(art_path)/libart_fake/Android.mk
 
 
 # ART_HOST_DEPENDENCIES depends on Android.executable.mk above for ART_HOST_EXECUTABLES
diff --git a/compiler/dex/quick/dex_file_method_inliner.cc b/compiler/dex/quick/dex_file_method_inliner.cc
index 4a98342..951b075 100644
--- a/compiler/dex/quick/dex_file_method_inliner.cc
+++ b/compiler/dex/quick/dex_file_method_inliner.cc
@@ -606,13 +606,13 @@
     INTRINSIC(SunMiscUnsafe, Get ## type, ObjectJ_ ## code, kIntrinsicUnsafeGet, \
               type_flags), \
     INTRINSIC(SunMiscUnsafe, Get ## type ## Volatile, ObjectJ_ ## code, kIntrinsicUnsafeGet, \
-              type_flags | kIntrinsicFlagIsVolatile), \
+              (type_flags) | kIntrinsicFlagIsVolatile), \
     INTRINSIC(SunMiscUnsafe, Put ## type, ObjectJ ## code ## _V, kIntrinsicUnsafePut, \
               type_flags), \
     INTRINSIC(SunMiscUnsafe, Put ## type ## Volatile, ObjectJ ## code ## _V, kIntrinsicUnsafePut, \
-              type_flags | kIntrinsicFlagIsVolatile), \
+              (type_flags) | kIntrinsicFlagIsVolatile), \
     INTRINSIC(SunMiscUnsafe, PutOrdered ## type, ObjectJ ## code ## _V, kIntrinsicUnsafePut, \
-              type_flags | kIntrinsicFlagIsOrdered)
+              (type_flags) | kIntrinsicFlagIsOrdered)
 
     UNSAFE_GET_PUT(Int, I, kIntrinsicFlagNone),
     UNSAFE_GET_PUT(Long, J, kIntrinsicFlagIsLong),
diff --git a/compiler/dex/verification_results.cc b/compiler/dex/verification_results.cc
index 606302b..03c94a4 100644
--- a/compiler/dex/verification_results.cc
+++ b/compiler/dex/verification_results.cc
@@ -104,7 +104,7 @@
 
 bool VerificationResults::IsCandidateForCompilation(MethodReference&,
                                                     const uint32_t access_flags) {
-  if (!compiler_options_->IsCompilationEnabled()) {
+  if (!compiler_options_->IsBytecodeCompilationEnabled()) {
     return false;
   }
   // Don't compile class initializers unless kEverything.
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index 1ab1d31..d20f510 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -553,8 +553,8 @@
   MethodReference method_ref(&dex_file, method_idx);
 
   if ((access_flags & kAccNative) != 0) {
-    // Are we interpreting only and have support for generic JNI down calls?
-    if (!driver->GetCompilerOptions().IsCompilationEnabled() &&
+    // Are we extracting only, and do we have support for generic JNI down calls?
+    if (!driver->GetCompilerOptions().IsJniCompilationEnabled() &&
         InstructionSetHasGenericJniStub(driver->GetInstructionSet())) {
       // Leaving this empty will trigger the generic JNI version
     } else {
diff --git a/compiler/driver/compiler_options.h b/compiler/driver/compiler_options.h
index 6bbd3c5..60b700a 100644
--- a/compiler/driver/compiler_options.h
+++ b/compiler/driver/compiler_options.h
@@ -88,8 +88,12 @@
     return compiler_filter_ == CompilerFilter::kVerifyAtRuntime;
   }
 
-  bool IsCompilationEnabled() const {
-    return CompilerFilter::IsCompilationEnabled(compiler_filter_);
+  bool IsBytecodeCompilationEnabled() const {
+    return CompilerFilter::IsBytecodeCompilationEnabled(compiler_filter_);
+  }
+
+  bool IsJniCompilationEnabled() const {
+    return CompilerFilter::IsJniCompilationEnabled(compiler_filter_);
   }
 
   bool IsVerificationEnabled() const {
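
The hunk above splits the old IsCompilationEnabled() predicate in two, so a
compiler filter can enable JNI stub compilation without enabling bytecode
(dex code) compilation. A minimal compilable sketch of the distinction, with
hypothetical filter names (the real enum lives in CompilerFilter and is not
shown in this diff):

  // Sketch only: filter names are illustrative, not ART's actual set.
  enum class Filter { kVerifyNone, kVerifyAtRuntime, kInterpretOnly, kSpeed };

  bool IsBytecodeCompilationEnabled(Filter f) {
    return f == Filter::kSpeed;
  }

  bool IsJniCompilationEnabled(Filter f) {
    // JNI stubs are cheap to compile, so more filters may allow them.
    return f == Filter::kInterpretOnly || f == Filter::kSpeed;
  }

This lines up with the compiler_driver.cc hunk above: a build that skips
bytecode compilation can still prefer compiled JNI stubs over the generic
JNI trampoline.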
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index be720ad..eaeacc5 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -437,6 +437,9 @@
       continue;
     }
     const DexFile* dex_file = dex_cache->GetDexFile();
+    CHECK(dex_file_oat_index_map_.find(dex_file) != dex_file_oat_index_map_.end())
+        << "Dex cache should have been pruned " << dex_file->GetLocation()
+        << "; possibly in class path";
     DexCacheArraysLayout layout(target_ptr_size_, dex_file);
     DCHECK(layout.Valid());
     size_t oat_index = GetOatIndexForDexCache(dex_cache);
diff --git a/compiler/jni/jni_cfi_test.cc b/compiler/jni/jni_cfi_test.cc
index 371019a..3526802 100644
--- a/compiler/jni/jni_cfi_test.cc
+++ b/compiler/jni/jni_cfi_test.cc
@@ -52,7 +52,7 @@
     std::unique_ptr<ManagedRuntimeCallingConvention> mr_conv(
         ManagedRuntimeCallingConvention::Create(&arena, is_static, is_synchronized, shorty, isa));
     const int frame_size(jni_conv->FrameSize());
-    const std::vector<ManagedRegister>& callee_save_regs = jni_conv->CalleeSaveRegisters();
+    ArrayRef<const ManagedRegister> callee_save_regs = jni_conv->CalleeSaveRegisters();
 
     // Assemble the method.
     std::unique_ptr<Assembler> jni_asm(Assembler::Create(&arena, isa));
diff --git a/compiler/jni/quick/arm/calling_convention_arm.cc b/compiler/jni/quick/arm/calling_convention_arm.cc
index 9d2732a..29411f0 100644
--- a/compiler/jni/quick/arm/calling_convention_arm.cc
+++ b/compiler/jni/quick/arm/calling_convention_arm.cc
@@ -31,10 +31,6 @@
   S0, S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11, S12, S13, S14, S15
 };
 
-static const SRegister kHFSCalleeSaveRegisters[] = {
-  S16, S17, S18, S19, S20, S21, S22, S23, S24, S25, S26, S27, S28, S29, S30, S31
-};
-
 static const DRegister kHFDArgumentRegisters[] = {
   D0, D1, D2, D3, D4, D5, D6, D7
 };
@@ -42,6 +38,57 @@
 static_assert(arraysize(kHFDArgumentRegisters) * 2 == arraysize(kHFSArgumentRegisters),
     "ks d argument registers mismatch");
 
+static constexpr ManagedRegister kCalleeSaveRegisters[] = {
+    // Core registers.
+    ArmManagedRegister::FromCoreRegister(R5),
+    ArmManagedRegister::FromCoreRegister(R6),
+    ArmManagedRegister::FromCoreRegister(R7),
+    ArmManagedRegister::FromCoreRegister(R8),
+    ArmManagedRegister::FromCoreRegister(R10),
+    ArmManagedRegister::FromCoreRegister(R11),
+    // Hard float registers.
+    ArmManagedRegister::FromSRegister(S16),
+    ArmManagedRegister::FromSRegister(S17),
+    ArmManagedRegister::FromSRegister(S18),
+    ArmManagedRegister::FromSRegister(S19),
+    ArmManagedRegister::FromSRegister(S20),
+    ArmManagedRegister::FromSRegister(S21),
+    ArmManagedRegister::FromSRegister(S22),
+    ArmManagedRegister::FromSRegister(S23),
+    ArmManagedRegister::FromSRegister(S24),
+    ArmManagedRegister::FromSRegister(S25),
+    ArmManagedRegister::FromSRegister(S26),
+    ArmManagedRegister::FromSRegister(S27),
+    ArmManagedRegister::FromSRegister(S28),
+    ArmManagedRegister::FromSRegister(S29),
+    ArmManagedRegister::FromSRegister(S30),
+    ArmManagedRegister::FromSRegister(S31)
+};
+
+static constexpr uint32_t CalculateCoreCalleeSpillMask() {
+  // LR is a special callee save which is not reported by CalleeSaveRegisters().
+  uint32_t result = 1 << LR;
+  for (auto&& r : kCalleeSaveRegisters) {
+    if (r.AsArm().IsCoreRegister()) {
+      result |= (1 << r.AsArm().AsCoreRegister());
+    }
+  }
+  return result;
+}
+
+static constexpr uint32_t CalculateFpCalleeSpillMask() {
+  uint32_t result = 0;
+  for (auto&& r : kCalleeSaveRegisters) {
+    if (r.AsArm().IsSRegister()) {
+      result |= (1 << r.AsArm().AsSRegister());
+    }
+  }
+  return result;
+}
+
+static constexpr uint32_t kCoreCalleeSpillMask = CalculateCoreCalleeSpillMask();
+static constexpr uint32_t kFpCalleeSpillMask = CalculateFpCalleeSpillMask();
+
 // Calling convention
 
 ManagedRegister ArmManagedRuntimeCallingConvention::InterproceduralScratchRegister() {
@@ -223,32 +270,15 @@
     cur_reg++;  // bump the iterator for every argument
   }
   padding_ = padding;
-
-  callee_save_regs_.push_back(ArmManagedRegister::FromCoreRegister(R5));
-  callee_save_regs_.push_back(ArmManagedRegister::FromCoreRegister(R6));
-  callee_save_regs_.push_back(ArmManagedRegister::FromCoreRegister(R7));
-  callee_save_regs_.push_back(ArmManagedRegister::FromCoreRegister(R8));
-  callee_save_regs_.push_back(ArmManagedRegister::FromCoreRegister(R10));
-  callee_save_regs_.push_back(ArmManagedRegister::FromCoreRegister(R11));
-
-  for (size_t i = 0; i < arraysize(kHFSCalleeSaveRegisters); ++i) {
-    callee_save_regs_.push_back(ArmManagedRegister::FromSRegister(kHFSCalleeSaveRegisters[i]));
-  }
 }
 
 uint32_t ArmJniCallingConvention::CoreSpillMask() const {
   // Compute spill mask to agree with callee saves initialized in the constructor
-  uint32_t result = 0;
-  result = 1 << R5 | 1 << R6 | 1 << R7 | 1 << R8 | 1 << R10 | 1 << R11 | 1 << LR;
-  return result;
+  return kCoreCalleeSpillMask;
 }
 
 uint32_t ArmJniCallingConvention::FpSpillMask() const {
-  uint32_t result = 0;
-  for (size_t i = 0; i < arraysize(kHFSCalleeSaveRegisters); ++i) {
-    result |= (1 << kHFSCalleeSaveRegisters[i]);
-  }
-  return result;
+  return kFpCalleeSpillMask;
 }
 
 ManagedRegister ArmJniCallingConvention::ReturnScratchRegister() const {
@@ -269,6 +299,10 @@
                  kStackAlignment);
 }
 
+ArrayRef<const ManagedRegister> ArmJniCallingConvention::CalleeSaveRegisters() const {
+  return ArrayRef<const ManagedRegister>(kCalleeSaveRegisters);
+}
+
 // JniCallingConvention ABI follows AAPCS where longs and doubles must occur
 // in even register numbers and stack slots
 void ArmJniCallingConvention::Next() {
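
Since kCalleeSaveRegisters and both Calculate*SpillMask() helpers are
constexpr (the loops rely on C++14 relaxed constexpr), the masks fold to
compile-time constants identical to the hand-written values they replace.
A quick check of the arithmetic using the architectural register numbers
(R5..R11 occupy bits 5..11, LR is r14, S16..S31 occupy bits 16..31):

  #include <cstdint>

  constexpr uint32_t kExpectedCoreMask =
      (1u << 5) | (1u << 6) | (1u << 7) | (1u << 8) |  // R5-R8
      (1u << 10) | (1u << 11) |                        // R10, R11
      (1u << 14);                                      // LR, added by hand
  static_assert(kExpectedCoreMask == 0x4DE0u, "core callee-save mask");

  constexpr uint32_t kExpectedFpMask = 0xFFFF0000u;    // S16..S31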
diff --git a/compiler/jni/quick/arm/calling_convention_arm.h b/compiler/jni/quick/arm/calling_convention_arm.h
index 35b5093..157880b 100644
--- a/compiler/jni/quick/arm/calling_convention_arm.h
+++ b/compiler/jni/quick/arm/calling_convention_arm.h
@@ -58,9 +58,7 @@
   void Next() OVERRIDE;  // Override default behavior for AAPCS
   size_t FrameSize() OVERRIDE;
   size_t OutArgSize() OVERRIDE;
-  const std::vector<ManagedRegister>& CalleeSaveRegisters() const OVERRIDE {
-    return callee_save_regs_;
-  }
+  ArrayRef<const ManagedRegister> CalleeSaveRegisters() const OVERRIDE;
   ManagedRegister ReturnScratchRegister() const OVERRIDE;
   uint32_t CoreSpillMask() const OVERRIDE;
   uint32_t FpSpillMask() const OVERRIDE;
@@ -78,9 +76,6 @@
   size_t NumberOfOutgoingStackArgs() OVERRIDE;
 
  private:
-  // TODO: these values aren't unique and can be shared amongst instances
-  std::vector<ManagedRegister> callee_save_regs_;
-
   // Padding to ensure longs and doubles are not split in AAPCS
   size_t padding_;
 
diff --git a/compiler/jni/quick/arm64/calling_convention_arm64.cc b/compiler/jni/quick/arm64/calling_convention_arm64.cc
index 9aef10e..ab56c1c 100644
--- a/compiler/jni/quick/arm64/calling_convention_arm64.cc
+++ b/compiler/jni/quick/arm64/calling_convention_arm64.cc
@@ -38,10 +38,65 @@
   S0, S1, S2, S3, S4, S5, S6, S7
 };
 
-static const DRegister kDCalleeSaveRegisters[] = {
-  D8, D9, D10, D11, D12, D13, D14, D15
+static constexpr ManagedRegister kCalleeSaveRegisters[] = {
+    // Core registers.
+    // Note: The native JNI function may call VM runtime functions which may suspend
+    // or trigger GC, and the JNI method frame then becomes the top quick frame.
+    // So, to satisfy the GC, we need to save LR and the callee-save registers,
+    // similar to the CalleeSaveMethod(RefOnly) frame.
+    // The JNI function is the native function which the Java code wants to call.
+    // The JNI method is the method compiled by the JNI compiler.
+    // Call chain: managed code (Java) --> JNI method --> JNI function.
+    // The thread register (X19) is saved on the stack.
+    Arm64ManagedRegister::FromXRegister(X19),
+    Arm64ManagedRegister::FromXRegister(X20),
+    Arm64ManagedRegister::FromXRegister(X21),
+    Arm64ManagedRegister::FromXRegister(X22),
+    Arm64ManagedRegister::FromXRegister(X23),
+    Arm64ManagedRegister::FromXRegister(X24),
+    Arm64ManagedRegister::FromXRegister(X25),
+    Arm64ManagedRegister::FromXRegister(X26),
+    Arm64ManagedRegister::FromXRegister(X27),
+    Arm64ManagedRegister::FromXRegister(X28),
+    Arm64ManagedRegister::FromXRegister(X29),
+    Arm64ManagedRegister::FromXRegister(LR),
+    // Hard float registers.
+    // Considering the case java_method_1 --> JNI method --> JNI function --> java_method_2,
+    // we may break on java_method_2 and still need the values of DEX registers
+    // in java_method_1, so all callee-saves (in managed code) need to be saved.
+    Arm64ManagedRegister::FromDRegister(D8),
+    Arm64ManagedRegister::FromDRegister(D9),
+    Arm64ManagedRegister::FromDRegister(D10),
+    Arm64ManagedRegister::FromDRegister(D11),
+    Arm64ManagedRegister::FromDRegister(D12),
+    Arm64ManagedRegister::FromDRegister(D13),
+    Arm64ManagedRegister::FromDRegister(D14),
+    Arm64ManagedRegister::FromDRegister(D15),
 };
 
+static constexpr uint32_t CalculateCoreCalleeSpillMask() {
+  uint32_t result = 0u;
+  for (auto&& r : kCalleeSaveRegisters) {
+    if (r.AsArm64().IsXRegister()) {
+      result |= (1 << r.AsArm64().AsXRegister());
+    }
+  }
+  return result;
+}
+
+static constexpr uint32_t CalculateFpCalleeSpillMask() {
+  uint32_t result = 0;
+  for (auto&& r : kCalleeSaveRegisters) {
+    if (r.AsArm64().IsDRegister()) {
+      result |= (1 << r.AsArm64().AsDRegister());
+    }
+  }
+  return result;
+}
+
+static constexpr uint32_t kCoreCalleeSpillMask = CalculateCoreCalleeSpillMask();
+static constexpr uint32_t kFpCalleeSpillMask = CalculateFpCalleeSpillMask();
+
 // Calling convention
 ManagedRegister Arm64ManagedRuntimeCallingConvention::InterproceduralScratchRegister() {
   return Arm64ManagedRegister::FromXRegister(X20);  // saved on entry restored on exit
@@ -157,47 +212,14 @@
 Arm64JniCallingConvention::Arm64JniCallingConvention(bool is_static, bool is_synchronized,
                                                      const char* shorty)
     : JniCallingConvention(is_static, is_synchronized, shorty, kFramePointerSize) {
-  uint32_t core_spill_mask = CoreSpillMask();
-  DCHECK_EQ(XZR, kNumberOfXRegisters - 1);  // Exclude XZR from the loop (avoid 1 << 32).
-  for (int x_reg = 0; x_reg < kNumberOfXRegisters - 1; ++x_reg) {
-    if (((1 << x_reg) & core_spill_mask) != 0) {
-      callee_save_regs_.push_back(
-          Arm64ManagedRegister::FromXRegister(static_cast<XRegister>(x_reg)));
-    }
-  }
-
-  uint32_t fp_spill_mask = FpSpillMask();
-  for (int d_reg = 0; d_reg < kNumberOfDRegisters; ++d_reg) {
-    if (((1 << d_reg) & fp_spill_mask) != 0) {
-      callee_save_regs_.push_back(
-          Arm64ManagedRegister::FromDRegister(static_cast<DRegister>(d_reg)));
-    }
-  }
 }
 
 uint32_t Arm64JniCallingConvention::CoreSpillMask() const {
-  // Compute spill mask to agree with callee saves initialized in the constructor.
-  // Note: The native jni function may call to some VM runtime functions which may suspend
-  // or trigger GC. And the jni method frame will become top quick frame in those cases.
-  // So we need to satisfy GC to save LR and callee-save registers which is similar to
-  // CalleeSaveMethod(RefOnly) frame.
-  // Jni function is the native function which the java code wants to call.
-  // Jni method is the method that compiled by jni compiler.
-  // Call chain: managed code(java) --> jni method --> jni function.
-  // Thread register(X19) is saved on stack.
-  return 1 << X19 | 1 << X20 | 1 << X21 | 1 << X22 | 1 << X23 | 1 << X24 |
-         1 << X25 | 1 << X26 | 1 << X27 | 1 << X28 | 1 << X29 | 1 << LR;
+  return kCoreCalleeSpillMask;
 }
 
 uint32_t Arm64JniCallingConvention::FpSpillMask() const {
-  // Considering the case, java_method_1 --> jni method --> jni function --> java_method_2, we may
-  // break on java_method_2 and we still need to find out the values of DEX registers in
-  // java_method_1. So all callee-saves(in managed code) need to be saved.
-  uint32_t result = 0;
-  for (size_t i = 0; i < arraysize(kDCalleeSaveRegisters); ++i) {
-    result |= (1 << kDCalleeSaveRegisters[i]);
-  }
-  return result;
+  return kFpCalleeSpillMask;
 }
 
 ManagedRegister Arm64JniCallingConvention::ReturnScratchRegister() const {
@@ -218,6 +240,10 @@
   return RoundUp(NumberOfOutgoingStackArgs() * kFramePointerSize, kStackAlignment);
 }
 
+ArrayRef<const ManagedRegister> Arm64JniCallingConvention::CalleeSaveRegisters() const {
+  return ArrayRef<const ManagedRegister>(kCalleeSaveRegisters);
+}
+
 bool Arm64JniCallingConvention::IsCurrentParamInRegister() {
   if (IsCurrentParamAFloatOrDouble()) {
     return (itr_float_and_doubles_ < 8);
diff --git a/compiler/jni/quick/arm64/calling_convention_arm64.h b/compiler/jni/quick/arm64/calling_convention_arm64.h
index 37c92b2..337e881 100644
--- a/compiler/jni/quick/arm64/calling_convention_arm64.h
+++ b/compiler/jni/quick/arm64/calling_convention_arm64.h
@@ -57,9 +57,7 @@
   // JNI calling convention
   size_t FrameSize() OVERRIDE;
   size_t OutArgSize() OVERRIDE;
-  const std::vector<ManagedRegister>& CalleeSaveRegisters() const OVERRIDE {
-    return callee_save_regs_;
-  }
+  ArrayRef<const ManagedRegister> CalleeSaveRegisters() const OVERRIDE;
   ManagedRegister ReturnScratchRegister() const OVERRIDE;
   uint32_t CoreSpillMask() const OVERRIDE;
   uint32_t FpSpillMask() const OVERRIDE;
@@ -77,9 +75,6 @@
   size_t NumberOfOutgoingStackArgs() OVERRIDE;
 
  private:
-  // TODO: these values aren't unique and can be shared amongst instances
-  std::vector<ManagedRegister> callee_save_regs_;
-
   DISALLOW_COPY_AND_ASSIGN(Arm64JniCallingConvention);
 };
 
diff --git a/compiler/jni/quick/calling_convention.h b/compiler/jni/quick/calling_convention.h
index 2c4b15c..e8f738d 100644
--- a/compiler/jni/quick/calling_convention.h
+++ b/compiler/jni/quick/calling_convention.h
@@ -17,12 +17,11 @@
 #ifndef ART_COMPILER_JNI_QUICK_CALLING_CONVENTION_H_
 #define ART_COMPILER_JNI_QUICK_CALLING_CONVENTION_H_
 
-#include <vector>
-
 #include "base/arena_object.h"
 #include "handle_scope.h"
 #include "primitive.h"
 #include "thread.h"
+#include "utils/array_ref.h"
 #include "utils/managed_register.h"
 
 namespace art {
@@ -301,7 +300,7 @@
   virtual bool RequiresSmallResultTypeExtension() const = 0;
 
   // Callee save registers to spill prior to native code (which may clobber)
-  virtual const std::vector<ManagedRegister>& CalleeSaveRegisters() const = 0;
+  virtual ArrayRef<const ManagedRegister> CalleeSaveRegisters() const = 0;
 
   // Spill mask values
   virtual uint32_t CoreSpillMask() const = 0;
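
Returning ArrayRef<const ManagedRegister> instead of a
const std::vector<ManagedRegister>& is what lets every backend serve one
static constexpr table rather than a per-instance vector (the deleted
callee_save_regs_ members). ArrayRef is a non-owning (pointer, length) view;
a simplified stand-in shows the shape (ART's real utils/array_ref.h offers
more conversions):

  #include <cstddef>

  // Simplified sketch of a non-owning array view.
  template <typename T>
  class ArrayRef {
   public:
    template <size_t n>
    explicit constexpr ArrayRef(T (&array)[n]) : data_(array), size_(n) {}
    constexpr T* begin() const { return data_; }
    constexpr T* end() const { return data_ + size_; }
    constexpr size_t size() const { return size_; }
   private:
    T* const data_;
    const size_t size_;
  };

  static constexpr int kSaves[] = {5, 6, 7};
  ArrayRef<const int> CalleeSaves() { return ArrayRef<const int>(kSaves); }

Copying an ArrayRef is two words, so returning it by value costs no more
than the old reference while dropping the heap allocation.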
diff --git a/compiler/jni/quick/jni_compiler.cc b/compiler/jni/quick/jni_compiler.cc
index 27714b8..4311a34 100644
--- a/compiler/jni/quick/jni_compiler.cc
+++ b/compiler/jni/quick/jni_compiler.cc
@@ -112,7 +112,7 @@
 
   // 1. Build the frame saving all callee saves
   const size_t frame_size(main_jni_conv->FrameSize());
-  const std::vector<ManagedRegister>& callee_save_regs = main_jni_conv->CalleeSaveRegisters();
+  ArrayRef<const ManagedRegister> callee_save_regs = main_jni_conv->CalleeSaveRegisters();
   __ BuildFrame(frame_size, mr_conv->MethodRegister(), callee_save_regs, mr_conv->EntrySpills());
   DCHECK_EQ(jni_asm->cfi().GetCurrentCFAOffset(), static_cast<int>(frame_size));
 
diff --git a/compiler/jni/quick/mips/calling_convention_mips.cc b/compiler/jni/quick/mips/calling_convention_mips.cc
index 2d31a98..3d4d140 100644
--- a/compiler/jni/quick/mips/calling_convention_mips.cc
+++ b/compiler/jni/quick/mips/calling_convention_mips.cc
@@ -27,6 +27,32 @@
 static const FRegister kFArgumentRegisters[] = { F12, F14 };
 static const DRegister kDArgumentRegisters[] = { D6, D7 };
 
+static constexpr ManagedRegister kCalleeSaveRegisters[] = {
+    // Core registers.
+    MipsManagedRegister::FromCoreRegister(S2),
+    MipsManagedRegister::FromCoreRegister(S3),
+    MipsManagedRegister::FromCoreRegister(S4),
+    MipsManagedRegister::FromCoreRegister(S5),
+    MipsManagedRegister::FromCoreRegister(S6),
+    MipsManagedRegister::FromCoreRegister(S7),
+    MipsManagedRegister::FromCoreRegister(FP),
+    // No hard float callee saves.
+};
+
+static constexpr uint32_t CalculateCoreCalleeSpillMask() {
+  // RA is a special callee save which is not reported by CalleeSaveRegisters().
+  uint32_t result = 1 << RA;
+  for (auto&& r : kCalleeSaveRegisters) {
+    if (r.AsMips().IsCoreRegister()) {
+      result |= (1 << r.AsMips().AsCoreRegister());
+    }
+  }
+  return result;
+}
+
+static constexpr uint32_t kCoreCalleeSpillMask = CalculateCoreCalleeSpillMask();
+static constexpr uint32_t kFpCalleeSpillMask = 0u;
+
 // Calling convention
 ManagedRegister MipsManagedRuntimeCallingConvention::InterproceduralScratchRegister() {
   return MipsManagedRegister::FromCoreRegister(T9);
@@ -161,21 +187,14 @@
     cur_reg++;  // bump the iterator for every argument
   }
   padding_ = padding;
-
-  callee_save_regs_.push_back(MipsManagedRegister::FromCoreRegister(S2));
-  callee_save_regs_.push_back(MipsManagedRegister::FromCoreRegister(S3));
-  callee_save_regs_.push_back(MipsManagedRegister::FromCoreRegister(S4));
-  callee_save_regs_.push_back(MipsManagedRegister::FromCoreRegister(S5));
-  callee_save_regs_.push_back(MipsManagedRegister::FromCoreRegister(S6));
-  callee_save_regs_.push_back(MipsManagedRegister::FromCoreRegister(S7));
-  callee_save_regs_.push_back(MipsManagedRegister::FromCoreRegister(FP));
 }
 
 uint32_t MipsJniCallingConvention::CoreSpillMask() const {
-  // Compute spill mask to agree with callee saves initialized in the constructor
-  uint32_t result = 0;
-  result = 1 << S2 | 1 << S3 | 1 << S4 | 1 << S5 | 1 << S6 | 1 << S7 | 1 << FP | 1 << RA;
-  return result;
+  return kCoreCalleeSpillMask;
+}
+
+uint32_t MipsJniCallingConvention::FpSpillMask() const {
+  return kFpCalleeSpillMask;
 }
 
 ManagedRegister MipsJniCallingConvention::ReturnScratchRegister() const {
@@ -196,6 +215,10 @@
   return RoundUp(NumberOfOutgoingStackArgs() * kFramePointerSize + padding_, kStackAlignment);
 }
 
+ArrayRef<const ManagedRegister> MipsJniCallingConvention::CalleeSaveRegisters() const {
+  return ArrayRef<const ManagedRegister>(kCalleeSaveRegisters);
+}
+
 // JniCallingConvention ABI follows AAPCS where longs and doubles must occur
 // in even register numbers and stack slots
 void MipsJniCallingConvention::Next() {
diff --git a/compiler/jni/quick/mips/calling_convention_mips.h b/compiler/jni/quick/mips/calling_convention_mips.h
index dc45432..5c128b0 100644
--- a/compiler/jni/quick/mips/calling_convention_mips.h
+++ b/compiler/jni/quick/mips/calling_convention_mips.h
@@ -58,14 +58,10 @@
   void Next() OVERRIDE;  // Override default behavior for AAPCS
   size_t FrameSize() OVERRIDE;
   size_t OutArgSize() OVERRIDE;
-  const std::vector<ManagedRegister>& CalleeSaveRegisters() const OVERRIDE {
-    return callee_save_regs_;
-  }
+  ArrayRef<const ManagedRegister> CalleeSaveRegisters() const OVERRIDE;
   ManagedRegister ReturnScratchRegister() const OVERRIDE;
   uint32_t CoreSpillMask() const OVERRIDE;
-  uint32_t FpSpillMask() const OVERRIDE {
-    return 0;  // Floats aren't spilled in JNI down call
-  }
+  uint32_t FpSpillMask() const OVERRIDE;
   bool IsCurrentParamInRegister() OVERRIDE;
   bool IsCurrentParamOnStack() OVERRIDE;
   ManagedRegister CurrentParamRegister() OVERRIDE;
@@ -80,9 +76,6 @@
   size_t NumberOfOutgoingStackArgs() OVERRIDE;
 
  private:
-  // TODO: these values aren't unique and can be shared amongst instances
-  std::vector<ManagedRegister> callee_save_regs_;
-
   // Padding to ensure longs and doubles are not split in AAPCS
   size_t padding_;
 
diff --git a/compiler/jni/quick/mips64/calling_convention_mips64.cc b/compiler/jni/quick/mips64/calling_convention_mips64.cc
index 807d740..f2e1da8 100644
--- a/compiler/jni/quick/mips64/calling_convention_mips64.cc
+++ b/compiler/jni/quick/mips64/calling_convention_mips64.cc
@@ -31,6 +31,33 @@
   F12, F13, F14, F15, F16, F17, F18, F19
 };
 
+static constexpr ManagedRegister kCalleeSaveRegisters[] = {
+    // Core registers.
+    Mips64ManagedRegister::FromGpuRegister(S2),
+    Mips64ManagedRegister::FromGpuRegister(S3),
+    Mips64ManagedRegister::FromGpuRegister(S4),
+    Mips64ManagedRegister::FromGpuRegister(S5),
+    Mips64ManagedRegister::FromGpuRegister(S6),
+    Mips64ManagedRegister::FromGpuRegister(S7),
+    Mips64ManagedRegister::FromGpuRegister(GP),
+    Mips64ManagedRegister::FromGpuRegister(S8),
+    // No hard float callee saves.
+};
+
+static constexpr uint32_t CalculateCoreCalleeSpillMask() {
+  // RA is a special callee save which is not reported by CalleeSaveRegisters().
+  uint32_t result = 1 << RA;
+  for (auto&& r : kCalleeSaveRegisters) {
+    if (r.AsMips64().IsGpuRegister()) {
+      result |= (1 << r.AsMips64().AsGpuRegister());
+    }
+  }
+  return result;
+}
+
+static constexpr uint32_t kCoreCalleeSpillMask = CalculateCoreCalleeSpillMask();
+static constexpr uint32_t kFpCalleeSpillMask = 0u;
+
 // Calling convention
 ManagedRegister Mips64ManagedRuntimeCallingConvention::InterproceduralScratchRegister() {
   return Mips64ManagedRegister::FromGpuRegister(T9);
@@ -126,22 +153,14 @@
 Mips64JniCallingConvention::Mips64JniCallingConvention(bool is_static, bool is_synchronized,
                                                        const char* shorty)
     : JniCallingConvention(is_static, is_synchronized, shorty, kFramePointerSize) {
-  callee_save_regs_.push_back(Mips64ManagedRegister::FromGpuRegister(S2));
-  callee_save_regs_.push_back(Mips64ManagedRegister::FromGpuRegister(S3));
-  callee_save_regs_.push_back(Mips64ManagedRegister::FromGpuRegister(S4));
-  callee_save_regs_.push_back(Mips64ManagedRegister::FromGpuRegister(S5));
-  callee_save_regs_.push_back(Mips64ManagedRegister::FromGpuRegister(S6));
-  callee_save_regs_.push_back(Mips64ManagedRegister::FromGpuRegister(S7));
-  callee_save_regs_.push_back(Mips64ManagedRegister::FromGpuRegister(GP));
-  callee_save_regs_.push_back(Mips64ManagedRegister::FromGpuRegister(S8));
 }
 
 uint32_t Mips64JniCallingConvention::CoreSpillMask() const {
-  // Compute spill mask to agree with callee saves initialized in the constructor
-  uint32_t result = 0;
-  result = 1 << S2 | 1 << S3 | 1 << S4 | 1 << S5 | 1 << S6 | 1 << S7 | 1 << GP | 1 << S8 | 1 << RA;
-  DCHECK_EQ(static_cast<size_t>(POPCOUNT(result)), callee_save_regs_.size() + 1);
-  return result;
+  return kCoreCalleeSpillMask;
+}
+
+uint32_t Mips64JniCallingConvention::FpSpillMask() const {
+  return kFpCalleeSpillMask;
 }
 
 ManagedRegister Mips64JniCallingConvention::ReturnScratchRegister() const {
@@ -162,6 +181,10 @@
   return RoundUp(NumberOfOutgoingStackArgs() * kFramePointerSize, kStackAlignment);
 }
 
+ArrayRef<const ManagedRegister> Mips64JniCallingConvention::CalleeSaveRegisters() const {
+  return ArrayRef<const ManagedRegister>(kCalleeSaveRegisters);
+}
+
 bool Mips64JniCallingConvention::IsCurrentParamInRegister() {
   return itr_args_ < 8;
 }
diff --git a/compiler/jni/quick/mips64/calling_convention_mips64.h b/compiler/jni/quick/mips64/calling_convention_mips64.h
index 3d6aab7..99ea3cd 100644
--- a/compiler/jni/quick/mips64/calling_convention_mips64.h
+++ b/compiler/jni/quick/mips64/calling_convention_mips64.h
@@ -57,14 +57,10 @@
   // JNI calling convention
   size_t FrameSize() OVERRIDE;
   size_t OutArgSize() OVERRIDE;
-  const std::vector<ManagedRegister>& CalleeSaveRegisters() const OVERRIDE {
-    return callee_save_regs_;
-  }
+  ArrayRef<const ManagedRegister> CalleeSaveRegisters() const OVERRIDE;
   ManagedRegister ReturnScratchRegister() const OVERRIDE;
   uint32_t CoreSpillMask() const OVERRIDE;
-  uint32_t FpSpillMask() const OVERRIDE {
-    return 0;  // Floats aren't spilled in JNI down call
-  }
+  uint32_t FpSpillMask() const OVERRIDE;
   bool IsCurrentParamInRegister() OVERRIDE;
   bool IsCurrentParamOnStack() OVERRIDE;
   ManagedRegister CurrentParamRegister() OVERRIDE;
@@ -79,9 +75,6 @@
   size_t NumberOfOutgoingStackArgs() OVERRIDE;
 
  private:
-  // TODO: these values aren't unique and can be shared amongst instances
-  std::vector<ManagedRegister> callee_save_regs_;
-
   DISALLOW_COPY_AND_ASSIGN(Mips64JniCallingConvention);
 };
 
diff --git a/compiler/jni/quick/x86/calling_convention_x86.cc b/compiler/jni/quick/x86/calling_convention_x86.cc
index 322caca..22c7cd0 100644
--- a/compiler/jni/quick/x86/calling_convention_x86.cc
+++ b/compiler/jni/quick/x86/calling_convention_x86.cc
@@ -23,6 +23,28 @@
 namespace art {
 namespace x86 {
 
+static constexpr ManagedRegister kCalleeSaveRegisters[] = {
+    // Core registers.
+    X86ManagedRegister::FromCpuRegister(EBP),
+    X86ManagedRegister::FromCpuRegister(ESI),
+    X86ManagedRegister::FromCpuRegister(EDI),
+    // No hard float callee saves.
+};
+
+static constexpr uint32_t CalculateCoreCalleeSpillMask() {
+  // The spilled PC gets a special marker.
+  uint32_t result = 1 << kNumberOfCpuRegisters;
+  for (auto&& r : kCalleeSaveRegisters) {
+    if (r.AsX86().IsCpuRegister()) {
+      result |= (1 << r.AsX86().AsCpuRegister());
+    }
+  }
+  return result;
+}
+
+static constexpr uint32_t kCoreCalleeSpillMask = CalculateCoreCalleeSpillMask();
+static constexpr uint32_t kFpCalleeSpillMask = 0u;
+
 // Calling convention
 
 ManagedRegister X86ManagedRuntimeCallingConvention::InterproceduralScratchRegister() {
@@ -169,13 +191,14 @@
 X86JniCallingConvention::X86JniCallingConvention(bool is_static, bool is_synchronized,
                                                  const char* shorty)
     : JniCallingConvention(is_static, is_synchronized, shorty, kFramePointerSize) {
-  callee_save_regs_.push_back(X86ManagedRegister::FromCpuRegister(EBP));
-  callee_save_regs_.push_back(X86ManagedRegister::FromCpuRegister(ESI));
-  callee_save_regs_.push_back(X86ManagedRegister::FromCpuRegister(EDI));
 }
 
 uint32_t X86JniCallingConvention::CoreSpillMask() const {
-  return 1 << EBP | 1 << ESI | 1 << EDI | 1 << kNumberOfCpuRegisters;
+  return kCoreCalleeSpillMask;
+}
+
+uint32_t X86JniCallingConvention::FpSpillMask() const {
+  return kFpCalleeSpillMask;
 }
 
 size_t X86JniCallingConvention::FrameSize() {
@@ -192,6 +215,10 @@
   return RoundUp(NumberOfOutgoingStackArgs() * kFramePointerSize, kStackAlignment);
 }
 
+ArrayRef<const ManagedRegister> X86JniCallingConvention::CalleeSaveRegisters() const {
+  return ArrayRef<const ManagedRegister>(kCalleeSaveRegisters);
+}
+
 bool X86JniCallingConvention::IsCurrentParamInRegister() {
   return false;  // Everything is passed by stack.
 }
diff --git a/compiler/jni/quick/x86/calling_convention_x86.h b/compiler/jni/quick/x86/calling_convention_x86.h
index cdf0956..9d678b7 100644
--- a/compiler/jni/quick/x86/calling_convention_x86.h
+++ b/compiler/jni/quick/x86/calling_convention_x86.h
@@ -59,14 +59,10 @@
   // JNI calling convention
   size_t FrameSize() OVERRIDE;
   size_t OutArgSize() OVERRIDE;
-  const std::vector<ManagedRegister>& CalleeSaveRegisters() const OVERRIDE {
-    return callee_save_regs_;
-  }
+  ArrayRef<const ManagedRegister> CalleeSaveRegisters() const OVERRIDE;
   ManagedRegister ReturnScratchRegister() const OVERRIDE;
   uint32_t CoreSpillMask() const OVERRIDE;
-  uint32_t FpSpillMask() const OVERRIDE {
-    return 0;
-  }
+  uint32_t FpSpillMask() const OVERRIDE;
   bool IsCurrentParamInRegister() OVERRIDE;
   bool IsCurrentParamOnStack() OVERRIDE;
   ManagedRegister CurrentParamRegister() OVERRIDE;
@@ -81,9 +77,6 @@
   size_t NumberOfOutgoingStackArgs() OVERRIDE;
 
  private:
-  // TODO: these values aren't unique and can be shared amongst instances
-  std::vector<ManagedRegister> callee_save_regs_;
-
   DISALLOW_COPY_AND_ASSIGN(X86JniCallingConvention);
 };
 
diff --git a/compiler/jni/quick/x86_64/calling_convention_x86_64.cc b/compiler/jni/quick/x86_64/calling_convention_x86_64.cc
index b6b11ca..cc4d232 100644
--- a/compiler/jni/quick/x86_64/calling_convention_x86_64.cc
+++ b/compiler/jni/quick/x86_64/calling_convention_x86_64.cc
@@ -24,6 +24,45 @@
 namespace art {
 namespace x86_64 {
 
+static constexpr ManagedRegister kCalleeSaveRegisters[] = {
+    // Core registers.
+    X86_64ManagedRegister::FromCpuRegister(RBX),
+    X86_64ManagedRegister::FromCpuRegister(RBP),
+    X86_64ManagedRegister::FromCpuRegister(R12),
+    X86_64ManagedRegister::FromCpuRegister(R13),
+    X86_64ManagedRegister::FromCpuRegister(R14),
+    X86_64ManagedRegister::FromCpuRegister(R15),
+    // Hard float registers.
+    X86_64ManagedRegister::FromXmmRegister(XMM12),
+    X86_64ManagedRegister::FromXmmRegister(XMM13),
+    X86_64ManagedRegister::FromXmmRegister(XMM14),
+    X86_64ManagedRegister::FromXmmRegister(XMM15),
+};
+
+static constexpr uint32_t CalculateCoreCalleeSpillMask() {
+  // The spilled PC gets a special marker.
+  uint32_t result = 1 << kNumberOfCpuRegisters;
+  for (auto&& r : kCalleeSaveRegisters) {
+    if (r.AsX86_64().IsCpuRegister()) {
+      result |= (1 << r.AsX86_64().AsCpuRegister().AsRegister());
+    }
+  }
+  return result;
+}
+
+static constexpr uint32_t CalculateFpCalleeSpillMask() {
+  uint32_t result = 0;
+  for (auto&& r : kCalleeSaveRegisters) {
+    if (r.AsX86_64().IsXmmRegister()) {
+      result |= (1 << r.AsX86_64().AsXmmRegister().AsFloatRegister());
+    }
+  }
+  return result;
+}
+
+static constexpr uint32_t kCoreCalleeSpillMask = CalculateCoreCalleeSpillMask();
+static constexpr uint32_t kFpCalleeSpillMask = CalculateFpCalleeSpillMask();
+
 // Calling convention
 
 ManagedRegister X86_64ManagedRuntimeCallingConvention::InterproceduralScratchRegister() {
@@ -125,25 +164,14 @@
 X86_64JniCallingConvention::X86_64JniCallingConvention(bool is_static, bool is_synchronized,
                                                        const char* shorty)
     : JniCallingConvention(is_static, is_synchronized, shorty, kFramePointerSize) {
-  callee_save_regs_.push_back(X86_64ManagedRegister::FromCpuRegister(RBX));
-  callee_save_regs_.push_back(X86_64ManagedRegister::FromCpuRegister(RBP));
-  callee_save_regs_.push_back(X86_64ManagedRegister::FromCpuRegister(R12));
-  callee_save_regs_.push_back(X86_64ManagedRegister::FromCpuRegister(R13));
-  callee_save_regs_.push_back(X86_64ManagedRegister::FromCpuRegister(R14));
-  callee_save_regs_.push_back(X86_64ManagedRegister::FromCpuRegister(R15));
-  callee_save_regs_.push_back(X86_64ManagedRegister::FromXmmRegister(XMM12));
-  callee_save_regs_.push_back(X86_64ManagedRegister::FromXmmRegister(XMM13));
-  callee_save_regs_.push_back(X86_64ManagedRegister::FromXmmRegister(XMM14));
-  callee_save_regs_.push_back(X86_64ManagedRegister::FromXmmRegister(XMM15));
 }
 
 uint32_t X86_64JniCallingConvention::CoreSpillMask() const {
-  return 1 << RBX | 1 << RBP | 1 << R12 | 1 << R13 | 1 << R14 | 1 << R15 |
-      1 << kNumberOfCpuRegisters;
+  return kCoreCalleeSpillMask;
 }
 
 uint32_t X86_64JniCallingConvention::FpSpillMask() const {
-  return 1 << XMM12 | 1 << XMM13 | 1 << XMM14 | 1 << XMM15;
+  return kFpCalleeSpillMask;
 }
 
 size_t X86_64JniCallingConvention::FrameSize() {
@@ -160,6 +188,10 @@
   return RoundUp(NumberOfOutgoingStackArgs() * kFramePointerSize, kStackAlignment);
 }
 
+ArrayRef<const ManagedRegister> X86_64JniCallingConvention::CalleeSaveRegisters() const {
+  return ArrayRef<const ManagedRegister>(kCalleeSaveRegisters);
+}
+
 bool X86_64JniCallingConvention::IsCurrentParamInRegister() {
   return !IsCurrentParamOnStack();
 }
diff --git a/compiler/jni/quick/x86_64/calling_convention_x86_64.h b/compiler/jni/quick/x86_64/calling_convention_x86_64.h
index 6e47c9f..e2d3d48 100644
--- a/compiler/jni/quick/x86_64/calling_convention_x86_64.h
+++ b/compiler/jni/quick/x86_64/calling_convention_x86_64.h
@@ -55,9 +55,7 @@
   // JNI calling convention
   size_t FrameSize() OVERRIDE;
   size_t OutArgSize() OVERRIDE;
-  const std::vector<ManagedRegister>& CalleeSaveRegisters() const OVERRIDE {
-    return callee_save_regs_;
-  }
+  ArrayRef<const ManagedRegister> CalleeSaveRegisters() const OVERRIDE;
   ManagedRegister ReturnScratchRegister() const OVERRIDE;
   uint32_t CoreSpillMask() const OVERRIDE;
   uint32_t FpSpillMask() const OVERRIDE;
@@ -75,9 +73,6 @@
   size_t NumberOfOutgoingStackArgs() OVERRIDE;
 
  private:
-  // TODO: these values aren't unique and can be shared amongst instances
-  std::vector<ManagedRegister> callee_save_regs_;
-
   DISALLOW_COPY_AND_ASSIGN(X86_64JniCallingConvention);
 };
 
diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc
index 5b19284..21e198c 100644
--- a/compiler/oat_test.cc
+++ b/compiler/oat_test.cc
@@ -448,23 +448,23 @@
 }
 
 TEST_F(OatTest, OatHeaderIsValid) {
-    InstructionSet insn_set = kX86;
-    std::string error_msg;
-    std::unique_ptr<const InstructionSetFeatures> insn_features(
-        InstructionSetFeatures::FromVariant(insn_set, "default", &error_msg));
-    ASSERT_TRUE(insn_features.get() != nullptr) << error_msg;
-    std::unique_ptr<OatHeader> oat_header(OatHeader::Create(insn_set,
-                                                            insn_features.get(),
-                                                            0u,
-                                                            nullptr));
-    ASSERT_NE(oat_header.get(), nullptr);
-    ASSERT_TRUE(oat_header->IsValid());
+  InstructionSet insn_set = kX86;
+  std::string error_msg;
+  std::unique_ptr<const InstructionSetFeatures> insn_features(
+    InstructionSetFeatures::FromVariant(insn_set, "default", &error_msg));
+  ASSERT_TRUE(insn_features.get() != nullptr) << error_msg;
+  std::unique_ptr<OatHeader> oat_header(OatHeader::Create(insn_set,
+                                                          insn_features.get(),
+                                                          0u,
+                                                          nullptr));
+  ASSERT_NE(oat_header.get(), nullptr);
+  ASSERT_TRUE(oat_header->IsValid());
 
-    char* magic = const_cast<char*>(oat_header->GetMagic());
-    strcpy(magic, "");  // bad magic
-    ASSERT_FALSE(oat_header->IsValid());
-    strcpy(magic, "oat\n000");  // bad version
-    ASSERT_FALSE(oat_header->IsValid());
+  char* magic = const_cast<char*>(oat_header->GetMagic());
+  strcpy(magic, "");  // bad magic
+  ASSERT_FALSE(oat_header->IsValid());
+  strcpy(magic, "oat\n000");  // bad version
+  ASSERT_FALSE(oat_header->IsValid());
 }
 
 TEST_F(OatTest, EmptyTextSection) {
@@ -766,4 +766,28 @@
   TestZipFileInput(true);
 }
 
+TEST_F(OatTest, UpdateChecksum) {
+  InstructionSet insn_set = kX86;
+  std::string error_msg;
+  std::unique_ptr<const InstructionSetFeatures> insn_features(
+    InstructionSetFeatures::FromVariant(insn_set, "default", &error_msg));
+  ASSERT_TRUE(insn_features.get() != nullptr) << error_msg;
+  std::unique_ptr<OatHeader> oat_header(OatHeader::Create(insn_set,
+                                                          insn_features.get(),
+                                                          0u,
+                                                          nullptr));
+  // The starting adler32 value is 1.
+  EXPECT_EQ(1U, oat_header->GetChecksum());
+
+  oat_header->UpdateChecksum(OatHeader::kOatMagic, sizeof(OatHeader::kOatMagic));
+  EXPECT_EQ(64291151U, oat_header->GetChecksum());
+
+  // Make sure that null data does not reset the checksum.
+  oat_header->UpdateChecksum(nullptr, 0);
+  EXPECT_EQ(64291151U, oat_header->GetChecksum());
+
+  oat_header->UpdateChecksum(OatHeader::kOatMagic, sizeof(OatHeader::kOatMagic));
+  EXPECT_EQ(216138397U, oat_header->GetChecksum());
+}
+
 }  // namespace art
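
The expected values in the UpdateChecksum test are plain zlib adler32 sums
over the 4-byte magic "oat\n", starting from the adler32 seed of 1 noted in
the comment. Both constants can be reproduced with zlib alone:

  #include <cassert>
  #include <cstdint>
  #include <zlib.h>

  int main() {
    static const uint8_t kMagic[] = {'o', 'a', 't', '\n'};  // OatHeader::kOatMagic
    uLong sum = adler32(0L, Z_NULL, 0);   // returns the seed, 1
    sum = adler32(sum, kMagic, sizeof(kMagic));
    assert(sum == 64291151u);             // first UpdateChecksum above
    sum = adler32(sum, kMagic, sizeof(kMagic));
    assert(sum == 216138397u);            // second UpdateChecksum above
    return 0;
  }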
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index 8da9f06..4232002 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -63,6 +63,29 @@
     return reinterpret_cast<const UnalignedDexFileHeader*>(raw_data);
 }
 
+class ChecksumUpdatingOutputStream : public OutputStream {
+ public:
+  ChecksumUpdatingOutputStream(OutputStream* out, OatHeader* oat_header)
+      : OutputStream(out->GetLocation()), out_(out), oat_header_(oat_header) { }
+
+  bool WriteFully(const void* buffer, size_t byte_count) OVERRIDE {
+    oat_header_->UpdateChecksum(buffer, byte_count);
+    return out_->WriteFully(buffer, byte_count);
+  }
+
+  off_t Seek(off_t offset, Whence whence) OVERRIDE {
+    return out_->Seek(offset, whence);
+  }
+
+  bool Flush() OVERRIDE {
+    return out_->Flush();
+  }
+
+ private:
+  OutputStream* const out_;
+  OatHeader* const oat_header_;
+};
+
 }  // anonymous namespace
 
 // Defines the location of the raw dex file to write.
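
ChecksumUpdatingOutputStream is a small decorator: WriteFully() folds every
written byte into the oat header's checksum before forwarding, while Seek()
and Flush() pass through untouched. That is what allows the later hunks to
delete OatWriter::WriteData() and call out->WriteFully() directly. The
wrapping happens once at the top of WriteRodata()/WriteCode(), exactly as in
the hunks below:

  // Wrap out to update checksum with each write.
  ChecksumUpdatingOutputStream checksum_updating_out(out, oat_header_.get());
  out = &checksum_updating_out;

One consequence: bytes that bypass the stream (the dex files and type lookup
tables written through the memory map) are never seen by the wrapper, hence
the separate bulk UpdateChecksum() call added in the next hunk.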
@@ -422,13 +445,21 @@
   for (OatDexFile& oat_dex_file : oat_dex_files_) {
     oat_dex_file.ReserveClassOffsets(this);
   }
-  if (!WriteOatDexFiles(rodata) ||
+  ChecksumUpdatingOutputStream checksum_updating_rodata(rodata, oat_header_.get());
+  if (!WriteOatDexFiles(&checksum_updating_rodata) ||
       !ExtendForTypeLookupTables(rodata, file, size_after_type_lookup_tables) ||
       !OpenDexFiles(file, verify, &dex_files_map, &dex_files) ||
       !WriteTypeLookupTables(dex_files_map.get(), dex_files)) {
     return false;
   }
 
+  // Do a bulk checksum update for Dex[] and TypeLookupTable[]. Doing it piece by
+  // piece would be difficult because we're not using the OutputStream directly.
+  if (!oat_dex_files_.empty()) {
+    size_t size = size_after_type_lookup_tables - oat_dex_files_[0].dex_file_offset_;
+    oat_header_->UpdateChecksum(dex_files_map->Begin(), size);
+  }
+
   *opened_dex_files_map = std::move(dex_files_map);
   *opened_dex_files = std::move(dex_files);
   write_state_ = WriteState::kPrepareLayout;
@@ -996,7 +1027,7 @@
             << PrettyMethod(it.GetMemberIndex(), *dex_file_);
         const OatQuickMethodHeader& method_header =
             oat_class->method_headers_[method_offsets_index_];
-        if (!writer_->WriteData(out, &method_header, sizeof(method_header))) {
+        if (!out->WriteFully(&method_header, sizeof(method_header))) {
           ReportWriteFailure("method header", it);
           return false;
         }
@@ -1063,7 +1094,7 @@
           }
         }
 
-        if (!writer_->WriteData(out, quick_code.data(), code_size)) {
+        if (!out->WriteFully(quick_code.data(), code_size)) {
           ReportWriteFailure("method code", it);
           return false;
         }
@@ -1279,7 +1310,7 @@
         size_t map_size = map.size() * sizeof(map[0]);
         if (map_offset == offset_) {
           // Write deduplicated map (code info for Optimizing or transformation info for dex2dex).
-          if (UNLIKELY(!writer_->WriteData(out, map.data(), map_size))) {
+          if (UNLIKELY(!out->WriteFully(map.data(), map_size))) {
             ReportWriteFailure(it);
             return false;
           }
@@ -1413,8 +1444,8 @@
       offset = CompiledCode::AlignCode(offset, instruction_set); \
       adjusted_offset = offset + CompiledCode::CodeDelta(instruction_set); \
       oat_header_->Set ## fn_name ## Offset(adjusted_offset); \
-      field = compiler_driver_->Create ## fn_name(); \
-      offset += field->size();
+      (field) = compiler_driver_->Create ## fn_name(); \
+      offset += (field)->size();
 
     DO_TRAMPOLINE(jni_dlsym_lookup_, JniDlsymLookup);
     DO_TRAMPOLINE(quick_generic_jni_trampoline_, QuickGenericJniTrampoline);
@@ -1457,6 +1488,10 @@
 bool OatWriter::WriteRodata(OutputStream* out) {
   CHECK(write_state_ == WriteState::kWriteRoData);
 
+  // Wrap out to update checksum with each write.
+  ChecksumUpdatingOutputStream checksum_updating_out(out, oat_header_.get());
+  out = &checksum_updating_out;
+
   if (!WriteClassOffsets(out)) {
     LOG(ERROR) << "Failed to write class offsets to " << out->GetLocation();
     return false;
@@ -1499,6 +1534,10 @@
 bool OatWriter::WriteCode(OutputStream* out) {
   CHECK(write_state_ == WriteState::kWriteText);
 
+  // Wrap out to update checksum with each write.
+  ChecksumUpdatingOutputStream checksum_updating_out(out, oat_header_.get());
+  out = &checksum_updating_out;
+
   SetMultiOatRelativePatcherAdjustment();
 
   const size_t file_offset = oat_data_offset_;
@@ -1526,8 +1565,8 @@
   if (kIsDebugBuild) {
     uint32_t size_total = 0;
     #define DO_STAT(x) \
-      VLOG(compiler) << #x "=" << PrettySize(x) << " (" << x << "B)"; \
-      size_total += x;
+      VLOG(compiler) << #x "=" << PrettySize(x) << " (" << (x) << "B)"; \
+      size_total += (x);
 
     DO_STAT(size_dex_file_alignment_);
     DO_STAT(size_executable_offset_alignment_);
@@ -1683,12 +1722,12 @@
         uint32_t alignment_padding = aligned_offset - relative_offset; \
         out->Seek(alignment_padding, kSeekCurrent); \
         size_trampoline_alignment_ += alignment_padding; \
-        if (!WriteData(out, field->data(), field->size())) { \
+        if (!out->WriteFully((field)->data(), (field)->size())) { \
           PLOG(ERROR) << "Failed to write " # field " to " << out->GetLocation(); \
           return false; \
         } \
-        size_ ## field += field->size(); \
-        relative_offset += alignment_padding + field->size(); \
+        size_ ## field += (field)->size(); \
+        relative_offset += alignment_padding + (field)->size(); \
         DCHECK_OFFSET(); \
       } while (false)
 
@@ -2200,11 +2239,6 @@
   return true;
 }
 
-bool OatWriter::WriteData(OutputStream* out, const void* data, size_t size) {
-  oat_header_->UpdateChecksum(data, size);
-  return out->WriteFully(data, size);
-}
-
 void OatWriter::SetMultiOatRelativePatcherAdjustment() {
   DCHECK(dex_files_ != nullptr);
   DCHECK(relative_patcher_ != nullptr);
@@ -2274,39 +2308,37 @@
   const size_t file_offset = oat_writer->oat_data_offset_;
   DCHECK_OFFSET_();
 
-  if (!oat_writer->WriteData(out, &dex_file_location_size_, sizeof(dex_file_location_size_))) {
+  if (!out->WriteFully(&dex_file_location_size_, sizeof(dex_file_location_size_))) {
     PLOG(ERROR) << "Failed to write dex file location length to " << out->GetLocation();
     return false;
   }
   oat_writer->size_oat_dex_file_location_size_ += sizeof(dex_file_location_size_);
 
-  if (!oat_writer->WriteData(out, dex_file_location_data_, dex_file_location_size_)) {
+  if (!out->WriteFully(dex_file_location_data_, dex_file_location_size_)) {
     PLOG(ERROR) << "Failed to write dex file location data to " << out->GetLocation();
     return false;
   }
   oat_writer->size_oat_dex_file_location_data_ += dex_file_location_size_;
 
-  if (!oat_writer->WriteData(out,
-                             &dex_file_location_checksum_,
-                             sizeof(dex_file_location_checksum_))) {
+  if (!out->WriteFully(&dex_file_location_checksum_, sizeof(dex_file_location_checksum_))) {
     PLOG(ERROR) << "Failed to write dex file location checksum to " << out->GetLocation();
     return false;
   }
   oat_writer->size_oat_dex_file_location_checksum_ += sizeof(dex_file_location_checksum_);
 
-  if (!oat_writer->WriteData(out, &dex_file_offset_, sizeof(dex_file_offset_))) {
+  if (!out->WriteFully(&dex_file_offset_, sizeof(dex_file_offset_))) {
     PLOG(ERROR) << "Failed to write dex file offset to " << out->GetLocation();
     return false;
   }
   oat_writer->size_oat_dex_file_offset_ += sizeof(dex_file_offset_);
 
-  if (!oat_writer->WriteData(out, &class_offsets_offset_, sizeof(class_offsets_offset_))) {
+  if (!out->WriteFully(&class_offsets_offset_, sizeof(class_offsets_offset_))) {
     PLOG(ERROR) << "Failed to write class offsets offset to " << out->GetLocation();
     return false;
   }
   oat_writer->size_oat_dex_file_class_offsets_offset_ += sizeof(class_offsets_offset_);
 
-  if (!oat_writer->WriteData(out, &lookup_table_offset_, sizeof(lookup_table_offset_))) {
+  if (!out->WriteFully(&lookup_table_offset_, sizeof(lookup_table_offset_))) {
     PLOG(ERROR) << "Failed to write lookup table offset to " << out->GetLocation();
     return false;
   }
@@ -2316,7 +2348,7 @@
 }
 
 bool OatWriter::OatDexFile::WriteClassOffsets(OatWriter* oat_writer, OutputStream* out) {
-  if (!oat_writer->WriteData(out, class_offsets_.data(), GetClassOffsetsRawSize())) {
+  if (!out->WriteFully(class_offsets_.data(), GetClassOffsetsRawSize())) {
     PLOG(ERROR) << "Failed to write oat class offsets for " << GetLocation()
                 << " to " << out->GetLocation();
     return false;
@@ -2405,13 +2437,13 @@
                                 OutputStream* out,
                                 const size_t file_offset) const {
   DCHECK_OFFSET_();
-  if (!oat_writer->WriteData(out, &status_, sizeof(status_))) {
+  if (!out->WriteFully(&status_, sizeof(status_))) {
     PLOG(ERROR) << "Failed to write class status to " << out->GetLocation();
     return false;
   }
   oat_writer->size_oat_class_status_ += sizeof(status_);
 
-  if (!oat_writer->WriteData(out, &type_, sizeof(type_))) {
+  if (!out->WriteFully(&type_, sizeof(type_))) {
     PLOG(ERROR) << "Failed to write oat class type to " << out->GetLocation();
     return false;
   }
@@ -2419,20 +2451,20 @@
 
   if (method_bitmap_size_ != 0) {
     CHECK_EQ(kOatClassSomeCompiled, type_);
-    if (!oat_writer->WriteData(out, &method_bitmap_size_, sizeof(method_bitmap_size_))) {
+    if (!out->WriteFully(&method_bitmap_size_, sizeof(method_bitmap_size_))) {
       PLOG(ERROR) << "Failed to write method bitmap size to " << out->GetLocation();
       return false;
     }
     oat_writer->size_oat_class_method_bitmaps_ += sizeof(method_bitmap_size_);
 
-    if (!oat_writer->WriteData(out, method_bitmap_->GetRawStorage(), method_bitmap_size_)) {
+    if (!out->WriteFully(method_bitmap_->GetRawStorage(), method_bitmap_size_)) {
       PLOG(ERROR) << "Failed to write method bitmap to " << out->GetLocation();
       return false;
     }
     oat_writer->size_oat_class_method_bitmaps_ += method_bitmap_size_;
   }
 
-  if (!oat_writer->WriteData(out, method_offsets_.data(), GetMethodOffsetsRawSize())) {
+  if (!out->WriteFully(method_offsets_.data(), GetMethodOffsetsRawSize())) {
     PLOG(ERROR) << "Failed to write method offsets to " << out->GetLocation();
     return false;
   }
diff --git a/compiler/oat_writer.h b/compiler/oat_writer.h
index 3862798..cc81f39 100644
--- a/compiler/oat_writer.h
+++ b/compiler/oat_writer.h
@@ -271,7 +271,6 @@
   bool WriteTypeLookupTables(MemMap* opened_dex_files_map,
                              const std::vector<std::unique_ptr<const DexFile>>& opened_dex_files);
   bool WriteCodeAlignment(OutputStream* out, uint32_t aligned_code_delta);
-  bool WriteData(OutputStream* out, const void* data, size_t size);
   void SetMultiOatRelativePatcherAdjustment();
 
   enum class WriteState {
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 51fbaea..08670a0 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -1305,4 +1305,18 @@
   locations->AddTemp(Location::RequiresRegister());
 }
 
+uint32_t CodeGenerator::GetReferenceSlowFlagOffset() const {
+  ScopedObjectAccess soa(Thread::Current());
+  mirror::Class* klass = mirror::Reference::GetJavaLangRefReference();
+  DCHECK(klass->IsInitialized());
+  return klass->GetSlowPathFlagOffset().Uint32Value();
+}
+
+uint32_t CodeGenerator::GetReferenceDisableFlagOffset() const {
+  ScopedObjectAccess soa(Thread::Current());
+  mirror::Class* klass = mirror::Reference::GetJavaLangRefReference();
+  DCHECK(klass->IsInitialized());
+  return klass->GetDisableIntrinsicFlagOffset().Uint32Value();
+}
+
 }  // namespace art
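
These getters expose the offsets of the static slowPathEnabled and
disableIntrinsic flags that java.lang.ref.Reference stores in its Class
object, so a backend intrinsic for Reference.getReferent() can test them
before taking a fast path. A hypothetical emission fragment (register and
label names invented for illustration; cmpl/j are the x86 assembler's real
mnemonics):

  // Sketch: bail to the slow path when either flag on the Reference
  // class is set. 'ref_class' is assumed to hold the Reference Class.
  // __ cmpl(Address(ref_class, codegen->GetReferenceSlowFlagOffset()),
  //         Immediate(0));
  // __ j(kNotEqual, slow_path->GetEntryLabel());
  // __ cmpl(Address(ref_class, codegen->GetReferenceDisableFlagOffset()),
  //         Immediate(0));
  // __ j(kNotEqual, slow_path->GetEntryLabel());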
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 6e75e3b..82a54d2 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -469,6 +469,9 @@
 
   virtual void GenerateNop() = 0;
 
+  uint32_t GetReferenceSlowFlagOffset() const;
+  uint32_t GetReferenceDisableFlagOffset() const;
+
  protected:
   // Method patch info used for recording locations of required linker patches and
   // target methods. The target method can be used for various purposes, whether for
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index e010662..7ddd677 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -59,7 +59,8 @@
 
 static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7;
 
-#define __ down_cast<ArmAssembler*>(codegen->GetAssembler())->
+// NOLINT on __ macro to suppress wrong warning/fix from clang-tidy.
+#define __ down_cast<ArmAssembler*>(codegen->GetAssembler())-> // NOLINT
 #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kArmWordSize, x).Int32Value()
 
 class NullCheckSlowPathARM : public SlowPathCode {
@@ -674,7 +675,8 @@
 };
 
 #undef __
-#define __ down_cast<ArmAssembler*>(GetAssembler())->
+// NOLINT on __ macro to suppress wrong warning/fix from clang-tidy.
+#define __ down_cast<ArmAssembler*>(GetAssembler())-> // NOLINT
 
 inline Condition ARMCondition(IfCondition cond) {
   switch (cond) {
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 261c04f..362957b 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -132,7 +132,8 @@
   return ARM64ReturnLocation(return_type);
 }
 
-#define __ down_cast<CodeGeneratorARM64*>(codegen)->GetVIXLAssembler()->
+// NOLINT on __ macro to suppress wrong warning/fix from clang-tidy.
+#define __ down_cast<CodeGeneratorARM64*>(codegen)->GetVIXLAssembler()-> // NOLINT
 #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kArm64WordSize, x).Int32Value()
 
 // Calculate memory accessing operand for save/restore live registers.
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index fb50680..c3f425a 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -141,7 +141,8 @@
   return MipsReturnLocation(type);
 }
 
-#define __ down_cast<CodeGeneratorMIPS*>(codegen)->GetAssembler()->
+// NOLINT on __ macro to suppress wrong warning/fix from clang-tidy.
+#define __ down_cast<CodeGeneratorMIPS*>(codegen)->GetAssembler()-> // NOLINT
 #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kMipsWordSize, x).Int32Value()
 
 class BoundsCheckSlowPathMIPS : public SlowPathCodeMIPS {
@@ -478,7 +479,8 @@
 }
 
 #undef __
-#define __ down_cast<MipsAssembler*>(GetAssembler())->
+// NOLINT on __ macro to suppress wrong warning/fix from clang-tidy.
+#define __ down_cast<MipsAssembler*>(GetAssembler())-> // NOLINT
 #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kMipsWordSize, x).Int32Value()
 
 void CodeGeneratorMIPS::Finalize(CodeAllocator* allocator) {
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index e67d8d0..bb6df50 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -102,7 +102,8 @@
   return Mips64ReturnLocation(type);
 }
 
-#define __ down_cast<CodeGeneratorMIPS64*>(codegen)->GetAssembler()->
+// NOLINT on __ macro to suppress wrong warning/fix from clang-tidy.
+#define __ down_cast<CodeGeneratorMIPS64*>(codegen)->GetAssembler()-> // NOLINT
 #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kMips64DoublewordSize, x).Int32Value()
 
 class BoundsCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 {
@@ -424,7 +425,8 @@
 }
 
 #undef __
-#define __ down_cast<Mips64Assembler*>(GetAssembler())->
+// NOLINT on __ macro to suppress wrong warning/fix from clang-tidy.
+#define __ down_cast<Mips64Assembler*>(GetAssembler())-> // NOLINT
 #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kMips64DoublewordSize, x).Int32Value()
 
 void CodeGeneratorMIPS64::Finalize(CodeAllocator* allocator) {
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 50892a9..b95c806 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -47,7 +47,8 @@
 
 static constexpr int kFakeReturnRegister = Register(8);
 
-#define __ down_cast<X86Assembler*>(codegen->GetAssembler())->
+// NOLINT on __ macro to suppress wrong warning/fix from clang-tidy.
+#define __ down_cast<X86Assembler*>(codegen->GetAssembler())-> // NOLINT
 #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kX86WordSize, x).Int32Value()
 
 class NullCheckSlowPathX86 : public SlowPathCode {
@@ -691,7 +692,8 @@
 };
 
 #undef __
-#define __ down_cast<X86Assembler*>(GetAssembler())->
+// NOLINT on __ macro to suppress wrong warning/fix from clang-tidy.
+#define __ down_cast<X86Assembler*>(GetAssembler())-> // NOLINT
 
 inline Condition X86Condition(IfCondition cond) {
   switch (cond) {
@@ -4308,16 +4310,18 @@
   // save one load. However, since this is just an intrinsic slow path we prefer this
   // simple and more robust approach rather than trying to determine if that's the case.
   SlowPathCode* slow_path = GetCurrentSlowPath();
-  DCHECK(slow_path != nullptr);  // For intrinsified invokes the call is emitted on the slow path.
-  if (slow_path->IsCoreRegisterSaved(location.AsRegister<Register>())) {
-    int stack_offset = slow_path->GetStackOffsetOfCoreRegister(location.AsRegister<Register>());
-    __ movl(temp, Address(ESP, stack_offset));
-    return temp;
+  if (slow_path != nullptr) {
+    if (slow_path->IsCoreRegisterSaved(location.AsRegister<Register>())) {
+      int stack_offset = slow_path->GetStackOffsetOfCoreRegister(location.AsRegister<Register>());
+      __ movl(temp, Address(ESP, stack_offset));
+      return temp;
+    }
   }
   return location.AsRegister<Register>();
 }
 
-void CodeGeneratorX86::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) {
+Location CodeGeneratorX86::GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticOrDirect* invoke,
+                                                                  Location temp) {
   Location callee_method = temp;  // For all kinds except kRecursive, callee will be in temp.
   switch (invoke->GetMethodLoadKind()) {
     case HInvokeStaticOrDirect::MethodLoadKind::kStringInit:
@@ -4366,6 +4370,11 @@
       break;
     }
   }
+  return callee_method;
+}
+
+void CodeGeneratorX86::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) {
+  Location callee_method = GenerateCalleeMethodStaticOrDirectCall(invoke, temp);
 
   switch (invoke->GetCodePtrLocation()) {
     case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
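
The x86 change above is an extract-function refactoring: GenerateCalleeMethodStaticOrDirectCall now materializes the callee ArtMethod into the temp register and reports its Location, and GenerateStaticOrDirectCall delegates to it before emitting the call, so intrinsics that need only the method pointer (ReferenceGetReferent, later in this change) can reuse the load without also emitting a call. A schematic of the split, using simplified stand-ins for ART's Location and invoke types:

    #include <cstdio>

    // Placeholder stand-ins for ART's Location / HInvokeStaticOrDirect.
    struct Location { int reg; };
    struct Invoke { int method_load_kind; };

    // Step 1: materialize the callee method into `temp` and report where it
    // ended up. Reusable by intrinsics that need only the ArtMethod*.
    Location GenerateCalleeMethod(Invoke* invoke, Location temp) {
      std::printf("load method (kind %d) into r%d\n",
                  invoke->method_load_kind, temp.reg);
      return temp;  // For all kinds except kRecursive, callee ends up in temp.
    }

    // Step 2: the full call sequence delegates the load, then emits the call.
    void GenerateCall(Invoke* invoke, Location temp) {
      Location callee_method = GenerateCalleeMethod(invoke, temp);
      std::printf("call through r%d\n", callee_method.reg);
    }

    int main() {
      Invoke invoke{0};
      GenerateCall(&invoke, Location{1});
      return 0;
    }
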
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index fe7d3ed..98dc8ca 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -398,6 +398,7 @@
       MethodReference target_method) OVERRIDE;
 
   // Generate a call to a static or direct method.
+  Location GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp);
   void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) OVERRIDE;
   // Generate a call to a virtual method.
   void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE;
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 56c5b06..054891b 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -51,7 +51,8 @@
 
 static constexpr int kC2ConditionMask = 0x400;
 
-#define __ down_cast<X86_64Assembler*>(codegen->GetAssembler())->
+// NOLINT on __ macro to suppress wrong warning/fix from clang-tidy.
+#define __ down_cast<X86_64Assembler*>(codegen->GetAssembler())-> // NOLINT
 #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, x).Int32Value()
 
 class NullCheckSlowPathX86_64 : public SlowPathCode {
@@ -710,7 +711,8 @@
 };
 
 #undef __
-#define __ down_cast<X86_64Assembler*>(GetAssembler())->
+// NOLINT on __ macro to suppress wrong warning/fix from clang-tidy.
+#define __ down_cast<X86_64Assembler*>(GetAssembler())-> // NOLINT
 
 inline Condition X86_64IntegerCondition(IfCondition cond) {
   switch (cond) {
@@ -762,10 +764,9 @@
   }
 }
 
-void CodeGeneratorX86_64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke,
-                                                     Location temp) {
+Location CodeGeneratorX86_64::GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticOrDirect* invoke,
+                                                                     Location temp) {
   // All registers are assumed to be correctly set up.
-
   Location callee_method = temp;  // For all kinds except kRecursive, callee will be in temp.
   switch (invoke->GetMethodLoadKind()) {
     case HInvokeStaticOrDirect::MethodLoadKind::kStringInit:
@@ -815,6 +816,13 @@
       break;
     }
   }
+  return callee_method;
+}
+
+void CodeGeneratorX86_64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke,
+                                                     Location temp) {
+  // All registers are assumed to be correctly set up.
+  Location callee_method = GenerateCalleeMethodStaticOrDirectCall(invoke, temp);
 
   switch (invoke->GetCodePtrLocation()) {
     case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index d9908bb..7cf1245 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -394,6 +394,7 @@
       const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
       MethodReference target_method) OVERRIDE;
 
+  Location GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp);
   void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) OVERRIDE;
   void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE;
 
diff --git a/compiler/optimizing/intrinsics.h b/compiler/optimizing/intrinsics.h
index 214250f..83a5127 100644
--- a/compiler/optimizing/intrinsics.h
+++ b/compiler/optimizing/intrinsics.h
@@ -165,7 +165,7 @@
 void Set##name() { SetBit(k##name); }                                 \
 bool Get##name() const { return IsBitSet(k##name); }                  \
 private:                                                              \
-static constexpr size_t k##name = bit + kNumberOfGenericOptimizations
+static constexpr size_t k##name = (bit) + kNumberOfGenericOptimizations
 
 class StringEqualsOptimizations : public IntrinsicOptimizations {
  public:
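
The parentheses added around the macro parameter `bit` are standard macro hygiene: without them, an argument that is itself an expression can regroup with the surrounding `+` under C++ operator precedence. A self-contained demonstration of the failure mode being prevented (macro names invented for illustration):

    #include <cstddef>

    constexpr std::size_t kBase = 10;

    #define OFFSET_UNSAFE(bit) bit + kBase    // expands textually
    #define OFFSET_SAFE(bit)   (bit) + kBase  // argument parenthesized

    int main() {
      // With a plain literal both forms agree.
      static_assert(OFFSET_UNSAFE(1) == 11, "literal argument is fine");
      static_assert(OFFSET_SAFE(1) == 11, "literal argument is fine");

      // With an expression argument, precedence changes the result:
      // OFFSET_UNSAFE(1 << 1) -> 1 << 1 + 10 -> 1 << 11 == 2048,
      // because + binds tighter than <<.
      static_assert(OFFSET_UNSAFE(1 << 1) == 2048, "precedence bug");
      // OFFSET_SAFE(1 << 1) -> (1 << 1) + 10 == 12, the intended value.
      static_assert(OFFSET_SAFE(1 << 1) == 12, "parentheses fix it");
      return 0;
    }
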
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index de04175..29f7672 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -987,31 +987,126 @@
 void IntrinsicLocationsBuilderARM::VisitStringCompareTo(HInvoke* invoke) {
   // The inputs plus one temp.
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            invoke->InputAt(1)->CanBeNull()
+                                                                ? LocationSummary::kCallOnSlowPath
+                                                                : LocationSummary::kNoCall,
                                                             kIntrinsified);
-  InvokeRuntimeCallingConvention calling_convention;
-  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
-  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
-  locations->SetOut(Location::RegisterLocation(R0));
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
 }
 
 void IntrinsicCodeGeneratorARM::VisitStringCompareTo(HInvoke* invoke) {
   ArmAssembler* assembler = GetAssembler();
   LocationSummary* locations = invoke->GetLocations();
 
+  Register str = locations->InAt(0).AsRegister<Register>();
+  Register arg = locations->InAt(1).AsRegister<Register>();
+  Register out = locations->Out().AsRegister<Register>();
+
+  Register temp0 = locations->GetTemp(0).AsRegister<Register>();
+  Register temp1 = locations->GetTemp(1).AsRegister<Register>();
+  Register temp2 = locations->GetTemp(2).AsRegister<Register>();
+
+  Label loop;
+  Label find_char_diff;
+  Label end;
+
+  // Get offsets of count and value fields within a string object.
+  const int32_t count_offset = mirror::String::CountOffset().Int32Value();
+  const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
+
   // Note that the null check must have been done earlier.
   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
 
-  Register argument = locations->InAt(1).AsRegister<Register>();
-  __ cmp(argument, ShifterOperand(0));
-  SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathARM(invoke);
-  codegen_->AddSlowPath(slow_path);
-  __ b(slow_path->GetEntryLabel(), EQ);
+  // Take the slow path and throw if the input can be null and is null.
+  SlowPathCode* slow_path = nullptr;
+  const bool can_slow_path = invoke->InputAt(1)->CanBeNull();
+  if (can_slow_path) {
+    slow_path = new (GetAllocator()) IntrinsicSlowPathARM(invoke);
+    codegen_->AddSlowPath(slow_path);
+    __ CompareAndBranchIfZero(arg, slow_path->GetEntryLabel());
+  }
 
-  __ LoadFromOffset(
-      kLoadWord, LR, TR, QUICK_ENTRYPOINT_OFFSET(kArmWordSize, pStringCompareTo).Int32Value());
-  __ blx(LR);
-  __ Bind(slow_path->GetExitLabel());
+  // Reference equality check, return 0 if same reference.
+  __ subs(out, str, ShifterOperand(arg));
+  __ b(&end, EQ);
+  // Load lengths of this and argument strings.
+  __ ldr(temp2, Address(str, count_offset));
+  __ ldr(temp1, Address(arg, count_offset));
+  // out = length diff.
+  __ subs(out, temp2, ShifterOperand(temp1));
+  // temp0 = min(len(str), len(arg)).
+  __ it(Condition::LT, kItElse);
+  __ mov(temp0, ShifterOperand(temp2), Condition::LT);
+  __ mov(temp0, ShifterOperand(temp1), Condition::GE);
+  // Shorter string is empty?
+  __ CompareAndBranchIfZero(temp0, &end);
+
+  // Store offset of string value in preparation for comparison loop.
+  __ mov(temp1, ShifterOperand(value_offset));
+
+  // Assertions that must hold in order to compare multiple characters at a time.
+  CHECK_ALIGNED(value_offset, 8);
+  static_assert(IsAligned<8>(kObjectAlignment),
+                "String data must be 8-byte aligned for unrolled CompareTo loop.");
+
+  const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar);
+  DCHECK_EQ(char_size, 2u);
+
+  // Unrolled loop comparing 4x16-bit chars per iteration (ok because of string data alignment).
+  __ Bind(&loop);
+  __ ldr(IP, Address(str, temp1));
+  __ ldr(temp2, Address(arg, temp1));
+  __ cmp(IP, ShifterOperand(temp2));
+  __ b(&find_char_diff, NE);
+  __ add(temp1, temp1, ShifterOperand(char_size * 2));
+  __ sub(temp0, temp0, ShifterOperand(2));
+
+  __ ldr(IP, Address(str, temp1));
+  __ ldr(temp2, Address(arg, temp1));
+  __ cmp(IP, ShifterOperand(temp2));
+  __ b(&find_char_diff, NE);
+  __ add(temp1, temp1, ShifterOperand(char_size * 2));
+  __ subs(temp0, temp0, ShifterOperand(2));
+
+  __ b(&loop, GT);
+  __ b(&end);
+
+  // Find the single 16-bit character difference.
+  __ Bind(&find_char_diff);
+  // Get the bit position of the first character that differs.
+  __ eor(temp1, temp2, ShifterOperand(IP));
+  __ rbit(temp1, temp1);
+  __ clz(temp1, temp1);
+
+  // temp0 = number of 16-bit characters remaining to compare.
+  // (it could be < 1 if a difference is found after the first SUB in the comparison loop, and
+  // after the end of the shorter string data).
+
+  // (temp1 >> 4) = character where difference occurs between the last two words compared, on the
+  // interval [0,1] (0 for low half-word different, 1 for high half-word different).
+
+  // If temp0 <= (temp1 >> 4), the difference occurs outside the remaining string data, so just
+  // return length diff (out).
+  __ cmp(temp0, ShifterOperand(temp1, LSR, 4));
+  __ b(&end, LE);
+  // Extract the characters and calculate the difference.
+  __ bic(temp1, temp1, ShifterOperand(0xf));
+  __ Lsr(temp2, temp2, temp1);
+  __ Lsr(IP, IP, temp1);
+  __ movt(temp2, 0);
+  __ movt(IP, 0);
+  __ sub(out, IP, ShifterOperand(temp2));
+
+  __ Bind(&end);
+
+  if (can_slow_path) {
+    __ Bind(slow_path->GetExitLabel());
+  }
 }
 
 void IntrinsicLocationsBuilderARM::VisitStringEquals(HInvoke* invoke) {
@@ -1055,17 +1150,22 @@
   // Note that the null check must have been done earlier.
   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
 
-  // Check if input is null, return false if it is.
-  __ CompareAndBranchIfZero(arg, &return_false);
+  StringEqualsOptimizations optimizations(invoke);
+  if (!optimizations.GetArgumentNotNull()) {
+    // Check if input is null, return false if it is.
+    __ CompareAndBranchIfZero(arg, &return_false);
+  }
 
-  // Instanceof check for the argument by comparing class fields.
-  // All string objects must have the same type since String cannot be subclassed.
-  // Receiver must be a string object, so its class field is equal to all strings' class fields.
-  // If the argument is a string object, its class field must be equal to receiver's class field.
-  __ ldr(temp, Address(str, class_offset));
-  __ ldr(temp1, Address(arg, class_offset));
-  __ cmp(temp, ShifterOperand(temp1));
-  __ b(&return_false, NE);
+  if (!optimizations.GetArgumentIsString()) {
+    // Instanceof check for the argument by comparing class fields.
+    // All string objects must have the same type since String cannot be subclassed.
+    // Receiver must be a string object, so its class field is equal to all strings' class fields.
+    // If the argument is a string object, its class field must be equal to receiver's class field.
+    __ ldr(temp, Address(str, class_offset));
+    __ ldr(temp1, Address(arg, class_offset));
+    __ cmp(temp, ShifterOperand(temp1));
+    __ b(&return_false, NE);
+  }
 
   // Load lengths of this and argument strings.
   __ ldr(temp, Address(str, count_offset));
@@ -1082,7 +1182,7 @@
 
   // Assertions that must hold in order to compare strings 2 characters at a time.
   DCHECK_ALIGNED(value_offset, 4);
-  static_assert(IsAligned<4>(kObjectAlignment), "String of odd length is not zero padded");
+  static_assert(IsAligned<4>(kObjectAlignment), "String data must be aligned for fast compare.");
 
   __ LoadImmediate(temp1, value_offset);
 
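
The new inline ARM sequence implements the usual String.compareTo contract without a runtime call: zero on reference equality, otherwise the first differing UTF-16 unit decides, and if one string is a prefix of the other the length difference is returned. The unrolled word-at-a-time loop and the eor/rbit/clz difference extraction are fast paths over this scalar logic, sketched below in plain C++ (vectors of code units stand in for String objects; this is a sketch, not ART's exact semantics):

    #include <algorithm>
    #include <cassert>
    #include <cstdint>
    #include <vector>

    // Scalar equivalent of the intrinsic's logic over UTF-16 code units.
    int32_t CompareTo(const std::vector<uint16_t>& str,
                      const std::vector<uint16_t>& arg) {
      if (&str == &arg) return 0;  // Reference equality -> 0.
      int32_t len_diff = static_cast<int32_t>(str.size()) -
                         static_cast<int32_t>(arg.size());
      size_t min_len = std::min(str.size(), arg.size());
      for (size_t i = 0; i < min_len; ++i) {
        if (str[i] != arg[i]) {
          // First differing character decides the result.
          return static_cast<int32_t>(str[i]) - static_cast<int32_t>(arg[i]);
        }
      }
      return len_diff;  // A prefix compares lower than the longer string.
    }

    int main() {
      std::vector<uint16_t> a{'a', 'b'}, b{'a', 'b', 'c'};
      assert(CompareTo(a, b) < 0);
      assert(CompareTo(b, a) > 0);
      assert(CompareTo(a, a) == 0);
      return 0;
    }
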
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 6cd1726..d776fb4 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -1270,12 +1270,12 @@
   __ Eor(temp1, temp0, temp4);
   __ Rbit(temp1, temp1);
   __ Clz(temp1, temp1);
-  __ Bic(temp1, temp1, 0xf);
   // If the number of 16-bit chars remaining <= the index where the difference occurs (0-3), then
   // the difference occurs outside the remaining string data, so just return length diff (out).
   __ Cmp(temp2, Operand(temp1, LSR, 4));
   __ B(le, &end);
   // Extract the characters and calculate the difference.
+  __ Bic(temp1, temp1, 0xf);
   __ Lsr(temp0, temp0, temp1);
   __ Lsr(temp4, temp4, temp1);
   __ And(temp4, temp4, 0xffff);
@@ -1327,21 +1327,26 @@
   // Note that the null check must have been done earlier.
   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
 
-  // Check if input is null, return false if it is.
-  __ Cbz(arg, &return_false);
+  StringEqualsOptimizations optimizations(invoke);
+  if (!optimizations.GetArgumentNotNull()) {
+    // Check if input is null, return false if it is.
+    __ Cbz(arg, &return_false);
+  }
 
   // Reference equality check, return true if same reference.
   __ Cmp(str, arg);
   __ B(&return_true, eq);
 
-  // Instanceof check for the argument by comparing class fields.
-  // All string objects must have the same type since String cannot be subclassed.
-  // Receiver must be a string object, so its class field is equal to all strings' class fields.
-  // If the argument is a string object, its class field must be equal to receiver's class field.
-  __ Ldr(temp, MemOperand(str.X(), class_offset));
-  __ Ldr(temp1, MemOperand(arg.X(), class_offset));
-  __ Cmp(temp, temp1);
-  __ B(&return_false, ne);
+  if (!optimizations.GetArgumentIsString()) {
+    // Instanceof check for the argument by comparing class fields.
+    // All string objects must have the same type since String cannot be subclassed.
+    // Receiver must be a string object, so its class field is equal to all strings' class fields.
+    // If the argument is a string object, its class field must be equal to receiver's class field.
+    __ Ldr(temp, MemOperand(str.X(), class_offset));
+    __ Ldr(temp1, MemOperand(arg.X(), class_offset));
+    __ Cmp(temp, temp1);
+    __ B(&return_false, ne);
+  }
 
   // Load lengths of this and argument strings.
   __ Ldr(temp, MemOperand(str.X(), count_offset));
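
The ARM64 hunk moves the Bic after the length comparison so the mask is only applied on the path that actually extracts the differing character. The underlying eor/rbit/clz idiom locates the lowest differing bit between two words of packed 16-bit characters; a host-side sketch using GCC/Clang's __builtin_ctzll (count trailing zeros, equivalent to rbit followed by clz):

    #include <cstdint>
    #include <cstdio>

    // Given two words of packed 16-bit chars that are known to differ,
    // return the index (0-3) of the first differing character.
    int FirstDifferingChar(uint64_t a, uint64_t b) {
      uint64_t diff = a ^ b;            // eor: set bits mark differences.
      int bit = __builtin_ctzll(diff);  // rbit+clz == count trailing zeros.
      return bit >> 4;                  // 16 bits per char -> divide by 16.
    }

    int main() {
      uint64_t a = 0x0061006200630064ULL;  // chars 'd','c','b','a', low to high
      uint64_t b = 0x0061006200FF0064ULL;  // differs in the second char
      std::printf("first diff at char %d\n", FirstDifferingChar(a, b));  // 1
      return 0;
    }
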
diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc
index fa250a3..140f56a 100644
--- a/compiler/optimizing/intrinsics_mips.cc
+++ b/compiler/optimizing/intrinsics_mips.cc
@@ -2433,13 +2433,128 @@
   GenLowestOneBit(invoke->GetLocations(), Primitive::kPrimLong, IsR6(), GetAssembler());
 }
 
+// int java.lang.Math.round(float)
+void IntrinsicLocationsBuilderMIPS::VisitMathRoundFloat(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kNoCall,
+                                                            kIntrinsified);
+  locations->SetInAt(0, Location::RequiresFpuRegister());
+  locations->AddTemp(Location::RequiresFpuRegister());
+  locations->SetOut(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitMathRoundFloat(HInvoke* invoke) {
+  LocationSummary* locations = invoke->GetLocations();
+  MipsAssembler* assembler = GetAssembler();
+  FRegister in = locations->InAt(0).AsFpuRegister<FRegister>();
+  FRegister half = locations->GetTemp(0).AsFpuRegister<FRegister>();
+  Register out = locations->Out().AsRegister<Register>();
+
+  MipsLabel done;
+  MipsLabel finite;
+  MipsLabel add;
+
+  // if (in.isNaN) {
+  //   return 0;
+  // }
+  //
+  // out = floor.w.s(in);
+  //
+  // /*
+  //  * This "if" statement is only needed for the pre-R6 version of floor.w.s
+  //  * which outputs Integer.MAX_VALUE for negative numbers with magnitudes
+  //  * too large to fit in a 32-bit integer.
+  //  *
+  //  * Starting with MIPSR6, which always sets FCSR.NAN2008=1, negative
+  //  * numbers which are too large to be represented in a 32-bit signed
+  //  * integer will be processed by floor.w.s to output Integer.MIN_VALUE,
+  //  * and will no longer be processed by this "if" statement.
+  //  */
+  // if (out == Integer.MAX_VALUE) {
+  //   TMP = (in < 0.0f) ? 1 : 0;
+  //   /*
+  //    * If TMP is 1, then adding it to out will wrap its value from
+  //    * Integer.MAX_VALUE to Integer.MIN_VALUE.
+  //    */
+  //   return out += TMP;
+  // }
+  //
+  // /*
+  //  * For negative values not handled by the previous "if" statement the
+  //  * test here will correctly set the value of TMP.
+  //  */
+  // TMP = ((in - out) >= 0.5f) ? 1 : 0;
+  // return out += TMP;
+
+  // Test for NaN.
+  if (IsR6()) {
+    __ CmpUnS(FTMP, in, in);
+  } else {
+    __ CunS(in, in);
+  }
+
+  // Return zero for NaN.
+  __ Move(out, ZERO);
+  if (IsR6()) {
+    __ Bc1nez(FTMP, &done);
+  } else {
+    __ Bc1t(&done);
+  }
+
+  // out = floor(in);
+  __ FloorWS(FTMP, in);
+  __ Mfc1(out, FTMP);
+
+  __ LoadConst32(TMP, 1);
+
+  // TMP = (out == java.lang.Integer.MAX_VALUE) ? 1 : 0;
+  __ LoadConst32(AT, std::numeric_limits<int32_t>::max());
+  __ Bne(AT, out, &finite);
+
+  __ Mtc1(ZERO, FTMP);
+  if (IsR6()) {
+    __ CmpLtS(FTMP, in, FTMP);
+    __ Mfc1(AT, FTMP);
+  } else {
+    __ ColtS(in, FTMP);
+  }
+
+  __ B(&add);
+
+  __ Bind(&finite);
+
+  // TMP = (0.5f <= (in - out)) ? 1 : 0;
+  __ Cvtsw(FTMP, FTMP);  // Convert output of floor.w.s back to "float".
+  __ LoadConst32(AT, bit_cast<int32_t, float>(0.5f));
+  __ SubS(FTMP, in, FTMP);
+  __ Mtc1(AT, half);
+  if (IsR6()) {
+    __ CmpLeS(FTMP, half, FTMP);
+    __ Mfc1(AT, FTMP);
+  } else {
+    __ ColeS(half, FTMP);
+  }
+
+  __ Bind(&add);
+
+  if (IsR6()) {
+    __ Selnez(TMP, TMP, AT);
+  } else {
+    __ Movf(TMP, ZERO);
+  }
+
+  // Return out += TMP.
+  __ Addu(out, out, TMP);
+
+  __ Bind(&done);
+}
+
 // Unimplemented intrinsics.
 
 UNIMPLEMENTED_INTRINSIC(MIPS, MathCeil)
 UNIMPLEMENTED_INTRINSIC(MIPS, MathFloor)
 UNIMPLEMENTED_INTRINSIC(MIPS, MathRint)
 UNIMPLEMENTED_INTRINSIC(MIPS, MathRoundDouble)
-UNIMPLEMENTED_INTRINSIC(MIPS, MathRoundFloat)
 UNIMPLEMENTED_INTRINSIC(MIPS, UnsafeCASLong)
 
 UNIMPLEMENTED_INTRINSIC(MIPS, ReferenceGetReferent)
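
The comment block inside the new MIPS intrinsic already spells out the algorithm; the following is a host-side C++ transcription of the same three cases (NaN to zero, the pre-R6 floor.w.s saturation fixup, and the +0.5 adjustment), written as a testable sketch rather than a faithful model of the FPU:

    #include <cassert>
    #include <cmath>
    #include <cstdint>
    #include <limits>

    // Model of pre-R6 floor.w.s: NaN and out-of-range inputs yield MAX_VALUE.
    static int32_t FloorWS(float in) {
      float f = std::floor(in);
      if (std::isnan(f) || f >= 2147483648.0f || f < -2147483648.0f) {
        return std::numeric_limits<int32_t>::max();
      }
      return static_cast<int32_t>(f);
    }

    // Host-side model of the intrinsic for int java.lang.Math.round(float).
    static int32_t RoundFloat(float in) {
      if (std::isnan(in)) {
        return 0;  // NaN rounds to 0.
      }
      int32_t out = FloorWS(in);
      if (out == std::numeric_limits<int32_t>::max()) {
        // Pre-R6 fixup: a saturated result with a negative input really means
        // "too negative"; the intrinsic adds 1 so MAX_VALUE wraps to
        // MIN_VALUE. Portable C++ cannot rely on signed wraparound, so
        // return MIN_VALUE directly.
        return (in < 0.0f) ? std::numeric_limits<int32_t>::min() : out;
      }
      // Round half up: add 1 when the remaining fraction is at least 0.5.
      if (in - static_cast<float>(out) >= 0.5f) {
        ++out;
      }
      return out;
    }

    int main() {
      assert(RoundFloat(2.5f) == 3);
      assert(RoundFloat(-2.5f) == -2);  // Java rounds half up, toward +inf.
      assert(RoundFloat(std::nanf("")) == 0);
      return 0;
    }
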
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index 99bc40e..05377f9 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -1319,11 +1319,11 @@
     __ j(kEqual, &return_false);
   }
 
-  // Instanceof check for the argument by comparing class fields.
-  // All string objects must have the same type since String cannot be subclassed.
-  // Receiver must be a string object, so its class field is equal to all strings' class fields.
-  // If the argument is a string object, its class field must be equal to receiver's class field.
   if (!optimizations.GetArgumentIsString()) {
+    // Instanceof check for the argument by comparing class fields.
+    // All string objects must have the same type since String cannot be subclassed.
+    // Receiver must be a string object, so its class field is equal to all strings' class fields.
+    // If the argument is a string object, its class field must be equal to receiver's class field.
     __ movl(ecx, Address(str, class_offset));
     __ cmpl(ecx, Address(arg, class_offset));
     __ j(kNotEqual, &return_false);
@@ -2631,8 +2631,66 @@
   GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long */ true);
 }
 
+void IntrinsicLocationsBuilderX86::VisitReferenceGetReferent(HInvoke* invoke) {
+  if (kEmitCompilerReadBarrier) {
+    // Do not intrinsify this call with the read barrier configuration.
+    return;
+  }
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kCallOnSlowPath,
+                                                            kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::SameAsFirstInput());
+  locations->AddTemp(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorX86::VisitReferenceGetReferent(HInvoke* invoke) {
+  DCHECK(!kEmitCompilerReadBarrier);
+  LocationSummary* locations = invoke->GetLocations();
+  X86Assembler* assembler = GetAssembler();
+
+  Register obj = locations->InAt(0).AsRegister<Register>();
+  Register out = locations->Out().AsRegister<Register>();
+
+  SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
+  codegen_->AddSlowPath(slow_path);
+
+  // Load ArtMethod first.
+  HInvokeStaticOrDirect* invoke_direct = invoke->AsInvokeStaticOrDirect();
+  DCHECK(invoke_direct != nullptr);
+  Location temp_loc = codegen_->GenerateCalleeMethodStaticOrDirectCall(
+      invoke_direct, locations->GetTemp(0));
+  DCHECK(temp_loc.Equals(locations->GetTemp(0)));
+  Register temp = temp_loc.AsRegister<Register>();
+
+  // Now get declaring class.
+  __ movl(temp, Address(temp, ArtMethod::DeclaringClassOffset().Int32Value()));
+
+  uint32_t slow_path_flag_offset = codegen_->GetReferenceSlowFlagOffset();
+  uint32_t disable_flag_offset = codegen_->GetReferenceDisableFlagOffset();
+  DCHECK_NE(slow_path_flag_offset, 0u);
+  DCHECK_NE(disable_flag_offset, 0u);
+  DCHECK_NE(slow_path_flag_offset, disable_flag_offset);
+
+  // Check static flags that prevent us from using the intrinsic.
+  if (slow_path_flag_offset == disable_flag_offset + 1) {
+    __ cmpw(Address(temp, disable_flag_offset), Immediate(0));
+    __ j(kNotEqual, slow_path->GetEntryLabel());
+  } else {
+    __ cmpb(Address(temp, disable_flag_offset), Immediate(0));
+    __ j(kNotEqual, slow_path->GetEntryLabel());
+    __ cmpb(Address(temp, slow_path_flag_offset), Immediate(0));
+    __ j(kNotEqual, slow_path->GetEntryLabel());
+  }
+
+  // Fast path.
+  __ movl(out, Address(obj, mirror::Reference::ReferentOffset().Int32Value()));
+  codegen_->MaybeRecordImplicitNullCheck(invoke);
+  __ MaybeUnpoisonHeapReference(out);
+  __ Bind(slow_path->GetExitLabel());
+}
+
 UNIMPLEMENTED_INTRINSIC(X86, MathRoundDouble)
-UNIMPLEMENTED_INTRINSIC(X86, ReferenceGetReferent)
 UNIMPLEMENTED_INTRINSIC(X86, SystemArrayCopy)
 UNIMPLEMENTED_INTRINSIC(X86, FloatIsInfinite)
 UNIMPLEMENTED_INTRINSIC(X86, DoubleIsInfinite)
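
The flag check in the new intrinsic carries a small layout-dependent optimization: when the two byte-sized class flags happen to be adjacent in memory, one 16-bit compare against zero (the cmpw case) tests both at once, and only otherwise does the code fall back to two byte compares (the cmpb case). The same idea in portable C++, with an illustrative struct that is not ART's real Class layout:

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    // Illustrative layout: two adjacent byte flags, as the intrinsic hopes for.
    struct Flags {
      uint8_t disable_flag;
      uint8_t slow_path_flag;
    };

    bool MustTakeSlowPath(const Flags& f) {
      // One 16-bit load tests both adjacent bytes at once (the cmpw fast case).
      uint16_t both;
      std::memcpy(&both, &f.disable_flag, sizeof(both));
      return both != 0;
    }

    bool MustTakeSlowPathGeneric(const Flags& f) {
      // Fallback when the flags are not adjacent: two byte compares (cmpb).
      return f.disable_flag != 0 || f.slow_path_flag != 0;
    }

    int main() {
      Flags fast_ok{0, 0};
      Flags slow{0, 1};
      assert(!MustTakeSlowPath(fast_ok) && !MustTakeSlowPathGeneric(fast_ok));
      assert(MustTakeSlowPath(slow) && MustTakeSlowPathGeneric(slow));
      return 0;
    }
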
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 06e9cc2..67c2f3a 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -1416,17 +1416,22 @@
   // Note that the null check must have been done earlier.
   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
 
-  // Check if input is null, return false if it is.
-  __ testl(arg, arg);
-  __ j(kEqual, &return_false);
+  StringEqualsOptimizations optimizations(invoke);
+  if (!optimizations.GetArgumentNotNull()) {
+    // Check if input is null, return false if it is.
+    __ testl(arg, arg);
+    __ j(kEqual, &return_false);
+  }
 
-  // Instanceof check for the argument by comparing class fields.
-  // All string objects must have the same type since String cannot be subclassed.
-  // Receiver must be a string object, so its class field is equal to all strings' class fields.
-  // If the argument is a string object, its class field must be equal to receiver's class field.
-  __ movl(rcx, Address(str, class_offset));
-  __ cmpl(rcx, Address(arg, class_offset));
-  __ j(kNotEqual, &return_false);
+  if (!optimizations.GetArgumentIsString()) {
+    // Instanceof check for the argument by comparing class fields.
+    // All string objects must have the same type since String cannot be subclassed.
+    // Receiver must be a string object, so its class field is equal to all strings' class fields.
+    // If the argument is a string object, its class field must be equal to receiver's class field.
+    __ movl(rcx, Address(str, class_offset));
+    __ cmpl(rcx, Address(arg, class_offset));
+    __ j(kNotEqual, &return_false);
+  }
 
   // Reference equality check, return true if same reference.
   __ cmpl(str, arg);
@@ -2719,7 +2724,65 @@
   GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long */ true);
 }
 
-UNIMPLEMENTED_INTRINSIC(X86_64, ReferenceGetReferent)
+void IntrinsicLocationsBuilderX86_64::VisitReferenceGetReferent(HInvoke* invoke) {
+  if (kEmitCompilerReadBarrier) {
+    // Do not intrinsify this call with the read barrier configuration.
+    return;
+  }
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kCallOnSlowPath,
+                                                            kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::SameAsFirstInput());
+  locations->AddTemp(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitReferenceGetReferent(HInvoke* invoke) {
+  DCHECK(!kEmitCompilerReadBarrier);
+  LocationSummary* locations = invoke->GetLocations();
+  X86_64Assembler* assembler = GetAssembler();
+
+  CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
+  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+
+  SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
+  codegen_->AddSlowPath(slow_path);
+
+  // Load ArtMethod first.
+  HInvokeStaticOrDirect* invoke_direct = invoke->AsInvokeStaticOrDirect();
+  DCHECK(invoke_direct != nullptr);
+  Location temp_loc = codegen_->GenerateCalleeMethodStaticOrDirectCall(
+      invoke_direct, locations->GetTemp(0));
+  DCHECK(temp_loc.Equals(locations->GetTemp(0)));
+  CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
+
+  // Now get declaring class.
+  __ movl(temp, Address(temp, ArtMethod::DeclaringClassOffset().Int32Value()));
+
+  uint32_t slow_path_flag_offset = codegen_->GetReferenceSlowFlagOffset();
+  uint32_t disable_flag_offset = codegen_->GetReferenceDisableFlagOffset();
+  DCHECK_NE(slow_path_flag_offset, 0u);
+  DCHECK_NE(disable_flag_offset, 0u);
+  DCHECK_NE(slow_path_flag_offset, disable_flag_offset);
+
+  // Check static flags that prevent us from using the intrinsic.
+  if (slow_path_flag_offset == disable_flag_offset + 1) {
+    __ cmpw(Address(temp, disable_flag_offset), Immediate(0));
+    __ j(kNotEqual, slow_path->GetEntryLabel());
+  } else {
+    __ cmpb(Address(temp, disable_flag_offset), Immediate(0));
+    __ j(kNotEqual, slow_path->GetEntryLabel());
+    __ cmpb(Address(temp, slow_path_flag_offset), Immediate(0));
+    __ j(kNotEqual, slow_path->GetEntryLabel());
+  }
+
+  // Fast path.
+  __ movl(out, Address(obj, mirror::Reference::ReferentOffset().Int32Value()));
+  codegen_->MaybeRecordImplicitNullCheck(invoke);
+  __ MaybeUnpoisonHeapReference(out);
+  __ Bind(slow_path->GetExitLabel());
+}
+
 UNIMPLEMENTED_INTRINSIC(X86_64, FloatIsInfinite)
 UNIMPLEMENTED_INTRINSIC(X86_64, DoubleIsInfinite)
 
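
Both the x86 and x86-64 versions follow ART's usual intrinsic slow-path shape: create the slow path up front, branch to its entry label whenever a runtime check fails, emit the fast path inline, and bind the exit label at the end so the slow path rejoins. A stripped-down model of that control flow, with plain functions in place of emitted code and names invented for illustration:

    #include <cstdio>

    // Invented stand-ins: the "fast path" is inline work, the "slow path" is
    // the out-of-line runtime call that the emitted branch would jump to.
    struct Ref { void* referent; bool flags_clear; };

    void* SlowPathGetReferent(Ref& r) {
      std::printf("slow path: runtime Reference.get()\n");
      return r.referent;
    }

    void* GetReferent(Ref& r) {
      if (!r.flags_clear) {
        // j(kNotEqual, slow_path->GetEntryLabel()) in the emitted code.
        return SlowPathGetReferent(r);
      }
      // Fast path: one field load, as in `movl out, [obj + referent_offset]`.
      std::printf("fast path: inline field load\n");
      return r.referent;  // slow_path->GetExitLabel() rejoins here.
    }

    int main() {
      int x = 7;
      Ref fast{&x, true}, slow{&x, false};
      GetReferent(fast);
      GetReferent(slow);
      return 0;
    }
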
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 12ea059..c08323a 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -2290,7 +2290,7 @@
 
 // Represents dex's RETURN_VOID opcode. A HReturnVoid is a control flow
 // instruction that branches to the exit block.
-class HReturnVoid : public HTemplateInstruction<0> {
+class HReturnVoid FINAL : public HTemplateInstruction<0> {
  public:
   explicit HReturnVoid(uint32_t dex_pc = kNoDexPc)
       : HTemplateInstruction(SideEffects::None(), dex_pc) {}
@@ -2305,7 +2305,7 @@
 
 // Represents dex's RETURN opcodes. A HReturn is a control flow
 // instruction that branches to the exit block.
-class HReturn : public HTemplateInstruction<1> {
+class HReturn FINAL : public HTemplateInstruction<1> {
  public:
   explicit HReturn(HInstruction* value, uint32_t dex_pc = kNoDexPc)
       : HTemplateInstruction(SideEffects::None(), dex_pc) {
@@ -2320,7 +2320,7 @@
   DISALLOW_COPY_AND_ASSIGN(HReturn);
 };
 
-class HPhi : public HInstruction {
+class HPhi FINAL : public HInstruction {
  public:
   HPhi(ArenaAllocator* arena,
        uint32_t reg_number,
@@ -2424,7 +2424,7 @@
 // The exit instruction is the only instruction of the exit block.
 // Instructions aborting the method (HThrow and HReturn) must branch to the
 // exit block.
-class HExit : public HTemplateInstruction<0> {
+class HExit FINAL : public HTemplateInstruction<0> {
  public:
   explicit HExit(uint32_t dex_pc = kNoDexPc) : HTemplateInstruction(SideEffects::None(), dex_pc) {}
 
@@ -2437,7 +2437,7 @@
 };
 
 // Jumps from one block to another.
-class HGoto : public HTemplateInstruction<0> {
+class HGoto FINAL : public HTemplateInstruction<0> {
  public:
   explicit HGoto(uint32_t dex_pc = kNoDexPc) : HTemplateInstruction(SideEffects::None(), dex_pc) {}
 
@@ -2477,7 +2477,7 @@
   DISALLOW_COPY_AND_ASSIGN(HConstant);
 };
 
-class HNullConstant : public HConstant {
+class HNullConstant FINAL : public HConstant {
  public:
   bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
     return true;
@@ -2501,7 +2501,7 @@
 
 // Constants of the type int. Those can be from Dex instructions, or
 // synthesized (for example with the if-eqz instruction).
-class HIntConstant : public HConstant {
+class HIntConstant FINAL : public HConstant {
  public:
   int32_t GetValue() const { return value_; }
 
@@ -2542,7 +2542,7 @@
   DISALLOW_COPY_AND_ASSIGN(HIntConstant);
 };
 
-class HLongConstant : public HConstant {
+class HLongConstant FINAL : public HConstant {
  public:
   int64_t GetValue() const { return value_; }
 
@@ -2572,7 +2572,7 @@
   DISALLOW_COPY_AND_ASSIGN(HLongConstant);
 };
 
-class HFloatConstant : public HConstant {
+class HFloatConstant FINAL : public HConstant {
  public:
   float GetValue() const { return value_; }
 
@@ -2625,7 +2625,7 @@
   DISALLOW_COPY_AND_ASSIGN(HFloatConstant);
 };
 
-class HDoubleConstant : public HConstant {
+class HDoubleConstant FINAL : public HConstant {
  public:
   double GetValue() const { return value_; }
 
@@ -2678,7 +2678,7 @@
 
 // Conditional branch. A block ending with an HIf instruction must have
 // two successors.
-class HIf : public HTemplateInstruction<1> {
+class HIf FINAL : public HTemplateInstruction<1> {
  public:
   explicit HIf(HInstruction* input, uint32_t dex_pc = kNoDexPc)
       : HTemplateInstruction(SideEffects::None(), dex_pc) {
@@ -2707,7 +2707,7 @@
 // non-exceptional control flow.
 // Normal-flow successor is stored at index zero, exception handlers under
 // higher indices in no particular order.
-class HTryBoundary : public HTemplateInstruction<0> {
+class HTryBoundary FINAL : public HTemplateInstruction<0> {
  public:
   enum class BoundaryKind {
     kEntry,
@@ -2765,7 +2765,7 @@
 };
 
 // Deoptimize to interpreter, upon checking a condition.
-class HDeoptimize : public HTemplateInstruction<1> {
+class HDeoptimize FINAL : public HTemplateInstruction<1> {
  public:
   // We set CanTriggerGC to prevent any intermediate address to be live
   // at the point of the `HDeoptimize`.
@@ -2790,7 +2790,7 @@
 // Represents the ArtMethod that was passed as a first argument to
 // the method. It is used by instructions that depend on it, like
 // instructions that work with the dex cache.
-class HCurrentMethod : public HExpression<0> {
+class HCurrentMethod FINAL : public HExpression<0> {
  public:
   explicit HCurrentMethod(Primitive::Type type, uint32_t dex_pc = kNoDexPc)
       : HExpression(type, SideEffects::None(), dex_pc) {}
@@ -2803,7 +2803,7 @@
 
 // Fetches an ArtMethod from the virtual table or the interface method table
 // of a class.
-class HClassTableGet : public HExpression<1> {
+class HClassTableGet FINAL : public HExpression<1> {
  public:
   enum class TableKind {
     kVTable,
@@ -2850,7 +2850,7 @@
 // PackedSwitch (jump table). A block ending with a PackedSwitch instruction will
 // have one successor for each entry in the switch table, and the final successor
 // will be the block containing the next Dex opcode.
-class HPackedSwitch : public HTemplateInstruction<1> {
+class HPackedSwitch FINAL : public HTemplateInstruction<1> {
  public:
   HPackedSwitch(int32_t start_value,
                 uint32_t num_entries,
@@ -3095,7 +3095,7 @@
 };
 
 // Instruction to check if two inputs are equal to each other.
-class HEqual : public HCondition {
+class HEqual FINAL : public HCondition {
  public:
   HEqual(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc)
       : HCondition(first, second, dex_pc) {}
@@ -3139,7 +3139,7 @@
   DISALLOW_COPY_AND_ASSIGN(HEqual);
 };
 
-class HNotEqual : public HCondition {
+class HNotEqual FINAL : public HCondition {
  public:
   HNotEqual(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc)
       : HCondition(first, second, dex_pc) {}
@@ -3182,7 +3182,7 @@
   DISALLOW_COPY_AND_ASSIGN(HNotEqual);
 };
 
-class HLessThan : public HCondition {
+class HLessThan FINAL : public HCondition {
  public:
   HLessThan(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc)
       : HCondition(first, second, dex_pc) {}
@@ -3219,7 +3219,7 @@
   DISALLOW_COPY_AND_ASSIGN(HLessThan);
 };
 
-class HLessThanOrEqual : public HCondition {
+class HLessThanOrEqual FINAL : public HCondition {
  public:
   HLessThanOrEqual(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc)
       : HCondition(first, second, dex_pc) {}
@@ -3256,7 +3256,7 @@
   DISALLOW_COPY_AND_ASSIGN(HLessThanOrEqual);
 };
 
-class HGreaterThan : public HCondition {
+class HGreaterThan FINAL : public HCondition {
  public:
   HGreaterThan(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc)
       : HCondition(first, second, dex_pc) {}
@@ -3293,7 +3293,7 @@
   DISALLOW_COPY_AND_ASSIGN(HGreaterThan);
 };
 
-class HGreaterThanOrEqual : public HCondition {
+class HGreaterThanOrEqual FINAL : public HCondition {
  public:
   HGreaterThanOrEqual(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc)
       : HCondition(first, second, dex_pc) {}
@@ -3330,7 +3330,7 @@
   DISALLOW_COPY_AND_ASSIGN(HGreaterThanOrEqual);
 };
 
-class HBelow : public HCondition {
+class HBelow FINAL : public HCondition {
  public:
   HBelow(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc)
       : HCondition(first, second, dex_pc) {}
@@ -3370,7 +3370,7 @@
   DISALLOW_COPY_AND_ASSIGN(HBelow);
 };
 
-class HBelowOrEqual : public HCondition {
+class HBelowOrEqual FINAL : public HCondition {
  public:
   HBelowOrEqual(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc)
       : HCondition(first, second, dex_pc) {}
@@ -3410,7 +3410,7 @@
   DISALLOW_COPY_AND_ASSIGN(HBelowOrEqual);
 };
 
-class HAbove : public HCondition {
+class HAbove FINAL : public HCondition {
  public:
   HAbove(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc)
       : HCondition(first, second, dex_pc) {}
@@ -3450,7 +3450,7 @@
   DISALLOW_COPY_AND_ASSIGN(HAbove);
 };
 
-class HAboveOrEqual : public HCondition {
+class HAboveOrEqual FINAL : public HCondition {
  public:
   HAboveOrEqual(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc)
       : HCondition(first, second, dex_pc) {}
@@ -3492,7 +3492,7 @@
 
 // Instruction to check how two inputs compare to each other.
 // Result is 0 if input0 == input1, 1 if input0 > input1, or -1 if input0 < input1.
-class HCompare : public HBinaryOperation {
+class HCompare FINAL : public HBinaryOperation {
  public:
   // Note that `comparison_type` is the type of comparison performed
   // between the comparison's inputs, not the type of the instantiated
@@ -3581,7 +3581,7 @@
   DISALLOW_COPY_AND_ASSIGN(HCompare);
 };
 
-class HNewInstance : public HExpression<2> {
+class HNewInstance FINAL : public HExpression<2> {
  public:
   HNewInstance(HInstruction* cls,
                HCurrentMethod* current_method,
@@ -3784,7 +3784,7 @@
   DISALLOW_COPY_AND_ASSIGN(HInvoke);
 };
 
-class HInvokeUnresolved : public HInvoke {
+class HInvokeUnresolved FINAL : public HInvoke {
  public:
   HInvokeUnresolved(ArenaAllocator* arena,
                     uint32_t number_of_arguments,
@@ -3807,7 +3807,7 @@
   DISALLOW_COPY_AND_ASSIGN(HInvokeUnresolved);
 };
 
-class HInvokeStaticOrDirect : public HInvoke {
+class HInvokeStaticOrDirect FINAL : public HInvoke {
  public:
   // Requirements of this method call regarding the class
   // initialization (clinit) check of its declaring class.
@@ -4096,7 +4096,7 @@
 std::ostream& operator<<(std::ostream& os, HInvokeStaticOrDirect::MethodLoadKind rhs);
 std::ostream& operator<<(std::ostream& os, HInvokeStaticOrDirect::ClinitCheckRequirement rhs);
 
-class HInvokeVirtual : public HInvoke {
+class HInvokeVirtual FINAL : public HInvoke {
  public:
   HInvokeVirtual(ArenaAllocator* arena,
                  uint32_t number_of_arguments,
@@ -4122,7 +4122,7 @@
   DISALLOW_COPY_AND_ASSIGN(HInvokeVirtual);
 };
 
-class HInvokeInterface : public HInvoke {
+class HInvokeInterface FINAL : public HInvoke {
  public:
   HInvokeInterface(ArenaAllocator* arena,
                    uint32_t number_of_arguments,
@@ -4149,7 +4149,7 @@
   DISALLOW_COPY_AND_ASSIGN(HInvokeInterface);
 };
 
-class HNeg : public HUnaryOperation {
+class HNeg FINAL : public HUnaryOperation {
  public:
   HNeg(Primitive::Type result_type, HInstruction* input, uint32_t dex_pc = kNoDexPc)
       : HUnaryOperation(result_type, input, dex_pc) {
@@ -4177,7 +4177,7 @@
   DISALLOW_COPY_AND_ASSIGN(HNeg);
 };
 
-class HNewArray : public HExpression<2> {
+class HNewArray FINAL : public HExpression<2> {
  public:
   HNewArray(HInstruction* length,
             HCurrentMethod* current_method,
@@ -4216,7 +4216,7 @@
   DISALLOW_COPY_AND_ASSIGN(HNewArray);
 };
 
-class HAdd : public HBinaryOperation {
+class HAdd FINAL : public HBinaryOperation {
  public:
   HAdd(Primitive::Type result_type,
        HInstruction* left,
@@ -4251,7 +4251,7 @@
   DISALLOW_COPY_AND_ASSIGN(HAdd);
 };
 
-class HSub : public HBinaryOperation {
+class HSub FINAL : public HBinaryOperation {
  public:
   HSub(Primitive::Type result_type,
        HInstruction* left,
@@ -4284,7 +4284,7 @@
   DISALLOW_COPY_AND_ASSIGN(HSub);
 };
 
-class HMul : public HBinaryOperation {
+class HMul FINAL : public HBinaryOperation {
  public:
   HMul(Primitive::Type result_type,
        HInstruction* left,
@@ -4319,7 +4319,7 @@
   DISALLOW_COPY_AND_ASSIGN(HMul);
 };
 
-class HDiv : public HBinaryOperation {
+class HDiv FINAL : public HBinaryOperation {
  public:
   HDiv(Primitive::Type result_type,
        HInstruction* left,
@@ -4371,7 +4371,7 @@
   DISALLOW_COPY_AND_ASSIGN(HDiv);
 };
 
-class HRem : public HBinaryOperation {
+class HRem FINAL : public HBinaryOperation {
  public:
   HRem(Primitive::Type result_type,
        HInstruction* left,
@@ -4422,7 +4422,7 @@
   DISALLOW_COPY_AND_ASSIGN(HRem);
 };
 
-class HDivZeroCheck : public HExpression<1> {
+class HDivZeroCheck FINAL : public HExpression<1> {
  public:
   // `HDivZeroCheck` can trigger GC, as it may call the `ArithmeticException`
   // constructor.
@@ -4448,7 +4448,7 @@
   DISALLOW_COPY_AND_ASSIGN(HDivZeroCheck);
 };
 
-class HShl : public HBinaryOperation {
+class HShl FINAL : public HBinaryOperation {
  public:
   HShl(Primitive::Type result_type,
        HInstruction* value,
@@ -4494,7 +4494,7 @@
   DISALLOW_COPY_AND_ASSIGN(HShl);
 };
 
-class HShr : public HBinaryOperation {
+class HShr FINAL : public HBinaryOperation {
  public:
   HShr(Primitive::Type result_type,
        HInstruction* value,
@@ -4540,7 +4540,7 @@
   DISALLOW_COPY_AND_ASSIGN(HShr);
 };
 
-class HUShr : public HBinaryOperation {
+class HUShr FINAL : public HBinaryOperation {
  public:
   HUShr(Primitive::Type result_type,
         HInstruction* value,
@@ -4588,7 +4588,7 @@
   DISALLOW_COPY_AND_ASSIGN(HUShr);
 };
 
-class HAnd : public HBinaryOperation {
+class HAnd FINAL : public HBinaryOperation {
  public:
   HAnd(Primitive::Type result_type,
        HInstruction* left,
@@ -4625,7 +4625,7 @@
   DISALLOW_COPY_AND_ASSIGN(HAnd);
 };
 
-class HOr : public HBinaryOperation {
+class HOr FINAL : public HBinaryOperation {
  public:
   HOr(Primitive::Type result_type,
       HInstruction* left,
@@ -4662,7 +4662,7 @@
   DISALLOW_COPY_AND_ASSIGN(HOr);
 };
 
-class HXor : public HBinaryOperation {
+class HXor FINAL : public HBinaryOperation {
  public:
   HXor(Primitive::Type result_type,
        HInstruction* left,
@@ -4699,7 +4699,7 @@
   DISALLOW_COPY_AND_ASSIGN(HXor);
 };
 
-class HRor : public HBinaryOperation {
+class HRor FINAL : public HBinaryOperation {
  public:
   HRor(Primitive::Type result_type, HInstruction* value, HInstruction* distance)
     : HBinaryOperation(result_type, value, distance) {
@@ -4752,7 +4752,7 @@
 
 // The value of a parameter in this method. Its location depends on
 // the calling convention.
-class HParameterValue : public HExpression<0> {
+class HParameterValue FINAL : public HExpression<0> {
  public:
   HParameterValue(const DexFile& dex_file,
                   uint16_t type_index,
@@ -4794,7 +4794,7 @@
   DISALLOW_COPY_AND_ASSIGN(HParameterValue);
 };
 
-class HNot : public HUnaryOperation {
+class HNot FINAL : public HUnaryOperation {
  public:
   HNot(Primitive::Type result_type, HInstruction* input, uint32_t dex_pc = kNoDexPc)
       : HUnaryOperation(result_type, input, dex_pc) {}
@@ -4827,7 +4827,7 @@
   DISALLOW_COPY_AND_ASSIGN(HNot);
 };
 
-class HBooleanNot : public HUnaryOperation {
+class HBooleanNot FINAL : public HUnaryOperation {
  public:
   explicit HBooleanNot(HInstruction* input, uint32_t dex_pc = kNoDexPc)
       : HUnaryOperation(Primitive::Type::kPrimBoolean, input, dex_pc) {}
@@ -4864,7 +4864,7 @@
   DISALLOW_COPY_AND_ASSIGN(HBooleanNot);
 };
 
-class HTypeConversion : public HExpression<1> {
+class HTypeConversion FINAL : public HExpression<1> {
  public:
   // Instantiate a type conversion of `input` to `result_type`.
   HTypeConversion(Primitive::Type result_type, HInstruction* input, uint32_t dex_pc)
@@ -4907,7 +4907,7 @@
 
 static constexpr uint32_t kNoRegNumber = -1;
 
-class HNullCheck : public HExpression<1> {
+class HNullCheck FINAL : public HExpression<1> {
  public:
   // `HNullCheck` can trigger GC, as it may call the `NullPointerException`
   // constructor.
@@ -4969,7 +4969,7 @@
   const Handle<mirror::DexCache> dex_cache_;
 };
 
-class HInstanceFieldGet : public HExpression<1> {
+class HInstanceFieldGet FINAL : public HExpression<1> {
  public:
   HInstanceFieldGet(HInstruction* value,
                     Primitive::Type field_type,
@@ -5021,7 +5021,7 @@
   DISALLOW_COPY_AND_ASSIGN(HInstanceFieldGet);
 };
 
-class HInstanceFieldSet : public HTemplateInstruction<2> {
+class HInstanceFieldSet FINAL : public HTemplateInstruction<2> {
  public:
   HInstanceFieldSet(HInstruction* object,
                     HInstruction* value,
@@ -5072,7 +5072,7 @@
   DISALLOW_COPY_AND_ASSIGN(HInstanceFieldSet);
 };
 
-class HArrayGet : public HExpression<2> {
+class HArrayGet FINAL : public HExpression<2> {
  public:
   HArrayGet(HInstruction* array, HInstruction* index, Primitive::Type type, uint32_t dex_pc)
       : HExpression(type, SideEffects::ArrayReadOfType(type), dex_pc) {
@@ -5118,7 +5118,7 @@
   DISALLOW_COPY_AND_ASSIGN(HArrayGet);
 };
 
-class HArraySet : public HTemplateInstruction<3> {
+class HArraySet FINAL : public HTemplateInstruction<3> {
  public:
   HArraySet(HInstruction* array,
             HInstruction* index,
@@ -5218,7 +5218,7 @@
   DISALLOW_COPY_AND_ASSIGN(HArraySet);
 };
 
-class HArrayLength : public HExpression<1> {
+class HArrayLength FINAL : public HExpression<1> {
  public:
   HArrayLength(HInstruction* array, uint32_t dex_pc)
       : HExpression(Primitive::kPrimInt, SideEffects::None(), dex_pc) {
@@ -5254,7 +5254,7 @@
   DISALLOW_COPY_AND_ASSIGN(HArrayLength);
 };
 
-class HBoundsCheck : public HExpression<2> {
+class HBoundsCheck FINAL : public HExpression<2> {
  public:
   // `HBoundsCheck` can trigger GC, as it may call the `IndexOutOfBoundsException`
   // constructor.
@@ -5282,7 +5282,7 @@
   DISALLOW_COPY_AND_ASSIGN(HBoundsCheck);
 };
 
-class HSuspendCheck : public HTemplateInstruction<0> {
+class HSuspendCheck FINAL : public HTemplateInstruction<0> {
  public:
   explicit HSuspendCheck(uint32_t dex_pc = kNoDexPc)
       : HTemplateInstruction(SideEffects::CanTriggerGC(), dex_pc), slow_path_(nullptr) {}
@@ -5324,7 +5324,7 @@
 /**
  * Instruction to load a Class object.
  */
-class HLoadClass : public HExpression<1> {
+class HLoadClass FINAL : public HExpression<1> {
  public:
   HLoadClass(HCurrentMethod* current_method,
              uint16_t type_index,
@@ -5428,7 +5428,7 @@
   DISALLOW_COPY_AND_ASSIGN(HLoadClass);
 };
 
-class HLoadString : public HExpression<1> {
+class HLoadString FINAL : public HExpression<1> {
  public:
   // Determines how to load the String.
   enum class LoadKind {
@@ -5630,7 +5630,7 @@
 /**
  * Performs an initialization check on its Class object input.
  */
-class HClinitCheck : public HExpression<1> {
+class HClinitCheck FINAL : public HExpression<1> {
  public:
   HClinitCheck(HLoadClass* constant, uint32_t dex_pc)
       : HExpression(
@@ -5660,7 +5660,7 @@
   DISALLOW_COPY_AND_ASSIGN(HClinitCheck);
 };
 
-class HStaticFieldGet : public HExpression<1> {
+class HStaticFieldGet FINAL : public HExpression<1> {
  public:
   HStaticFieldGet(HInstruction* cls,
                   Primitive::Type field_type,
@@ -5709,7 +5709,7 @@
   DISALLOW_COPY_AND_ASSIGN(HStaticFieldGet);
 };
 
-class HStaticFieldSet : public HTemplateInstruction<2> {
+class HStaticFieldSet FINAL : public HTemplateInstruction<2> {
  public:
   HStaticFieldSet(HInstruction* cls,
                   HInstruction* value,
@@ -5757,7 +5757,7 @@
   DISALLOW_COPY_AND_ASSIGN(HStaticFieldSet);
 };
 
-class HUnresolvedInstanceFieldGet : public HExpression<1> {
+class HUnresolvedInstanceFieldGet FINAL : public HExpression<1> {
  public:
   HUnresolvedInstanceFieldGet(HInstruction* obj,
                               Primitive::Type field_type,
@@ -5782,7 +5782,7 @@
   DISALLOW_COPY_AND_ASSIGN(HUnresolvedInstanceFieldGet);
 };
 
-class HUnresolvedInstanceFieldSet : public HTemplateInstruction<2> {
+class HUnresolvedInstanceFieldSet FINAL : public HTemplateInstruction<2> {
  public:
   HUnresolvedInstanceFieldSet(HInstruction* obj,
                               HInstruction* value,
@@ -5820,7 +5820,7 @@
   DISALLOW_COPY_AND_ASSIGN(HUnresolvedInstanceFieldSet);
 };
 
-class HUnresolvedStaticFieldGet : public HExpression<0> {
+class HUnresolvedStaticFieldGet FINAL : public HExpression<0> {
  public:
   HUnresolvedStaticFieldGet(Primitive::Type field_type,
                             uint32_t field_index,
@@ -5843,7 +5843,7 @@
   DISALLOW_COPY_AND_ASSIGN(HUnresolvedStaticFieldGet);
 };
 
-class HUnresolvedStaticFieldSet : public HTemplateInstruction<1> {
+class HUnresolvedStaticFieldSet FINAL : public HTemplateInstruction<1> {
  public:
   HUnresolvedStaticFieldSet(HInstruction* value,
                             Primitive::Type field_type,
@@ -5880,7 +5880,7 @@
 };
 
 // Implement the move-exception DEX instruction.
-class HLoadException : public HExpression<0> {
+class HLoadException FINAL : public HExpression<0> {
  public:
   explicit HLoadException(uint32_t dex_pc = kNoDexPc)
       : HExpression(Primitive::kPrimNot, SideEffects::None(), dex_pc) {}
@@ -5895,7 +5895,7 @@
 
 // Implicit part of move-exception which clears thread-local exception storage.
 // Must not be removed because the runtime expects the TLS to get cleared.
-class HClearException : public HTemplateInstruction<0> {
+class HClearException FINAL : public HTemplateInstruction<0> {
  public:
   explicit HClearException(uint32_t dex_pc = kNoDexPc)
       : HTemplateInstruction(SideEffects::AllWrites(), dex_pc) {}
@@ -5906,7 +5906,7 @@
   DISALLOW_COPY_AND_ASSIGN(HClearException);
 };
 
-class HThrow : public HTemplateInstruction<1> {
+class HThrow FINAL : public HTemplateInstruction<1> {
  public:
   HThrow(HInstruction* exception, uint32_t dex_pc)
       : HTemplateInstruction(SideEffects::CanTriggerGC(), dex_pc) {
@@ -5943,7 +5943,7 @@
 
 std::ostream& operator<<(std::ostream& os, TypeCheckKind rhs);
 
-class HInstanceOf : public HExpression<2> {
+class HInstanceOf FINAL : public HExpression<2> {
  public:
   HInstanceOf(HInstruction* object,
               HLoadClass* constant,
@@ -5997,7 +5997,7 @@
   DISALLOW_COPY_AND_ASSIGN(HInstanceOf);
 };
 
-class HBoundType : public HExpression<1> {
+class HBoundType FINAL : public HExpression<1> {
  public:
   HBoundType(HInstruction* input, uint32_t dex_pc = kNoDexPc)
       : HExpression(Primitive::kPrimNot, SideEffects::None(), dex_pc),
@@ -6041,7 +6041,7 @@
   DISALLOW_COPY_AND_ASSIGN(HBoundType);
 };
 
-class HCheckCast : public HTemplateInstruction<2> {
+class HCheckCast FINAL : public HTemplateInstruction<2> {
  public:
   HCheckCast(HInstruction* object,
              HLoadClass* constant,
@@ -6086,7 +6086,7 @@
   DISALLOW_COPY_AND_ASSIGN(HCheckCast);
 };
 
-class HMemoryBarrier : public HTemplateInstruction<0> {
+class HMemoryBarrier FINAL : public HTemplateInstruction<0> {
  public:
   explicit HMemoryBarrier(MemBarrierKind barrier_kind, uint32_t dex_pc = kNoDexPc)
       : HTemplateInstruction(
@@ -6111,7 +6111,7 @@
   DISALLOW_COPY_AND_ASSIGN(HMemoryBarrier);
 };
 
-class HMonitorOperation : public HTemplateInstruction<1> {
+class HMonitorOperation FINAL : public HTemplateInstruction<1> {
  public:
   enum class OperationKind {
     kEnter,
@@ -6156,7 +6156,7 @@
   DISALLOW_COPY_AND_ASSIGN(HMonitorOperation);
 };
 
-class HSelect : public HExpression<3> {
+class HSelect FINAL : public HExpression<3> {
  public:
   HSelect(HInstruction* condition,
           HInstruction* true_value,
@@ -6269,7 +6269,7 @@
 
 static constexpr size_t kDefaultNumberOfMoves = 4;
 
-class HParallelMove : public HTemplateInstruction<0> {
+class HParallelMove FINAL : public HTemplateInstruction<0> {
  public:
   explicit HParallelMove(ArenaAllocator* arena, uint32_t dex_pc = kNoDexPc)
       : HTemplateInstruction(SideEffects::None(), dex_pc),
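
The blanket FINAL additions in nodes.h are behavior-preserving hardening: FINAL here is ART's portability macro which, to the best of my knowledge, expands to C++11 `final`, so these leaf instruction classes can no longer be subclassed, virtual calls through them become candidates for devirtualization, and an accidental override turns into a compile error. A generic demonstration of what `final` buys (not ART code):

    struct HInstructionLike {
      virtual ~HInstructionLike() = default;
      virtual int Kind() const { return 0; }
    };

    // Marking the leaf class final forbids further subclassing and lets the
    // compiler devirtualize Kind() when the static type is known.
    struct HAddLike final : HInstructionLike {
      int Kind() const override { return 1; }
    };

    // struct HBogus : HAddLike {};  // error: base 'HAddLike' is marked 'final'

    int main() {
      HAddLike add;
      return add.Kind() == 1 ? 0 : 1;  // Call can be resolved statically.
    }
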
diff --git a/compiler/optimizing/nodes_arm.h b/compiler/optimizing/nodes_arm.h
index 6a1dbb9..371e8ef 100644
--- a/compiler/optimizing/nodes_arm.h
+++ b/compiler/optimizing/nodes_arm.h
@@ -19,7 +19,7 @@
 
 namespace art {
 
-class HArmDexCacheArraysBase : public HExpression<0> {
+class HArmDexCacheArraysBase FINAL : public HExpression<0> {
  public:
   explicit HArmDexCacheArraysBase(const DexFile& dex_file)
       : HExpression(Primitive::kPrimInt, SideEffects::None(), kNoDexPc),
diff --git a/compiler/optimizing/nodes_arm64.h b/compiler/optimizing/nodes_arm64.h
index 173852a..737aece 100644
--- a/compiler/optimizing/nodes_arm64.h
+++ b/compiler/optimizing/nodes_arm64.h
@@ -21,7 +21,7 @@
 
 namespace art {
 
-class HArm64DataProcWithShifterOp : public HExpression<2> {
+class HArm64DataProcWithShifterOp FINAL : public HExpression<2> {
  public:
   enum OpKind {
     kLSL,   // Logical shift left.
@@ -97,7 +97,7 @@
 // This instruction computes an intermediate address pointing in the 'middle' of an object. The
 // result pointer cannot be handled by GC, so extra care is taken to make sure that this value is
 // never used across anything that can trigger GC.
-class HArm64IntermediateAddress : public HExpression<2> {
+class HArm64IntermediateAddress FINAL : public HExpression<2> {
  public:
   HArm64IntermediateAddress(HInstruction* base_address, HInstruction* offset, uint32_t dex_pc)
       : HExpression(Primitive::kPrimNot, SideEffects::DependsOnGC(), dex_pc) {
diff --git a/compiler/optimizing/nodes_shared.h b/compiler/optimizing/nodes_shared.h
index c10c718..bdcf54a 100644
--- a/compiler/optimizing/nodes_shared.h
+++ b/compiler/optimizing/nodes_shared.h
@@ -19,7 +19,7 @@
 
 namespace art {
 
-class HMultiplyAccumulate : public HExpression<3> {
+class HMultiplyAccumulate FINAL : public HExpression<3> {
  public:
   HMultiplyAccumulate(Primitive::Type type,
                       InstructionKind op,
@@ -53,7 +53,7 @@
   DISALLOW_COPY_AND_ASSIGN(HMultiplyAccumulate);
 };
 
-class HBitwiseNegatedRight : public HBinaryOperation {
+class HBitwiseNegatedRight FINAL : public HBinaryOperation {
  public:
   HBitwiseNegatedRight(Primitive::Type result_type,
                             InstructionKind op,
diff --git a/compiler/optimizing/nodes_x86.h b/compiler/optimizing/nodes_x86.h
index 0b3a84d..c3696b5 100644
--- a/compiler/optimizing/nodes_x86.h
+++ b/compiler/optimizing/nodes_x86.h
@@ -20,7 +20,7 @@
 namespace art {
 
 // Compute the address of the method for X86 Constant area support.
-class HX86ComputeBaseMethodAddress : public HExpression<0> {
+class HX86ComputeBaseMethodAddress FINAL : public HExpression<0> {
  public:
   // Treat the value as an int32_t, but it is really a 32 bit native pointer.
   HX86ComputeBaseMethodAddress()
@@ -33,7 +33,7 @@
 };
 
 // Load a constant value from the constant table.
-class HX86LoadFromConstantTable : public HExpression<2> {
+class HX86LoadFromConstantTable FINAL : public HExpression<2> {
  public:
   HX86LoadFromConstantTable(HX86ComputeBaseMethodAddress* method_base,
                             HConstant* constant)
@@ -57,7 +57,7 @@
 };
 
 // Version of HNeg with access to the constant table for FP types.
-class HX86FPNeg : public HExpression<2> {
+class HX86FPNeg FINAL : public HExpression<2> {
  public:
   HX86FPNeg(Primitive::Type result_type,
             HInstruction* input,
@@ -76,7 +76,7 @@
 };
 
 // X86 version of HPackedSwitch that holds a pointer to the base method address.
-class HX86PackedSwitch : public HTemplateInstruction<2> {
+class HX86PackedSwitch FINAL : public HTemplateInstruction<2> {
  public:
   HX86PackedSwitch(int32_t start_value,
                    int32_t num_entries,
diff --git a/compiler/utils/arm/assembler_arm.cc b/compiler/utils/arm/assembler_arm.cc
index e5f91dc..a7f4547 100644
--- a/compiler/utils/arm/assembler_arm.cc
+++ b/compiler/utils/arm/assembler_arm.cc
@@ -386,8 +386,9 @@
 
 constexpr size_t kFramePointerSize = kArmPointerSize;
 
-void ArmAssembler::BuildFrame(size_t frame_size, ManagedRegister method_reg,
-                              const std::vector<ManagedRegister>& callee_save_regs,
+void ArmAssembler::BuildFrame(size_t frame_size,
+                              ManagedRegister method_reg,
+                              ArrayRef<const ManagedRegister> callee_save_regs,
                               const ManagedRegisterEntrySpills& entry_spills) {
   CHECK_EQ(buffer_.Size(), 0U);  // Nothing emitted yet
   CHECK_ALIGNED(frame_size, kStackAlignment);
@@ -442,7 +443,7 @@
 }
 
 void ArmAssembler::RemoveFrame(size_t frame_size,
-                              const std::vector<ManagedRegister>& callee_save_regs) {
+                               ArrayRef<const ManagedRegister> callee_save_regs) {
   CHECK_ALIGNED(frame_size, kStackAlignment);
   cfi_.RememberState();
 
diff --git a/compiler/utils/arm/assembler_arm.h b/compiler/utils/arm/assembler_arm.h
index ffbe786..274d0de 100644
--- a/compiler/utils/arm/assembler_arm.h
+++ b/compiler/utils/arm/assembler_arm.h
@@ -907,12 +907,13 @@
   //
 
   // Emit code that will create an activation on the stack
-  void BuildFrame(size_t frame_size, ManagedRegister method_reg,
-                  const std::vector<ManagedRegister>& callee_save_regs,
+  void BuildFrame(size_t frame_size,
+                  ManagedRegister method_reg,
+                  ArrayRef<const ManagedRegister> callee_save_regs,
                   const ManagedRegisterEntrySpills& entry_spills) OVERRIDE;
 
   // Emit code that will remove an activation from the stack
-  void RemoveFrame(size_t frame_size, const std::vector<ManagedRegister>& callee_save_regs)
+  void RemoveFrame(size_t frame_size, ArrayRef<const ManagedRegister> callee_save_regs)
     OVERRIDE;
 
   void IncreaseFrameSize(size_t adjust) OVERRIDE;
diff --git a/compiler/utils/arm/managed_register_arm.h b/compiler/utils/arm/managed_register_arm.h
index 5b84058..276db44 100644
--- a/compiler/utils/arm/managed_register_arm.h
+++ b/compiler/utils/arm/managed_register_arm.h
@@ -85,34 +85,34 @@
 // There is a one-to-one mapping between ManagedRegister and register id.
 class ArmManagedRegister : public ManagedRegister {
  public:
-  Register AsCoreRegister() const {
+  constexpr Register AsCoreRegister() const {
     CHECK(IsCoreRegister());
     return static_cast<Register>(id_);
   }
 
-  SRegister AsSRegister() const {
+  constexpr SRegister AsSRegister() const {
     CHECK(IsSRegister());
     return static_cast<SRegister>(id_ - kNumberOfCoreRegIds);
   }
 
-  DRegister AsDRegister() const {
+  constexpr DRegister AsDRegister() const {
     CHECK(IsDRegister());
     return static_cast<DRegister>(id_ - kNumberOfCoreRegIds - kNumberOfSRegIds);
   }
 
-  SRegister AsOverlappingDRegisterLow() const {
+  constexpr SRegister AsOverlappingDRegisterLow() const {
     CHECK(IsOverlappingDRegister());
     DRegister d_reg = AsDRegister();
     return static_cast<SRegister>(d_reg * 2);
   }
 
-  SRegister AsOverlappingDRegisterHigh() const {
+  constexpr SRegister AsOverlappingDRegisterHigh() const {
     CHECK(IsOverlappingDRegister());
     DRegister d_reg = AsDRegister();
     return static_cast<SRegister>(d_reg * 2 + 1);
   }
 
-  RegisterPair AsRegisterPair() const {
+  constexpr RegisterPair AsRegisterPair() const {
     CHECK(IsRegisterPair());
     Register reg_low = AsRegisterPairLow();
     if (reg_low == R1) {
@@ -122,50 +122,50 @@
     }
   }
 
-  Register AsRegisterPairLow() const {
+  constexpr Register AsRegisterPairLow() const {
     CHECK(IsRegisterPair());
     // Appropriate mapping of register ids allows to use AllocIdLow().
     return FromRegId(AllocIdLow()).AsCoreRegister();
   }
 
-  Register AsRegisterPairHigh() const {
+  constexpr Register AsRegisterPairHigh() const {
     CHECK(IsRegisterPair());
     // Appropriate mapping of register ids allows to use AllocIdHigh().
     return FromRegId(AllocIdHigh()).AsCoreRegister();
   }
 
-  bool IsCoreRegister() const {
+  constexpr bool IsCoreRegister() const {
     CHECK(IsValidManagedRegister());
     return (0 <= id_) && (id_ < kNumberOfCoreRegIds);
   }
 
-  bool IsSRegister() const {
+  constexpr bool IsSRegister() const {
     CHECK(IsValidManagedRegister());
     const int test = id_ - kNumberOfCoreRegIds;
     return (0 <= test) && (test < kNumberOfSRegIds);
   }
 
-  bool IsDRegister() const {
+  constexpr bool IsDRegister() const {
     CHECK(IsValidManagedRegister());
     const int test = id_ - (kNumberOfCoreRegIds + kNumberOfSRegIds);
     return (0 <= test) && (test < kNumberOfDRegIds);
   }
 
   // Returns true if this DRegister overlaps SRegisters.
-  bool IsOverlappingDRegister() const {
+  constexpr bool IsOverlappingDRegister() const {
     CHECK(IsValidManagedRegister());
     const int test = id_ - (kNumberOfCoreRegIds + kNumberOfSRegIds);
     return (0 <= test) && (test < kNumberOfOverlappingDRegIds);
   }
 
-  bool IsRegisterPair() const {
+  constexpr bool IsRegisterPair() const {
     CHECK(IsValidManagedRegister());
     const int test =
         id_ - (kNumberOfCoreRegIds + kNumberOfSRegIds + kNumberOfDRegIds);
     return (0 <= test) && (test < kNumberOfPairRegIds);
   }
 
-  bool IsSameType(ArmManagedRegister test) const {
+  constexpr bool IsSameType(ArmManagedRegister test) const {
     CHECK(IsValidManagedRegister() && test.IsValidManagedRegister());
     return
       (IsCoreRegister() && test.IsCoreRegister()) ||
@@ -182,29 +182,29 @@
 
   void Print(std::ostream& os) const;
 
-  static ArmManagedRegister FromCoreRegister(Register r) {
+  static constexpr ArmManagedRegister FromCoreRegister(Register r) {
     CHECK_NE(r, kNoRegister);
     return FromRegId(r);
   }
 
-  static ArmManagedRegister FromSRegister(SRegister r) {
+  static constexpr ArmManagedRegister FromSRegister(SRegister r) {
     CHECK_NE(r, kNoSRegister);
     return FromRegId(r + kNumberOfCoreRegIds);
   }
 
-  static ArmManagedRegister FromDRegister(DRegister r) {
+  static constexpr ArmManagedRegister FromDRegister(DRegister r) {
     CHECK_NE(r, kNoDRegister);
     return FromRegId(r + (kNumberOfCoreRegIds + kNumberOfSRegIds));
   }
 
-  static ArmManagedRegister FromRegisterPair(RegisterPair r) {
+  static constexpr ArmManagedRegister FromRegisterPair(RegisterPair r) {
     CHECK_NE(r, kNoRegisterPair);
     return FromRegId(r + (kNumberOfCoreRegIds +
                           kNumberOfSRegIds + kNumberOfDRegIds));
   }
 
   // Return a RegisterPair consisting of Register r_low and r_low + 1.
-  static ArmManagedRegister FromCoreRegisterPair(Register r_low) {
+  static constexpr ArmManagedRegister FromCoreRegisterPair(Register r_low) {
     if (r_low != R1) {  // not the dalvik special case
       CHECK_NE(r_low, kNoRegister);
       CHECK_EQ(0, (r_low % 2));
@@ -217,7 +217,7 @@
   }
 
   // Return a DRegister overlapping SRegister r_low and r_low + 1.
-  static ArmManagedRegister FromSRegisterPair(SRegister r_low) {
+  static constexpr ArmManagedRegister FromSRegisterPair(SRegister r_low) {
     CHECK_NE(r_low, kNoSRegister);
     CHECK_EQ(0, (r_low % 2));
     const int r = r_low / 2;
@@ -226,7 +226,7 @@
   }
 
  private:
-  bool IsValidManagedRegister() const {
+  constexpr bool IsValidManagedRegister() const {
     return (0 <= id_) && (id_ < kNumberOfRegIds);
   }
 
@@ -251,9 +251,9 @@
 
   friend class ManagedRegister;
 
-  explicit ArmManagedRegister(int reg_id) : ManagedRegister(reg_id) {}
+  explicit constexpr ArmManagedRegister(int reg_id) : ManagedRegister(reg_id) {}
 
-  static ArmManagedRegister FromRegId(int reg_id) {
+  static constexpr ArmManagedRegister FromRegId(int reg_id) {
     ArmManagedRegister reg(reg_id);
     CHECK(reg.IsValidManagedRegister());
     return reg;
@@ -264,7 +264,7 @@
 
 }  // namespace arm
 
-inline arm::ArmManagedRegister ManagedRegister::AsArm() const {
+constexpr inline arm::ArmManagedRegister ManagedRegister::AsArm() const {
   arm::ArmManagedRegister reg(id_);
   CHECK(reg.IsNoRegister() || reg.IsValidManagedRegister());
   return reg;
diff --git a/compiler/utils/arm64/assembler_arm64.cc b/compiler/utils/arm64/assembler_arm64.cc
index eb5112b..1842f00 100644
--- a/compiler/utils/arm64/assembler_arm64.cc
+++ b/compiler/utils/arm64/assembler_arm64.cc
@@ -683,8 +683,9 @@
   DCHECK(registers.IsEmpty());
 }
 
-void Arm64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg,
-                                const std::vector<ManagedRegister>& callee_save_regs,
+void Arm64Assembler::BuildFrame(size_t frame_size,
+                                ManagedRegister method_reg,
+                                ArrayRef<const ManagedRegister> callee_save_regs,
                                 const ManagedRegisterEntrySpills& entry_spills) {
   // Setup VIXL CPURegList for callee-saves.
   CPURegList core_reg_list(CPURegister::kRegister, kXRegSize, 0);
@@ -741,7 +742,7 @@
 }
 
 void Arm64Assembler::RemoveFrame(size_t frame_size,
-                                 const std::vector<ManagedRegister>& callee_save_regs) {
+                                 ArrayRef<const ManagedRegister> callee_save_regs) {
   // Setup VIXL CPURegList for callee-saves.
   CPURegList core_reg_list(CPURegister::kRegister, kXRegSize, 0);
   CPURegList fp_reg_list(CPURegister::kFPRegister, kDRegSize, 0);
diff --git a/compiler/utils/arm64/assembler_arm64.h b/compiler/utils/arm64/assembler_arm64.h
index c4e5de7..91171a8 100644
--- a/compiler/utils/arm64/assembler_arm64.h
+++ b/compiler/utils/arm64/assembler_arm64.h
@@ -109,12 +109,13 @@
   void UnspillRegisters(vixl::CPURegList registers, int offset);
 
   // Emit code that will create an activation on the stack.
-  void BuildFrame(size_t frame_size, ManagedRegister method_reg,
-                  const std::vector<ManagedRegister>& callee_save_regs,
+  void BuildFrame(size_t frame_size,
+                  ManagedRegister method_reg,
+                  ArrayRef<const ManagedRegister> callee_save_regs,
                   const ManagedRegisterEntrySpills& entry_spills) OVERRIDE;
 
   // Emit code that will remove an activation from the stack.
-  void RemoveFrame(size_t frame_size, const std::vector<ManagedRegister>& callee_save_regs)
+  void RemoveFrame(size_t frame_size, ArrayRef<const ManagedRegister> callee_save_regs)
       OVERRIDE;
 
   void IncreaseFrameSize(size_t adjust) OVERRIDE;
diff --git a/compiler/utils/arm64/managed_register_arm64.h b/compiler/utils/arm64/managed_register_arm64.h
index 46be1c5..f7d74d2 100644
--- a/compiler/utils/arm64/managed_register_arm64.h
+++ b/compiler/utils/arm64/managed_register_arm64.h
@@ -56,80 +56,80 @@
 
 class Arm64ManagedRegister : public ManagedRegister {
  public:
-  XRegister AsXRegister() const {
+  constexpr XRegister AsXRegister() const {
     CHECK(IsXRegister());
     return static_cast<XRegister>(id_);
   }
 
-  WRegister AsWRegister() const {
+  constexpr WRegister AsWRegister() const {
     CHECK(IsWRegister());
     return static_cast<WRegister>(id_ - kNumberOfXRegIds);
   }
 
-  DRegister AsDRegister() const {
+  constexpr DRegister AsDRegister() const {
     CHECK(IsDRegister());
     return static_cast<DRegister>(id_ - kNumberOfXRegIds - kNumberOfWRegIds);
   }
 
-  SRegister AsSRegister() const {
+  constexpr SRegister AsSRegister() const {
     CHECK(IsSRegister());
     return static_cast<SRegister>(id_ - kNumberOfXRegIds - kNumberOfWRegIds -
                                   kNumberOfDRegIds);
   }
 
-  WRegister AsOverlappingWRegister() const {
+  constexpr WRegister AsOverlappingWRegister() const {
     CHECK(IsValidManagedRegister());
     if (IsZeroRegister()) return WZR;
     return static_cast<WRegister>(AsXRegister());
   }
 
-  XRegister AsOverlappingXRegister() const {
+  constexpr XRegister AsOverlappingXRegister() const {
     CHECK(IsValidManagedRegister());
     return static_cast<XRegister>(AsWRegister());
   }
 
-  SRegister AsOverlappingSRegister() const {
+  constexpr SRegister AsOverlappingSRegister() const {
     CHECK(IsValidManagedRegister());
     return static_cast<SRegister>(AsDRegister());
   }
 
-  DRegister AsOverlappingDRegister() const {
+  constexpr DRegister AsOverlappingDRegister() const {
     CHECK(IsValidManagedRegister());
     return static_cast<DRegister>(AsSRegister());
   }
 
-  bool IsXRegister() const {
+  constexpr bool IsXRegister() const {
     CHECK(IsValidManagedRegister());
     return (0 <= id_) && (id_ < kNumberOfXRegIds);
   }
 
-  bool IsWRegister() const {
+  constexpr bool IsWRegister() const {
     CHECK(IsValidManagedRegister());
     const int test = id_ - kNumberOfXRegIds;
     return (0 <= test) && (test < kNumberOfWRegIds);
   }
 
-  bool IsDRegister() const {
+  constexpr bool IsDRegister() const {
     CHECK(IsValidManagedRegister());
     const int test = id_ - (kNumberOfXRegIds + kNumberOfWRegIds);
     return (0 <= test) && (test < kNumberOfDRegIds);
   }
 
-  bool IsSRegister() const {
+  constexpr bool IsSRegister() const {
     CHECK(IsValidManagedRegister());
     const int test = id_ - (kNumberOfXRegIds + kNumberOfWRegIds + kNumberOfDRegIds);
     return (0 <= test) && (test < kNumberOfSRegIds);
   }
 
-  bool IsGPRegister() const {
+  constexpr bool IsGPRegister() const {
     return IsXRegister() || IsWRegister();
   }
 
-  bool IsFPRegister() const {
+  constexpr bool IsFPRegister() const {
     return IsDRegister() || IsSRegister();
   }
 
-  bool IsSameType(Arm64ManagedRegister test) const {
+  constexpr bool IsSameType(Arm64ManagedRegister test) const {
     CHECK(IsValidManagedRegister() && test.IsValidManagedRegister());
     return
       (IsXRegister() && test.IsXRegister()) ||
@@ -145,53 +145,53 @@
 
   void Print(std::ostream& os) const;
 
-  static Arm64ManagedRegister FromXRegister(XRegister r) {
+  static constexpr Arm64ManagedRegister FromXRegister(XRegister r) {
     CHECK_NE(r, kNoRegister);
     return FromRegId(r);
   }
 
-  static Arm64ManagedRegister FromWRegister(WRegister r) {
+  static constexpr Arm64ManagedRegister FromWRegister(WRegister r) {
     CHECK_NE(r, kNoWRegister);
     return FromRegId(r + kNumberOfXRegIds);
   }
 
-  static Arm64ManagedRegister FromDRegister(DRegister r) {
+  static constexpr Arm64ManagedRegister FromDRegister(DRegister r) {
     CHECK_NE(r, kNoDRegister);
     return FromRegId(r + (kNumberOfXRegIds + kNumberOfWRegIds));
   }
 
-  static Arm64ManagedRegister FromSRegister(SRegister r) {
+  static constexpr Arm64ManagedRegister FromSRegister(SRegister r) {
     CHECK_NE(r, kNoSRegister);
     return FromRegId(r + (kNumberOfXRegIds + kNumberOfWRegIds +
                           kNumberOfDRegIds));
   }
 
   // Returns the X register overlapping W register r.
-  static Arm64ManagedRegister FromWRegisterX(WRegister r) {
+  static constexpr Arm64ManagedRegister FromWRegisterX(WRegister r) {
     CHECK_NE(r, kNoWRegister);
     return FromRegId(r);
   }
 
   // Return the D register overlapping S register r.
-  static Arm64ManagedRegister FromSRegisterD(SRegister r) {
+  static constexpr Arm64ManagedRegister FromSRegisterD(SRegister r) {
     CHECK_NE(r, kNoSRegister);
     return FromRegId(r + (kNumberOfXRegIds + kNumberOfWRegIds));
   }
 
  private:
-  bool IsValidManagedRegister() const {
+  constexpr bool IsValidManagedRegister() const {
     return (0 <= id_) && (id_ < kNumberOfRegIds);
   }
 
-  bool IsStackPointer() const {
+  constexpr bool IsStackPointer() const {
     return IsXRegister() && (id_ == SP);
   }
 
-  bool IsZeroRegister() const {
+  constexpr bool IsZeroRegister() const {
     return IsXRegister() && (id_ == XZR);
   }
 
-  int RegId() const {
+  constexpr int RegId() const {
     CHECK(!IsNoRegister());
     return id_;
   }
@@ -202,9 +202,9 @@
 
   friend class ManagedRegister;
 
-  explicit Arm64ManagedRegister(int reg_id) : ManagedRegister(reg_id) {}
+  explicit constexpr Arm64ManagedRegister(int reg_id) : ManagedRegister(reg_id) {}
 
-  static Arm64ManagedRegister FromRegId(int reg_id) {
+  static constexpr Arm64ManagedRegister FromRegId(int reg_id) {
     Arm64ManagedRegister reg(reg_id);
     CHECK(reg.IsValidManagedRegister());
     return reg;
@@ -215,7 +215,7 @@
 
 }  // namespace arm64
 
-inline arm64::Arm64ManagedRegister ManagedRegister::AsArm64() const {
+constexpr inline arm64::Arm64ManagedRegister ManagedRegister::AsArm64() const {
   arm64::Arm64ManagedRegister reg(id_);
   CHECK(reg.IsNoRegister() || reg.IsValidManagedRegister());
   return reg;
diff --git a/compiler/utils/assembler.h b/compiler/utils/assembler.h
index 5267dc3..80aa630 100644
--- a/compiler/utils/assembler.h
+++ b/compiler/utils/assembler.h
@@ -32,6 +32,7 @@
 #include "memory_region.h"
 #include "mips/constants_mips.h"
 #include "offsets.h"
+#include "utils/array_ref.h"
 #include "x86/constants_x86.h"
 #include "x86_64/constants_x86_64.h"
 
@@ -375,13 +376,14 @@
   virtual void Comment(const char* format ATTRIBUTE_UNUSED, ...) {}
 
   // Emit code that will create an activation on the stack
-  virtual void BuildFrame(size_t frame_size, ManagedRegister method_reg,
-                          const std::vector<ManagedRegister>& callee_save_regs,
+  virtual void BuildFrame(size_t frame_size,
+                          ManagedRegister method_reg,
+                          ArrayRef<const ManagedRegister> callee_save_regs,
                           const ManagedRegisterEntrySpills& entry_spills) = 0;
 
   // Emit code that will remove an activation from the stack
   virtual void RemoveFrame(size_t frame_size,
-                           const std::vector<ManagedRegister>& callee_save_regs) = 0;
+                           ArrayRef<const ManagedRegister> callee_save_regs) = 0;
 
   virtual void IncreaseFrameSize(size_t adjust) = 0;
   virtual void DecreaseFrameSize(size_t adjust) = 0;
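
The signature change here, repeated for every backend above and below, swaps `const std::vector<ManagedRegister>&` for an `ArrayRef<const ManagedRegister>` passed by value. ArrayRef (from the newly included utils/array_ref.h) is a non-owning view, so callers can supply a vector, a C array, or a static table without copying elements. A rough sketch of the shape of such a view, using a hypothetical `Span` name rather than ART's actual implementation:

```cpp
#include <cstddef>
#include <vector>

template <typename T>
class Span {
 public:
  // View over a C array; the length is deduced at compile time.
  template <size_t N>
  explicit Span(const T (&array)[N]) : data_(array), size_(N) {}

  // View over a vector; no elements are copied.
  explicit Span(const std::vector<T>& v) : data_(v.data()), size_(v.size()) {}

  size_t size() const { return size_; }
  const T& operator[](size_t i) const { return data_[i]; }

 private:
  const T* data_;  // not owned
  size_t size_;
};
```

Being only a pointer and a length, such a view is cheap to pass by value, which is why the new parameters drop the reference.
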
diff --git a/compiler/utils/managed_register.h b/compiler/utils/managed_register.h
index 893daff..46adb3f 100644
--- a/compiler/utils/managed_register.h
+++ b/compiler/utils/managed_register.h
@@ -47,40 +47,40 @@
   // ManagedRegister is a value class. There exists no method to change the
   // internal state. We therefore allow a copy constructor and an
   // assignment-operator.
-  ManagedRegister(const ManagedRegister& other) : id_(other.id_) { }
+  constexpr ManagedRegister(const ManagedRegister& other) : id_(other.id_) { }
 
   ManagedRegister& operator=(const ManagedRegister& other) {
     id_ = other.id_;
     return *this;
   }
 
-  arm::ArmManagedRegister AsArm() const;
-  arm64::Arm64ManagedRegister AsArm64() const;
-  mips::MipsManagedRegister AsMips() const;
-  mips64::Mips64ManagedRegister AsMips64() const;
-  x86::X86ManagedRegister AsX86() const;
-  x86_64::X86_64ManagedRegister AsX86_64() const;
+  constexpr arm::ArmManagedRegister AsArm() const;
+  constexpr arm64::Arm64ManagedRegister AsArm64() const;
+  constexpr mips::MipsManagedRegister AsMips() const;
+  constexpr mips64::Mips64ManagedRegister AsMips64() const;
+  constexpr x86::X86ManagedRegister AsX86() const;
+  constexpr x86_64::X86_64ManagedRegister AsX86_64() const;
 
   // It is valid to invoke Equals on and with a NoRegister.
-  bool Equals(const ManagedRegister& other) const {
+  constexpr bool Equals(const ManagedRegister& other) const {
     return id_ == other.id_;
   }
 
-  bool IsNoRegister() const {
+  constexpr bool IsNoRegister() const {
     return id_ == kNoRegister;
   }
 
-  static ManagedRegister NoRegister() {
+  static constexpr ManagedRegister NoRegister() {
     return ManagedRegister();
   }
 
-  int RegId() const { return id_; }
-  explicit ManagedRegister(int reg_id) : id_(reg_id) { }
+  constexpr int RegId() const { return id_; }
+  explicit constexpr ManagedRegister(int reg_id) : id_(reg_id) { }
 
  protected:
   static const int kNoRegister = -1;
 
-  ManagedRegister() : id_(kNoRegister) { }
+  constexpr ManagedRegister() : id_(kNoRegister) { }
 
   int id_;
 };
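
Making ManagedRegister (and each per-architecture subclass below) constexpr lets register wrappers be produced in constant expressions, for example in compile-time register tables that need no static initializers. A minimal sketch under that assumption, with a hypothetical `Reg` type:

```cpp
class Reg {
 public:
  explicit constexpr Reg(int id) : id_(id) {}
  constexpr int Id() const { return id_; }
  constexpr bool Equals(Reg other) const { return id_ == other.id_; }

 private:
  int id_;
};

// Built entirely at compile time; no runtime construction code is emitted.
constexpr Reg kCalleeSaves[] = { Reg(4), Reg(5), Reg(6) };
static_assert(kCalleeSaves[1].Id() == 5, "usable in constant expressions");
```
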
diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc
index a1798c0..9368301 100644
--- a/compiler/utils/mips/assembler_mips.cc
+++ b/compiler/utils/mips/assembler_mips.cc
@@ -2438,8 +2438,9 @@
 
 constexpr size_t kFramePointerSize = 4;
 
-void MipsAssembler::BuildFrame(size_t frame_size, ManagedRegister method_reg,
-                               const std::vector<ManagedRegister>& callee_save_regs,
+void MipsAssembler::BuildFrame(size_t frame_size,
+                               ManagedRegister method_reg,
+                               ArrayRef<const ManagedRegister> callee_save_regs,
                                const ManagedRegisterEntrySpills& entry_spills) {
   CHECK_ALIGNED(frame_size, kStackAlignment);
   DCHECK(!overwriting_);
@@ -2453,7 +2454,7 @@
   cfi_.RelOffset(DWARFReg(RA), stack_offset);
   for (int i = callee_save_regs.size() - 1; i >= 0; --i) {
     stack_offset -= kFramePointerSize;
-    Register reg = callee_save_regs.at(i).AsMips().AsCoreRegister();
+    Register reg = callee_save_regs[i].AsMips().AsCoreRegister();
     StoreToOffset(kStoreWord, reg, SP, stack_offset);
     cfi_.RelOffset(DWARFReg(reg), stack_offset);
   }
@@ -2482,7 +2483,7 @@
 }
 
 void MipsAssembler::RemoveFrame(size_t frame_size,
-                                const std::vector<ManagedRegister>& callee_save_regs) {
+                                ArrayRef<const ManagedRegister> callee_save_regs) {
   CHECK_ALIGNED(frame_size, kStackAlignment);
   DCHECK(!overwriting_);
   cfi_.RememberState();
@@ -2490,7 +2491,7 @@
   // Pop callee saves and return address.
   int stack_offset = frame_size - (callee_save_regs.size() * kFramePointerSize) - kFramePointerSize;
   for (size_t i = 0; i < callee_save_regs.size(); ++i) {
-    Register reg = callee_save_regs.at(i).AsMips().AsCoreRegister();
+    Register reg = callee_save_regs[i].AsMips().AsCoreRegister();
     LoadFromOffset(kLoadWord, reg, SP, stack_offset);
     cfi_.Restore(DWARFReg(reg));
     stack_offset += kFramePointerSize;
diff --git a/compiler/utils/mips/assembler_mips.h b/compiler/utils/mips/assembler_mips.h
index ecb67bd..d5e6285 100644
--- a/compiler/utils/mips/assembler_mips.h
+++ b/compiler/utils/mips/assembler_mips.h
@@ -414,11 +414,11 @@
   // Emit code that will create an activation on the stack.
   void BuildFrame(size_t frame_size,
                   ManagedRegister method_reg,
-                  const std::vector<ManagedRegister>& callee_save_regs,
+                  ArrayRef<const ManagedRegister> callee_save_regs,
                   const ManagedRegisterEntrySpills& entry_spills) OVERRIDE;
 
   // Emit code that will remove an activation from the stack.
-  void RemoveFrame(size_t frame_size, const std::vector<ManagedRegister>& callee_save_regs)
+  void RemoveFrame(size_t frame_size, ArrayRef<const ManagedRegister> callee_save_regs)
       OVERRIDE;
 
   void IncreaseFrameSize(size_t adjust) OVERRIDE;
diff --git a/compiler/utils/mips/assembler_mips_test.cc b/compiler/utils/mips/assembler_mips_test.cc
index cec43ba..56e5884 100644
--- a/compiler/utils/mips/assembler_mips_test.cc
+++ b/compiler/utils/mips/assembler_mips_test.cc
@@ -561,6 +561,14 @@
   DriverStr(RepeatFF(&mips::MipsAssembler::NegD, "neg.d ${reg1}, ${reg2}"), "NegD");
 }
 
+TEST_F(AssemblerMIPSTest, FloorWS) {
+  DriverStr(RepeatFF(&mips::MipsAssembler::FloorWS, "floor.w.s ${reg1}, ${reg2}"), "floor.w.s");
+}
+
+TEST_F(AssemblerMIPSTest, FloorWD) {
+  DriverStr(RepeatFF(&mips::MipsAssembler::FloorWD, "floor.w.d ${reg1}, ${reg2}"), "floor.w.d");
+}
+
 TEST_F(AssemblerMIPSTest, CunS) {
   DriverStr(RepeatIbFF(&mips::MipsAssembler::CunS, 3, "c.un.s $fcc{imm}, ${reg1}, ${reg2}"),
             "CunS");
diff --git a/compiler/utils/mips/managed_register_mips.h b/compiler/utils/mips/managed_register_mips.h
index 5e7ed11..66204e7 100644
--- a/compiler/utils/mips/managed_register_mips.h
+++ b/compiler/utils/mips/managed_register_mips.h
@@ -87,70 +87,70 @@
 // There is a one-to-one mapping between ManagedRegister and register id.
 class MipsManagedRegister : public ManagedRegister {
  public:
-  Register AsCoreRegister() const {
+  constexpr Register AsCoreRegister() const {
     CHECK(IsCoreRegister());
     return static_cast<Register>(id_);
   }
 
-  FRegister AsFRegister() const {
+  constexpr FRegister AsFRegister() const {
     CHECK(IsFRegister());
     return static_cast<FRegister>(id_ - kNumberOfCoreRegIds);
   }
 
-  DRegister AsDRegister() const {
+  constexpr DRegister AsDRegister() const {
     CHECK(IsDRegister());
     return static_cast<DRegister>(id_ - kNumberOfCoreRegIds - kNumberOfFRegIds);
   }
 
-  FRegister AsOverlappingDRegisterLow() const {
+  constexpr FRegister AsOverlappingDRegisterLow() const {
     CHECK(IsOverlappingDRegister());
     DRegister d_reg = AsDRegister();
     return static_cast<FRegister>(d_reg * 2);
   }
 
-  FRegister AsOverlappingDRegisterHigh() const {
+  constexpr FRegister AsOverlappingDRegisterHigh() const {
     CHECK(IsOverlappingDRegister());
     DRegister d_reg = AsDRegister();
     return static_cast<FRegister>(d_reg * 2 + 1);
   }
 
-  Register AsRegisterPairLow() const {
+  constexpr Register AsRegisterPairLow() const {
     CHECK(IsRegisterPair());
     // Appropriate mapping of register ids allows to use AllocIdLow().
     return FromRegId(AllocIdLow()).AsCoreRegister();
   }
 
-  Register AsRegisterPairHigh() const {
+  constexpr Register AsRegisterPairHigh() const {
     CHECK(IsRegisterPair());
     // Appropriate mapping of register ids allows to use AllocIdHigh().
     return FromRegId(AllocIdHigh()).AsCoreRegister();
   }
 
-  bool IsCoreRegister() const {
+  constexpr bool IsCoreRegister() const {
     CHECK(IsValidManagedRegister());
     return (0 <= id_) && (id_ < kNumberOfCoreRegIds);
   }
 
-  bool IsFRegister() const {
+  constexpr bool IsFRegister() const {
     CHECK(IsValidManagedRegister());
     const int test = id_ - kNumberOfCoreRegIds;
     return (0 <= test) && (test < kNumberOfFRegIds);
   }
 
-  bool IsDRegister() const {
+  constexpr bool IsDRegister() const {
     CHECK(IsValidManagedRegister());
     const int test = id_ - (kNumberOfCoreRegIds + kNumberOfFRegIds);
     return (0 <= test) && (test < kNumberOfDRegIds);
   }
 
   // Returns true if this DRegister overlaps FRegisters.
-  bool IsOverlappingDRegister() const {
+  constexpr bool IsOverlappingDRegister() const {
     CHECK(IsValidManagedRegister());
     const int test = id_ - (kNumberOfCoreRegIds + kNumberOfFRegIds);
     return (0 <= test) && (test < kNumberOfOverlappingDRegIds);
   }
 
-  bool IsRegisterPair() const {
+  constexpr bool IsRegisterPair() const {
     CHECK(IsValidManagedRegister());
     const int test =
         id_ - (kNumberOfCoreRegIds + kNumberOfFRegIds + kNumberOfDRegIds);
@@ -164,32 +164,32 @@
   // then false is returned.
   bool Overlaps(const MipsManagedRegister& other) const;
 
-  static MipsManagedRegister FromCoreRegister(Register r) {
+  static constexpr MipsManagedRegister FromCoreRegister(Register r) {
     CHECK_NE(r, kNoRegister);
     return FromRegId(r);
   }
 
-  static MipsManagedRegister FromFRegister(FRegister r) {
+  static constexpr MipsManagedRegister FromFRegister(FRegister r) {
     CHECK_NE(r, kNoFRegister);
     return FromRegId(r + kNumberOfCoreRegIds);
   }
 
-  static MipsManagedRegister FromDRegister(DRegister r) {
+  static constexpr MipsManagedRegister FromDRegister(DRegister r) {
     CHECK_NE(r, kNoDRegister);
     return FromRegId(r + kNumberOfCoreRegIds + kNumberOfFRegIds);
   }
 
-  static MipsManagedRegister FromRegisterPair(RegisterPair r) {
+  static constexpr MipsManagedRegister FromRegisterPair(RegisterPair r) {
     CHECK_NE(r, kNoRegisterPair);
     return FromRegId(r + (kNumberOfCoreRegIds + kNumberOfFRegIds + kNumberOfDRegIds));
   }
 
  private:
-  bool IsValidManagedRegister() const {
+  constexpr bool IsValidManagedRegister() const {
     return (0 <= id_) && (id_ < kNumberOfRegIds);
   }
 
-  int RegId() const {
+  constexpr int RegId() const {
     CHECK(!IsNoRegister());
     return id_;
   }
@@ -205,9 +205,9 @@
 
   friend class ManagedRegister;
 
-  explicit MipsManagedRegister(int reg_id) : ManagedRegister(reg_id) {}
+  explicit constexpr MipsManagedRegister(int reg_id) : ManagedRegister(reg_id) {}
 
-  static MipsManagedRegister FromRegId(int reg_id) {
+  static constexpr MipsManagedRegister FromRegId(int reg_id) {
     MipsManagedRegister reg(reg_id);
     CHECK(reg.IsValidManagedRegister());
     return reg;
@@ -218,7 +218,7 @@
 
 }  // namespace mips
 
-inline mips::MipsManagedRegister ManagedRegister::AsMips() const {
+constexpr inline mips::MipsManagedRegister ManagedRegister::AsMips() const {
   mips::MipsManagedRegister reg(id_);
   CHECK(reg.IsNoRegister() || reg.IsValidManagedRegister());
   return reg;
diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc
index ab480ca..447ede5 100644
--- a/compiler/utils/mips64/assembler_mips64.cc
+++ b/compiler/utils/mips64/assembler_mips64.cc
@@ -1977,8 +1977,9 @@
 
 constexpr size_t kFramePointerSize = 8;
 
-void Mips64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg,
-                                 const std::vector<ManagedRegister>& callee_save_regs,
+void Mips64Assembler::BuildFrame(size_t frame_size,
+                                 ManagedRegister method_reg,
+                                 ArrayRef<const ManagedRegister> callee_save_regs,
                                  const ManagedRegisterEntrySpills& entry_spills) {
   CHECK_ALIGNED(frame_size, kStackAlignment);
   DCHECK(!overwriting_);
@@ -1992,7 +1993,7 @@
   cfi_.RelOffset(DWARFReg(RA), stack_offset);
   for (int i = callee_save_regs.size() - 1; i >= 0; --i) {
     stack_offset -= kFramePointerSize;
-    GpuRegister reg = callee_save_regs.at(i).AsMips64().AsGpuRegister();
+    GpuRegister reg = callee_save_regs[i].AsMips64().AsGpuRegister();
     StoreToOffset(kStoreDoubleword, reg, SP, stack_offset);
     cfi_.RelOffset(DWARFReg(reg), stack_offset);
   }
@@ -2003,7 +2004,7 @@
   // Write out entry spills.
   int32_t offset = frame_size + kFramePointerSize;
   for (size_t i = 0; i < entry_spills.size(); ++i) {
-    Mips64ManagedRegister reg = entry_spills.at(i).AsMips64();
+    Mips64ManagedRegister reg = entry_spills[i].AsMips64();
     ManagedRegisterSpill spill = entry_spills.at(i);
     int32_t size = spill.getSize();
     if (reg.IsNoRegister()) {
@@ -2022,7 +2023,7 @@
 }
 
 void Mips64Assembler::RemoveFrame(size_t frame_size,
-                                  const std::vector<ManagedRegister>& callee_save_regs) {
+                                  ArrayRef<const ManagedRegister> callee_save_regs) {
   CHECK_ALIGNED(frame_size, kStackAlignment);
   DCHECK(!overwriting_);
   cfi_.RememberState();
@@ -2030,7 +2031,7 @@
   // Pop callee saves and return address
   int stack_offset = frame_size - (callee_save_regs.size() * kFramePointerSize) - kFramePointerSize;
   for (size_t i = 0; i < callee_save_regs.size(); ++i) {
-    GpuRegister reg = callee_save_regs.at(i).AsMips64().AsGpuRegister();
+    GpuRegister reg = callee_save_regs[i].AsMips64().AsGpuRegister();
     LoadFromOffset(kLoadDoubleword, reg, SP, stack_offset);
     cfi_.Restore(DWARFReg(reg));
     stack_offset += kFramePointerSize;
diff --git a/compiler/utils/mips64/assembler_mips64.h b/compiler/utils/mips64/assembler_mips64.h
index 8acc38a..0cd0708 100644
--- a/compiler/utils/mips64/assembler_mips64.h
+++ b/compiler/utils/mips64/assembler_mips64.h
@@ -365,13 +365,13 @@
   //
 
   // Emit code that will create an activation on the stack.
-  void BuildFrame(size_t frame_size, ManagedRegister method_reg,
-                  const std::vector<ManagedRegister>& callee_save_regs,
+  void BuildFrame(size_t frame_size,
+                  ManagedRegister method_reg,
+                  ArrayRef<const ManagedRegister> callee_save_regs,
                   const ManagedRegisterEntrySpills& entry_spills) OVERRIDE;
 
   // Emit code that will remove an activation from the stack.
-  void RemoveFrame(size_t frame_size,
-                   const std::vector<ManagedRegister>& callee_save_regs) OVERRIDE;
+  void RemoveFrame(size_t frame_size, ArrayRef<const ManagedRegister> callee_save_regs) OVERRIDE;
 
   void IncreaseFrameSize(size_t adjust) OVERRIDE;
   void DecreaseFrameSize(size_t adjust) OVERRIDE;
diff --git a/compiler/utils/mips64/managed_register_mips64.h b/compiler/utils/mips64/managed_register_mips64.h
index 1d36128..c9f9556 100644
--- a/compiler/utils/mips64/managed_register_mips64.h
+++ b/compiler/utils/mips64/managed_register_mips64.h
@@ -39,22 +39,22 @@
 // There is a one-to-one mapping between ManagedRegister and register id.
 class Mips64ManagedRegister : public ManagedRegister {
  public:
-  GpuRegister AsGpuRegister() const {
+  constexpr GpuRegister AsGpuRegister() const {
     CHECK(IsGpuRegister());
     return static_cast<GpuRegister>(id_);
   }
 
-  FpuRegister AsFpuRegister() const {
+  constexpr FpuRegister AsFpuRegister() const {
     CHECK(IsFpuRegister());
     return static_cast<FpuRegister>(id_ - kNumberOfGpuRegIds);
   }
 
-  bool IsGpuRegister() const {
+  constexpr bool IsGpuRegister() const {
     CHECK(IsValidManagedRegister());
     return (0 <= id_) && (id_ < kNumberOfGpuRegIds);
   }
 
-  bool IsFpuRegister() const {
+  constexpr bool IsFpuRegister() const {
     CHECK(IsValidManagedRegister());
     const int test = id_ - kNumberOfGpuRegIds;
     return (0 <= test) && (test < kNumberOfFpuRegIds);
@@ -67,22 +67,22 @@
   // then false is returned.
   bool Overlaps(const Mips64ManagedRegister& other) const;
 
-  static Mips64ManagedRegister FromGpuRegister(GpuRegister r) {
+  static constexpr Mips64ManagedRegister FromGpuRegister(GpuRegister r) {
     CHECK_NE(r, kNoGpuRegister);
     return FromRegId(r);
   }
 
-  static Mips64ManagedRegister FromFpuRegister(FpuRegister r) {
+  static constexpr Mips64ManagedRegister FromFpuRegister(FpuRegister r) {
     CHECK_NE(r, kNoFpuRegister);
     return FromRegId(r + kNumberOfGpuRegIds);
   }
 
  private:
-  bool IsValidManagedRegister() const {
+  constexpr bool IsValidManagedRegister() const {
     return (0 <= id_) && (id_ < kNumberOfRegIds);
   }
 
-  int RegId() const {
+  constexpr int RegId() const {
     CHECK(!IsNoRegister());
     return id_;
   }
@@ -98,9 +98,9 @@
 
   friend class ManagedRegister;
 
-  explicit Mips64ManagedRegister(int reg_id) : ManagedRegister(reg_id) {}
+  explicit constexpr Mips64ManagedRegister(int reg_id) : ManagedRegister(reg_id) {}
 
-  static Mips64ManagedRegister FromRegId(int reg_id) {
+  static constexpr Mips64ManagedRegister FromRegId(int reg_id) {
     Mips64ManagedRegister reg(reg_id);
     CHECK(reg.IsValidManagedRegister());
     return reg;
@@ -111,7 +111,7 @@
 
 }  // namespace mips64
 
-inline mips64::Mips64ManagedRegister ManagedRegister::AsMips64() const {
+constexpr inline mips64::Mips64ManagedRegister ManagedRegister::AsMips64() const {
   mips64::Mips64ManagedRegister reg(id_);
   CHECK(reg.IsNoRegister() || reg.IsValidManagedRegister());
   return reg;
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index 84cdb7d..f931d75 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -1932,15 +1932,16 @@
 
 constexpr size_t kFramePointerSize = 4;
 
-void X86Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg,
-                              const std::vector<ManagedRegister>& spill_regs,
+void X86Assembler::BuildFrame(size_t frame_size,
+                              ManagedRegister method_reg,
+                              ArrayRef<const ManagedRegister> spill_regs,
                               const ManagedRegisterEntrySpills& entry_spills) {
   DCHECK_EQ(buffer_.Size(), 0U);  // Nothing emitted yet.
   cfi_.SetCurrentCFAOffset(4);  // Return address on stack.
   CHECK_ALIGNED(frame_size, kStackAlignment);
   int gpr_count = 0;
   for (int i = spill_regs.size() - 1; i >= 0; --i) {
-    Register spill = spill_regs.at(i).AsX86().AsCpuRegister();
+    Register spill = spill_regs[i].AsX86().AsCpuRegister();
     pushl(spill);
     gpr_count++;
     cfi_.AdjustCFAOffset(kFramePointerSize);
@@ -1974,7 +1975,7 @@
   }
 }
 
-void X86Assembler::RemoveFrame(size_t frame_size, const std::vector<ManagedRegister>& spill_regs) {
+void X86Assembler::RemoveFrame(size_t frame_size, ArrayRef<const ManagedRegister> spill_regs) {
   CHECK_ALIGNED(frame_size, kStackAlignment);
   cfi_.RememberState();
   // -kFramePointerSize for ArtMethod*.
@@ -1982,7 +1983,7 @@
   addl(ESP, Immediate(adjust));
   cfi_.AdjustCFAOffset(-adjust);
   for (size_t i = 0; i < spill_regs.size(); ++i) {
-    Register spill = spill_regs.at(i).AsX86().AsCpuRegister();
+    Register spill = spill_regs[i].AsX86().AsCpuRegister();
     popl(spill);
     cfi_.AdjustCFAOffset(-static_cast<int>(kFramePointerSize));
     cfi_.Restore(DWARFReg(spill));
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index bc46e9f..fa61662 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -633,12 +633,13 @@
   //
 
   // Emit code that will create an activation on the stack
-  void BuildFrame(size_t frame_size, ManagedRegister method_reg,
-                  const std::vector<ManagedRegister>& callee_save_regs,
+  void BuildFrame(size_t frame_size,
+                  ManagedRegister method_reg,
+                  ArrayRef<const ManagedRegister> callee_save_regs,
                   const ManagedRegisterEntrySpills& entry_spills) OVERRIDE;
 
   // Emit code that will remove an activation from the stack
-  void RemoveFrame(size_t frame_size, const std::vector<ManagedRegister>& callee_save_regs)
+  void RemoveFrame(size_t frame_size, ArrayRef<const ManagedRegister> callee_save_regs)
       OVERRIDE;
 
   void IncreaseFrameSize(size_t adjust) OVERRIDE;
diff --git a/compiler/utils/x86/managed_register_x86.h b/compiler/utils/x86/managed_register_x86.h
index fc20d7e..c0c2b65 100644
--- a/compiler/utils/x86/managed_register_x86.h
+++ b/compiler/utils/x86/managed_register_x86.h
@@ -89,64 +89,64 @@
 // There is a one-to-one mapping between ManagedRegister and register id.
 class X86ManagedRegister : public ManagedRegister {
  public:
-  ByteRegister AsByteRegister() const {
+  constexpr ByteRegister AsByteRegister() const {
     CHECK(IsCpuRegister());
     CHECK_LT(AsCpuRegister(), ESP);  // ESP, EBP, ESI and EDI cannot be encoded as byte registers.
     return static_cast<ByteRegister>(id_);
   }
 
-  Register AsCpuRegister() const {
+  constexpr Register AsCpuRegister() const {
     CHECK(IsCpuRegister());
     return static_cast<Register>(id_);
   }
 
-  XmmRegister AsXmmRegister() const {
+  constexpr XmmRegister AsXmmRegister() const {
     CHECK(IsXmmRegister());
     return static_cast<XmmRegister>(id_ - kNumberOfCpuRegIds);
   }
 
-  X87Register AsX87Register() const {
+  constexpr X87Register AsX87Register() const {
     CHECK(IsX87Register());
     return static_cast<X87Register>(id_ -
                                     (kNumberOfCpuRegIds + kNumberOfXmmRegIds));
   }
 
-  Register AsRegisterPairLow() const {
+  constexpr Register AsRegisterPairLow() const {
     CHECK(IsRegisterPair());
     // Appropriate mapping of register ids allows to use AllocIdLow().
     return FromRegId(AllocIdLow()).AsCpuRegister();
   }
 
-  Register AsRegisterPairHigh() const {
+  constexpr Register AsRegisterPairHigh() const {
     CHECK(IsRegisterPair());
     // Appropriate mapping of register ids allows to use AllocIdHigh().
     return FromRegId(AllocIdHigh()).AsCpuRegister();
   }
 
-  RegisterPair AsRegisterPair() const {
+  constexpr RegisterPair AsRegisterPair() const {
     CHECK(IsRegisterPair());
     return static_cast<RegisterPair>(id_ -
         (kNumberOfCpuRegIds + kNumberOfXmmRegIds + kNumberOfX87RegIds));
   }
 
-  bool IsCpuRegister() const {
+  constexpr bool IsCpuRegister() const {
     CHECK(IsValidManagedRegister());
     return (0 <= id_) && (id_ < kNumberOfCpuRegIds);
   }
 
-  bool IsXmmRegister() const {
+  constexpr bool IsXmmRegister() const {
     CHECK(IsValidManagedRegister());
     const int test = id_ - kNumberOfCpuRegIds;
     return (0 <= test) && (test < kNumberOfXmmRegIds);
   }
 
-  bool IsX87Register() const {
+  constexpr bool IsX87Register() const {
     CHECK(IsValidManagedRegister());
     const int test = id_ - (kNumberOfCpuRegIds + kNumberOfXmmRegIds);
     return (0 <= test) && (test < kNumberOfX87RegIds);
   }
 
-  bool IsRegisterPair() const {
+  constexpr bool IsRegisterPair() const {
     CHECK(IsValidManagedRegister());
     const int test = id_ -
         (kNumberOfCpuRegIds + kNumberOfXmmRegIds + kNumberOfX87RegIds);
@@ -160,33 +160,33 @@
   // then false is returned.
   bool Overlaps(const X86ManagedRegister& other) const;
 
-  static X86ManagedRegister FromCpuRegister(Register r) {
+  static constexpr X86ManagedRegister FromCpuRegister(Register r) {
     CHECK_NE(r, kNoRegister);
     return FromRegId(r);
   }
 
-  static X86ManagedRegister FromXmmRegister(XmmRegister r) {
+  static constexpr X86ManagedRegister FromXmmRegister(XmmRegister r) {
     CHECK_NE(r, kNoXmmRegister);
     return FromRegId(r + kNumberOfCpuRegIds);
   }
 
-  static X86ManagedRegister FromX87Register(X87Register r) {
+  static constexpr X86ManagedRegister FromX87Register(X87Register r) {
     CHECK_NE(r, kNoX87Register);
     return FromRegId(r + kNumberOfCpuRegIds + kNumberOfXmmRegIds);
   }
 
-  static X86ManagedRegister FromRegisterPair(RegisterPair r) {
+  static constexpr X86ManagedRegister FromRegisterPair(RegisterPair r) {
     CHECK_NE(r, kNoRegisterPair);
     return FromRegId(r + (kNumberOfCpuRegIds + kNumberOfXmmRegIds +
                           kNumberOfX87RegIds));
   }
 
  private:
-  bool IsValidManagedRegister() const {
+  constexpr bool IsValidManagedRegister() const {
     return (0 <= id_) && (id_ < kNumberOfRegIds);
   }
 
-  int RegId() const {
+  constexpr int RegId() const {
     CHECK(!IsNoRegister());
     return id_;
   }
@@ -202,9 +202,9 @@
 
   friend class ManagedRegister;
 
-  explicit X86ManagedRegister(int reg_id) : ManagedRegister(reg_id) {}
+  explicit constexpr X86ManagedRegister(int reg_id) : ManagedRegister(reg_id) {}
 
-  static X86ManagedRegister FromRegId(int reg_id) {
+  static constexpr X86ManagedRegister FromRegId(int reg_id) {
     X86ManagedRegister reg(reg_id);
     CHECK(reg.IsValidManagedRegister());
     return reg;
@@ -215,7 +215,7 @@
 
 }  // namespace x86
 
-inline x86::X86ManagedRegister ManagedRegister::AsX86() const {
+constexpr inline x86::X86ManagedRegister ManagedRegister::AsX86() const {
   x86::X86ManagedRegister reg(id_);
   CHECK(reg.IsNoRegister() || reg.IsValidManagedRegister());
   return reg;
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index 5e7b587..3046710 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -2648,15 +2648,16 @@
 
 constexpr size_t kFramePointerSize = 8;
 
-void X86_64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg,
-                                 const std::vector<ManagedRegister>& spill_regs,
+void X86_64Assembler::BuildFrame(size_t frame_size,
+                                 ManagedRegister method_reg,
+                                 ArrayRef<const ManagedRegister> spill_regs,
                                  const ManagedRegisterEntrySpills& entry_spills) {
   DCHECK_EQ(buffer_.Size(), 0U);  // Nothing emitted yet.
   cfi_.SetCurrentCFAOffset(8);  // Return address on stack.
   CHECK_ALIGNED(frame_size, kStackAlignment);
   int gpr_count = 0;
   for (int i = spill_regs.size() - 1; i >= 0; --i) {
-    x86_64::X86_64ManagedRegister spill = spill_regs.at(i).AsX86_64();
+    x86_64::X86_64ManagedRegister spill = spill_regs[i].AsX86_64();
     if (spill.IsCpuRegister()) {
       pushq(spill.AsCpuRegister());
       gpr_count++;
@@ -2674,7 +2675,7 @@
   // spill xmms
   int64_t offset = rest_of_frame;
   for (int i = spill_regs.size() - 1; i >= 0; --i) {
-    x86_64::X86_64ManagedRegister spill = spill_regs.at(i).AsX86_64();
+    x86_64::X86_64ManagedRegister spill = spill_regs[i].AsX86_64();
     if (spill.IsXmmRegister()) {
       offset -= sizeof(double);
       movsd(Address(CpuRegister(RSP), offset), spill.AsXmmRegister());
@@ -2707,15 +2708,14 @@
   }
 }
 
-void X86_64Assembler::RemoveFrame(size_t frame_size,
-                            const std::vector<ManagedRegister>& spill_regs) {
+void X86_64Assembler::RemoveFrame(size_t frame_size, ArrayRef<const ManagedRegister> spill_regs) {
   CHECK_ALIGNED(frame_size, kStackAlignment);
   cfi_.RememberState();
   int gpr_count = 0;
   // unspill xmms
   int64_t offset = static_cast<int64_t>(frame_size) - (spill_regs.size() * kFramePointerSize) - 2 * kFramePointerSize;
   for (size_t i = 0; i < spill_regs.size(); ++i) {
-    x86_64::X86_64ManagedRegister spill = spill_regs.at(i).AsX86_64();
+    x86_64::X86_64ManagedRegister spill = spill_regs[i].AsX86_64();
     if (spill.IsXmmRegister()) {
       offset += sizeof(double);
       movsd(spill.AsXmmRegister(), Address(CpuRegister(RSP), offset));
@@ -2728,7 +2728,7 @@
   addq(CpuRegister(RSP), Immediate(adjust));
   cfi_.AdjustCFAOffset(-adjust);
   for (size_t i = 0; i < spill_regs.size(); ++i) {
-    x86_64::X86_64ManagedRegister spill = spill_regs.at(i).AsX86_64();
+    x86_64::X86_64ManagedRegister spill = spill_regs[i].AsX86_64();
     if (spill.IsCpuRegister()) {
       popq(spill.AsCpuRegister());
       cfi_.AdjustCFAOffset(-static_cast<int>(kFramePointerSize));
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index 720a402..361f73c 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -704,12 +704,13 @@
   //
 
   // Emit code that will create an activation on the stack
-  void BuildFrame(size_t frame_size, ManagedRegister method_reg,
-                  const std::vector<ManagedRegister>& callee_save_regs,
+  void BuildFrame(size_t frame_size,
+                  ManagedRegister method_reg,
+                  ArrayRef<const ManagedRegister> callee_save_regs,
                   const ManagedRegisterEntrySpills& entry_spills) OVERRIDE;
 
   // Emit code that will remove an activation from the stack
-  void RemoveFrame(size_t frame_size, const std::vector<ManagedRegister>& callee_save_regs)
+  void RemoveFrame(size_t frame_size, ArrayRef<const ManagedRegister> callee_save_regs)
       OVERRIDE;
 
   void IncreaseFrameSize(size_t adjust) OVERRIDE;
diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc
index 9dccc9f..788c725 100644
--- a/compiler/utils/x86_64/assembler_x86_64_test.cc
+++ b/compiler/utils/x86_64/assembler_x86_64_test.cc
@@ -1498,9 +1498,11 @@
   // TODO: more interesting spill registers / entry spills.
 
   // Two random spill regs.
-  std::vector<ManagedRegister> spill_regs;
-  spill_regs.push_back(ManagedFromCpu(x86_64::R10));
-  spill_regs.push_back(ManagedFromCpu(x86_64::RSI));
+  const ManagedRegister raw_spill_regs[] = {
+      ManagedFromCpu(x86_64::R10),
+      ManagedFromCpu(x86_64::RSI)
+  };
+  ArrayRef<const ManagedRegister> spill_regs(raw_spill_regs);
 
   // Three random entry spills.
   ManagedRegisterEntrySpills entry_spills;
@@ -1543,9 +1545,11 @@
   // TODO: more interesting spill registers / entry spills.
 
   // Two random spill regs.
-  std::vector<ManagedRegister> spill_regs;
-  spill_regs.push_back(ManagedFromCpu(x86_64::R10));
-  spill_regs.push_back(ManagedFromCpu(x86_64::RSI));
+  const ManagedRegister raw_spill_regs[] = {
+      ManagedFromCpu(x86_64::R10),
+      ManagedFromCpu(x86_64::RSI)
+  };
+  ArrayRef<const ManagedRegister> spill_regs(raw_spill_regs);
 
   size_t frame_size = 10 * kStackAlignment;
   assembler->RemoveFrame(10 * kStackAlignment, spill_regs);
diff --git a/compiler/utils/x86_64/constants_x86_64.h b/compiler/utils/x86_64/constants_x86_64.h
index 0c782d4..37db6b1 100644
--- a/compiler/utils/x86_64/constants_x86_64.h
+++ b/compiler/utils/x86_64/constants_x86_64.h
@@ -29,15 +29,15 @@
 
 class CpuRegister {
  public:
-  explicit CpuRegister(Register r) : reg_(r) {}
-  explicit CpuRegister(int r) : reg_(Register(r)) {}
-  Register AsRegister() const {
+  explicit constexpr CpuRegister(Register r) : reg_(r) {}
+  explicit constexpr CpuRegister(int r) : reg_(Register(r)) {}
+  constexpr Register AsRegister() const {
     return reg_;
   }
-  uint8_t LowBits() const {
+  constexpr uint8_t LowBits() const {
     return reg_ & 7;
   }
-  bool NeedsRex() const {
+  constexpr bool NeedsRex() const {
     return reg_ > 7;
   }
  private:
@@ -47,15 +47,15 @@
 
 class XmmRegister {
  public:
-  explicit XmmRegister(FloatRegister r) : reg_(r) {}
-  explicit XmmRegister(int r) : reg_(FloatRegister(r)) {}
-  FloatRegister AsFloatRegister() const {
+  explicit constexpr XmmRegister(FloatRegister r) : reg_(r) {}
+  explicit constexpr XmmRegister(int r) : reg_(FloatRegister(r)) {}
+  constexpr FloatRegister AsFloatRegister() const {
     return reg_;
   }
-  uint8_t LowBits() const {
+  constexpr uint8_t LowBits() const {
     return reg_ & 7;
   }
-  bool NeedsRex() const {
+  constexpr bool NeedsRex() const {
     return reg_ > 7;
   }
  private:
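
The constexpr changes in this file touch the REX-prefix helpers. As background: x86-64 names its 16 general-purpose and XMM registers with a 4-bit id, but the ModRM/opcode fields only hold 3 bits; the fourth bit travels in a REX prefix. LowBits() and NeedsRex() split the id accordingly. A standalone illustration:

```cpp
#include <cstdint>
#include <cstdio>

constexpr uint8_t LowBits(int reg) { return static_cast<uint8_t>(reg & 7); }
constexpr bool NeedsRex(int reg)   { return reg > 7; }

int main() {
  // R10 has id 10: low bits 2 go in the ModRM byte, and a REX extension
  // bit must be set to select the upper register bank.
  std::printf("R10 -> low bits %d, REX needed: %d\n", LowBits(10), NeedsRex(10));
}
```
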
diff --git a/compiler/utils/x86_64/managed_register_x86_64.h b/compiler/utils/x86_64/managed_register_x86_64.h
index c4228c1..32af672 100644
--- a/compiler/utils/x86_64/managed_register_x86_64.h
+++ b/compiler/utils/x86_64/managed_register_x86_64.h
@@ -88,52 +88,52 @@
 // There is a one-to-one mapping between ManagedRegister and register id.
 class X86_64ManagedRegister : public ManagedRegister {
  public:
-  CpuRegister AsCpuRegister() const {
+  constexpr CpuRegister AsCpuRegister() const {
     CHECK(IsCpuRegister());
     return CpuRegister(static_cast<Register>(id_));
   }
 
-  XmmRegister AsXmmRegister() const {
+  constexpr XmmRegister AsXmmRegister() const {
     CHECK(IsXmmRegister());
     return XmmRegister(static_cast<FloatRegister>(id_ - kNumberOfCpuRegIds));
   }
 
-  X87Register AsX87Register() const {
+  constexpr X87Register AsX87Register() const {
     CHECK(IsX87Register());
     return static_cast<X87Register>(id_ -
                                     (kNumberOfCpuRegIds + kNumberOfXmmRegIds));
   }
 
-  CpuRegister AsRegisterPairLow() const {
+  constexpr CpuRegister AsRegisterPairLow() const {
     CHECK(IsRegisterPair());
     // Appropriate mapping of register ids allows to use AllocIdLow().
     return FromRegId(AllocIdLow()).AsCpuRegister();
   }
 
-  CpuRegister AsRegisterPairHigh() const {
+  constexpr CpuRegister AsRegisterPairHigh() const {
     CHECK(IsRegisterPair());
     // Appropriate mapping of register ids allows to use AllocIdHigh().
     return FromRegId(AllocIdHigh()).AsCpuRegister();
   }
 
-  bool IsCpuRegister() const {
+  constexpr bool IsCpuRegister() const {
     CHECK(IsValidManagedRegister());
     return (0 <= id_) && (id_ < kNumberOfCpuRegIds);
   }
 
-  bool IsXmmRegister() const {
+  constexpr bool IsXmmRegister() const {
     CHECK(IsValidManagedRegister());
     const int test = id_ - kNumberOfCpuRegIds;
     return (0 <= test) && (test < kNumberOfXmmRegIds);
   }
 
-  bool IsX87Register() const {
+  constexpr bool IsX87Register() const {
     CHECK(IsValidManagedRegister());
     const int test = id_ - (kNumberOfCpuRegIds + kNumberOfXmmRegIds);
     return (0 <= test) && (test < kNumberOfX87RegIds);
   }
 
-  bool IsRegisterPair() const {
+  constexpr bool IsRegisterPair() const {
     CHECK(IsValidManagedRegister());
     const int test = id_ -
         (kNumberOfCpuRegIds + kNumberOfXmmRegIds + kNumberOfX87RegIds);
@@ -147,32 +147,32 @@
   // then false is returned.
   bool Overlaps(const X86_64ManagedRegister& other) const;
 
-  static X86_64ManagedRegister FromCpuRegister(Register r) {
+  static constexpr X86_64ManagedRegister FromCpuRegister(Register r) {
     CHECK_NE(r, kNoRegister);
     return FromRegId(r);
   }
 
-  static X86_64ManagedRegister FromXmmRegister(FloatRegister r) {
+  static constexpr X86_64ManagedRegister FromXmmRegister(FloatRegister r) {
     return FromRegId(r + kNumberOfCpuRegIds);
   }
 
-  static X86_64ManagedRegister FromX87Register(X87Register r) {
+  static constexpr X86_64ManagedRegister FromX87Register(X87Register r) {
     CHECK_NE(r, kNoX87Register);
     return FromRegId(r + kNumberOfCpuRegIds + kNumberOfXmmRegIds);
   }
 
-  static X86_64ManagedRegister FromRegisterPair(RegisterPair r) {
+  static constexpr X86_64ManagedRegister FromRegisterPair(RegisterPair r) {
     CHECK_NE(r, kNoRegisterPair);
     return FromRegId(r + (kNumberOfCpuRegIds + kNumberOfXmmRegIds +
                           kNumberOfX87RegIds));
   }
 
  private:
-  bool IsValidManagedRegister() const {
+  constexpr bool IsValidManagedRegister() const {
     return (0 <= id_) && (id_ < kNumberOfRegIds);
   }
 
-  int RegId() const {
+  constexpr int RegId() const {
     CHECK(!IsNoRegister());
     return id_;
   }
@@ -188,9 +188,9 @@
 
   friend class ManagedRegister;
 
-  explicit X86_64ManagedRegister(int reg_id) : ManagedRegister(reg_id) {}
+  explicit constexpr X86_64ManagedRegister(int reg_id) : ManagedRegister(reg_id) {}
 
-  static X86_64ManagedRegister FromRegId(int reg_id) {
+  static constexpr X86_64ManagedRegister FromRegId(int reg_id) {
     X86_64ManagedRegister reg(reg_id);
     CHECK(reg.IsValidManagedRegister());
     return reg;
@@ -201,7 +201,7 @@
 
 }  // namespace x86_64
 
-inline x86_64::X86_64ManagedRegister ManagedRegister::AsX86_64() const {
+constexpr inline x86_64::X86_64ManagedRegister ManagedRegister::AsX86_64() const {
   x86_64::X86_64ManagedRegister reg(id_);
   CHECK(reg.IsNoRegister() || reg.IsValidManagedRegister());
   return reg;
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index cb274dc..cce83f3 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -1269,6 +1269,21 @@
       CHECK(runtime != nullptr);
       std::set<DexCacheResolvedClasses> resolved_classes(
           profile_compilation_info_->GetResolvedClasses());
+
+      // Filter out class path classes since we don't want to include these in the image.
+      std::unordered_set<std::string> dex_files_locations;
+      for (const DexFile* dex_file : dex_files_) {
+        dex_files_locations.insert(dex_file->GetLocation());
+      }
+      for (auto it = resolved_classes.begin(); it != resolved_classes.end(); ) {
+        if (dex_files_locations.find(it->GetDexLocation()) == dex_files_locations.end()) {
+          VLOG(compiler) << "Removed profile samples for non-app dex file " << it->GetDexLocation();
+          it = resolved_classes.erase(it);
+        } else {
+          ++it;
+        }
+      }
+
       image_classes_.reset(new std::unordered_set<std::string>(
           runtime->GetClassLinker()->GetClassDescriptorsForProfileKeys(resolved_classes)));
       VLOG(compiler) << "Loaded " << image_classes_->size()
@@ -1450,25 +1465,6 @@
           class_linker->RegisterDexFile(*dex_file, Runtime::Current()->GetLinearAlloc())));
     }
 
-    /*
-     * If we're not in interpret-only or verify-none or verify-at-runtime or verify-profile mode,
-     * go ahead and compile small applications.  Don't bother to check if we're doing the image.
-     */
-    if (!IsBootImage() &&
-        compiler_options_->IsCompilationEnabled() &&
-        compiler_kind_ == Compiler::kQuick) {
-      size_t num_methods = 0;
-      for (size_t i = 0; i != dex_files_.size(); ++i) {
-        const DexFile* dex_file = dex_files_[i];
-        CHECK(dex_file != nullptr);
-        num_methods += dex_file->NumMethodIds();
-      }
-      if (num_methods <= compiler_options_->GetNumDexMethodsThreshold()) {
-        compiler_options_->SetCompilerFilter(CompilerFilter::kSpeed);
-        VLOG(compiler) << "Below method threshold, compiling anyways";
-      }
-    }
-
     return true;
   }
 
@@ -2462,6 +2458,7 @@
   bool multi_image_;
   bool is_host_;
   std::string android_root_;
+  // Dex files we are compiling; this does not include the class path dex files.
   std::vector<const DexFile*> dex_files_;
   std::string no_inline_from_string_;
   std::vector<jobject> dex_caches_;
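
The new filtering loop in dex2oat.cc uses the standard erase-while-iterating idiom for associative containers: `erase()` returns the iterator past the removed element, so exactly one of `erase()` or `++it` advances the loop on each pass. A self-contained sketch of the same idiom with simplified types:

```cpp
#include <set>
#include <string>
#include <unordered_set>

// Keep only entries whose location names one of the dex files being compiled.
void FilterToKnownLocations(std::set<std::string>* entries,
                            const std::unordered_set<std::string>& locations) {
  for (auto it = entries->begin(); it != entries->end(); ) {
    if (locations.find(*it) == locations.end()) {
      it = entries->erase(it);  // erase() invalidates it; use the returned iterator
    } else {
      ++it;
    }
  }
}
```
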
diff --git a/libart_fake/Android.mk b/libart_fake/Android.mk
new file mode 100644
index 0000000..ed868a5
--- /dev/null
+++ b/libart_fake/Android.mk
@@ -0,0 +1,34 @@
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+LOCAL_PATH := $(my-dir)
+
+include $(CLEAR_VARS)
+
+LOCAL_MODULE := libart_fake
+LOCAL_INSTALLED_MODULE_STEM := libart.so
+LOCAL_SDK_VERSION := 9
+LOCAL_CPP_EXTENSION := .cc
+LOCAL_SRC_FILES := fake.cc
+LOCAL_SHARED_LIBRARIES := liblog
+
+ifdef TARGET_2ND_ARCH
+    LOCAL_MODULE_PATH_32 := $(TARGET_OUT)/fake-libs
+    LOCAL_MODULE_PATH_64 := $(TARGET_OUT)/fake-libs64
+else
+    LOCAL_MODULE_PATH := $(TARGET_OUT)/fake-libs
+endif
+
+include $(BUILD_SHARED_LIBRARY)
diff --git a/libart_fake/README.md b/libart_fake/README.md
new file mode 100644
index 0000000..6e3621e
--- /dev/null
+++ b/libart_fake/README.md
@@ -0,0 +1,5 @@
+libart_fake
+====
+
+A fake libart made to satisfy some misbehaving apps that will attempt to link
+against libart.so.
diff --git a/libart_fake/fake.cc b/libart_fake/fake.cc
new file mode 100644
index 0000000..8842421
--- /dev/null
+++ b/libart_fake/fake.cc
@@ -0,0 +1,46 @@
+/*
+ * Copyright 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define LOG_TAG "libart_fake"
+
+#include <android/log.h>
+
+#define LOGIT(...) __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, __VA_ARGS__)
+namespace art {
+class Dbg {
+ public:
+  void SuspendVM();
+  void ResumeVM();
+};
+
+class FaultManager {
+ public:
+  void EnsureArtActionInFrontOfSignalChain();
+};
+
+void Dbg::SuspendVM() {
+  LOGIT("Linking to and calling into libart.so internal functions is not supported. "
+        "This call to '%s' is being ignored.", __func__);
+}
+void Dbg::ResumeVM() {
+  LOGIT("Linking to and calling into libart.so internal functions is not supported. "
+        "This call to '%s' is being ignored.", __func__);
+}
+void FaultManager::EnsureArtActionInFrontOfSignalChain() {
+  LOGIT("Linking to and calling into libart.so internal functions is not supported. "
+        "This call to '%s' is being ignored.", __func__);
+}
+};  // namespace art
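
For context, the misbehavior these stubs absorb looks roughly like the sketch below.
The mangled name is the Itanium-ABI encoding of art::Dbg::SuspendVM(); the snippet is
a hypothetical reconstruction, not code from any real app (and calling a member
function through a plain function pointer like this is itself undefined behavior,
which is exactly the kind of abuse being papered over):

    #include <dlfcn.h>

    // A misbehaving app dlopen()s libart.so and pokes an internal symbol. With
    // libart_fake installed in its library search path, the call merely logs an
    // error instead of reaching into (or depending on) runtime internals.
    void PokeLibart() {
      void* handle = dlopen("libart.so", RTLD_NOW);
      if (handle == nullptr) {
        return;
      }
      // Itanium-ABI mangling of art::Dbg::SuspendVM(); hypothetical usage.
      void (*suspend_vm)() =
          reinterpret_cast<void (*)()>(dlsym(handle, "_ZN3art3Dbg9SuspendVMEv"));
      if (suspend_vm != nullptr) {
        suspend_vm();  // With libart_fake this logs "...is being ignored." and returns.
      }
      dlclose(handle);
    }
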
diff --git a/runtime/arch/arm/entrypoints_init_arm.cc b/runtime/arch/arm/entrypoints_init_arm.cc
index f0e9ac5..4c68862 100644
--- a/runtime/arch/arm/entrypoints_init_arm.cc
+++ b/runtime/arch/arm/entrypoints_init_arm.cc
@@ -97,7 +97,8 @@
 
   // Intrinsics
   qpoints->pIndexOf = art_quick_indexof;
-  qpoints->pStringCompareTo = art_quick_string_compareto;
+  // The ARM StringCompareTo intrinsic does not call the runtime.
+  qpoints->pStringCompareTo = nullptr;
   qpoints->pMemcpy = memcpy;
 
   // Read barrier.
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index 321b9d2..1bba4f9 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -1679,145 +1679,6 @@
     pop {r4, r10-r11, pc}
 END art_quick_indexof
 
-   /*
-     * String's compareTo.
-     *
-     * Requires rARG0/rARG1 to have been previously checked for null.  Will
-     * return negative if this's string is < comp, 0 if they are the
-     * same and positive if >.
-     *
-     * On entry:
-     *    r0:   this object pointer
-     *    r1:   comp object pointer
-     *
-     */
-    .extern __memcmp16
-ENTRY art_quick_string_compareto
-    mov    r2, r0         @ this to r2, opening up r0 for return value
-    sub    r0, r2, r1     @ Same?
-    cbnz   r0,1f
-    bx     lr
-1:                        @ Same strings, return.
-
-    push {r4, r7-r12, lr} @ 8 words - keep alignment
-    .cfi_adjust_cfa_offset 32
-    .cfi_rel_offset r4, 0
-    .cfi_rel_offset r7, 4
-    .cfi_rel_offset r8, 8
-    .cfi_rel_offset r9, 12
-    .cfi_rel_offset r10, 16
-    .cfi_rel_offset r11, 20
-    .cfi_rel_offset r12, 24
-    .cfi_rel_offset lr, 28
-
-    ldr    r7, [r2, #MIRROR_STRING_COUNT_OFFSET]
-    ldr    r10, [r1, #MIRROR_STRING_COUNT_OFFSET]
-    add    r2, #MIRROR_STRING_VALUE_OFFSET
-    add    r1, #MIRROR_STRING_VALUE_OFFSET
-
-    /*
-     * At this point, we have:
-     *    value:  r2/r1
-     *    offset: r4/r9
-     *    count:  r7/r10
-     * We're going to compute
-     *    r11 <- countDiff
-     *    r10 <- minCount
-     */
-     subs  r11, r7, r10
-     it    ls
-     movls r10, r7
-
-     /*
-      * Note: data pointers point to previous element so we can use pre-index
-      * mode with base writeback.
-      */
-     subs  r2, #2   @ offset to contents[-1]
-     subs  r1, #2   @ offset to contents[-1]
-
-     /*
-      * At this point we have:
-      *   r2: *this string data
-      *   r1: *comp string data
-      *   r10: iteration count for comparison
-      *   r11: value to return if the first part of the string is equal
-      *   r0: reserved for result
-      *   r3, r4, r7, r8, r9, r12 available for loading string data
-      */
-
-    subs  r10, #2
-    blt   .Ldo_remainder2
-
-      /*
-       * Unroll the first two checks so we can quickly catch early mismatch
-       * on long strings (but preserve incoming alignment)
-       */
-
-    ldrh  r3, [r2, #2]!
-    ldrh  r4, [r1, #2]!
-    ldrh  r7, [r2, #2]!
-    ldrh  r8, [r1, #2]!
-    subs  r0, r3, r4
-    it    eq
-    subseq  r0, r7, r8
-    bne   .Ldone
-    cmp   r10, #28
-    bgt   .Ldo_memcmp16
-    subs  r10, #3
-    blt   .Ldo_remainder
-
-.Lloopback_triple:
-    ldrh  r3, [r2, #2]!
-    ldrh  r4, [r1, #2]!
-    ldrh  r7, [r2, #2]!
-    ldrh  r8, [r1, #2]!
-    ldrh  r9, [r2, #2]!
-    ldrh  r12,[r1, #2]!
-    subs  r0, r3, r4
-    it    eq
-    subseq  r0, r7, r8
-    it    eq
-    subseq  r0, r9, r12
-    bne   .Ldone
-    subs  r10, #3
-    bge   .Lloopback_triple
-
-.Ldo_remainder:
-    adds  r10, #3
-    beq   .Lreturn_diff
-
-.Lloopback_single:
-    ldrh  r3, [r2, #2]!
-    ldrh  r4, [r1, #2]!
-    subs  r0, r3, r4
-    bne   .Ldone
-    subs  r10, #1
-    bne   .Lloopback_single
-
-.Lreturn_diff:
-    mov   r0, r11
-    pop   {r4, r7-r12, pc}
-
-.Ldo_remainder2:
-    adds  r10, #2
-    bne   .Lloopback_single
-    mov   r0, r11
-    pop   {r4, r7-r12, pc}
-
-    /* Long string case */
-.Ldo_memcmp16:
-    mov   r7, r11
-    add   r0, r2, #2
-    add   r1, r1, #2
-    mov   r2, r10
-    bl    __memcmp16
-    cmp   r0, #0
-    it    eq
-    moveq r0, r7
-.Ldone:
-    pop   {r4, r7-r12, pc}
-END art_quick_string_compareto
-
     /* Assembly routines used to handle ABI differences. */
 
     /* double fmod(double a, double b) */
diff --git a/runtime/arch/stub_test.cc b/runtime/arch/stub_test.cc
index 02629e8..a7d6d6f 100644
--- a/runtime/arch/stub_test.cc
+++ b/runtime/arch/stub_test.cc
@@ -1205,9 +1205,9 @@
 
 
 TEST_F(StubTest, StringCompareTo) {
-  // There is no StringCompareTo runtime entrypoint for __aarch64__.
-#if defined(__i386__) || defined(__arm__) || \
-    defined(__mips__) || (defined(__x86_64__) && !defined(__APPLE__))
+  // There is no StringCompareTo runtime entrypoint for __arm__ or __aarch64__.
+#if defined(__i386__) || defined(__mips__) || \
+    (defined(__x86_64__) && !defined(__APPLE__))
   // TODO: Check the "Unresolved" allocation stubs
 
   Thread* self = Thread::Current();
diff --git a/runtime/arch/x86_64/asm_support_x86_64.h b/runtime/arch/x86_64/asm_support_x86_64.h
index eddd172..48bec73 100644
--- a/runtime/arch/x86_64/asm_support_x86_64.h
+++ b/runtime/arch/x86_64/asm_support_x86_64.h
@@ -19,8 +19,8 @@
 
 #include "asm_support.h"
 
-#define FRAME_SIZE_SAVE_ALL_CALLEE_SAVE 64 + 4*8
-#define FRAME_SIZE_REFS_ONLY_CALLEE_SAVE 64 + 4*8
-#define FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE 176 + 4*8
+#define FRAME_SIZE_SAVE_ALL_CALLEE_SAVE (64 + 4*8)
+#define FRAME_SIZE_REFS_ONLY_CALLEE_SAVE (64 + 4*8)
+#define FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE (176 + 4*8)
 
 #endif  // ART_RUNTIME_ARCH_X86_64_ASM_SUPPORT_X86_64_H_
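
Parenthesizing the macro bodies matters because the values are substituted textually
into arbitrary expressions; a small standalone sketch of the precedence bug this
change prevents (not ART code):

    #define FRAME_SIZE_UNSAFE 64 + 4*8    // textual substitution
    #define FRAME_SIZE_SAFE   (64 + 4*8)

    // "FRAME_SIZE_UNSAFE * 2" expands to "64 + 4*8 * 2"; '*' binds tighter than
    // '+', so the result is 128 rather than the intended 192.
    static_assert(FRAME_SIZE_UNSAFE * 2 == 128, "silent precedence bug");
    static_assert(FRAME_SIZE_SAFE * 2 == 192, "parenthesized value scales correctly");
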
diff --git a/runtime/art_field-inl.h b/runtime/art_field-inl.h
index d911497..98d3345 100644
--- a/runtime/art_field-inl.h
+++ b/runtime/art_field-inl.h
@@ -122,21 +122,21 @@
 
 #define FIELD_GET(object, type) \
   DCHECK_EQ(Primitive::kPrim ## type, GetTypeAsPrimitiveType()) << PrettyField(this); \
-  DCHECK(object != nullptr) << PrettyField(this); \
-  DCHECK(!IsStatic() || (object == GetDeclaringClass()) || !Runtime::Current()->IsStarted()); \
+  DCHECK((object) != nullptr) << PrettyField(this); \
+  DCHECK(!IsStatic() || ((object) == GetDeclaringClass()) || !Runtime::Current()->IsStarted()); \
   if (UNLIKELY(IsVolatile())) { \
-    return object->GetField ## type ## Volatile(GetOffset()); \
+    return (object)->GetField ## type ## Volatile(GetOffset()); \
   } \
-  return object->GetField ## type(GetOffset());
+  return (object)->GetField ## type(GetOffset());
 
 #define FIELD_SET(object, type, value) \
   DCHECK_EQ(Primitive::kPrim ## type, GetTypeAsPrimitiveType()) << PrettyField(this); \
-  DCHECK(object != nullptr) << PrettyField(this); \
-  DCHECK(!IsStatic() || (object == GetDeclaringClass()) || !Runtime::Current()->IsStarted()); \
+  DCHECK((object) != nullptr) << PrettyField(this); \
+  DCHECK(!IsStatic() || ((object) == GetDeclaringClass()) || !Runtime::Current()->IsStarted()); \
   if (UNLIKELY(IsVolatile())) { \
-    object->SetField ## type ## Volatile<kTransactionActive>(GetOffset(), value); \
+    (object)->SetField ## type ## Volatile<kTransactionActive>(GetOffset(), value); \
   } else { \
-    object->SetField ## type<kTransactionActive>(GetOffset(), value); \
+    (object)->SetField ## type<kTransactionActive>(GetOffset(), value); \
   }
 
 inline uint8_t ArtField::GetBoolean(mirror::Object* object) {
diff --git a/runtime/asm_support.h b/runtime/asm_support.h
index 21725d3..8eb3742 100644
--- a/runtime/asm_support.h
+++ b/runtime/asm_support.h
@@ -396,10 +396,10 @@
 #define THREAD_CHECKPOINT_REQUEST 2
 ADD_TEST_EQ(THREAD_CHECKPOINT_REQUEST, static_cast<int32_t>(art::kCheckpointRequest))
 
-#define JIT_CHECK_OSR -1
+#define JIT_CHECK_OSR (-1)
 ADD_TEST_EQ(JIT_CHECK_OSR, static_cast<int32_t>(art::jit::kJitCheckForOSR))
 
-#define JIT_HOTNESS_DISABLE -2
+#define JIT_HOTNESS_DISABLE (-2)
 ADD_TEST_EQ(JIT_HOTNESS_DISABLE, static_cast<int32_t>(art::jit::kJitHotnessDisabled))
 
 #if defined(__cplusplus)
diff --git a/runtime/base/histogram-inl.h b/runtime/base/histogram-inl.h
index c7a0ba2..4af47d1 100644
--- a/runtime/base/histogram-inl.h
+++ b/runtime/base/histogram-inl.h
@@ -202,9 +202,13 @@
 
 template <class Value>
 inline void Histogram<Value>::PrintMemoryUse(std::ostream &os) const {
-  os << Name()
-     << ": Avg: " << PrettySize(Mean()) << " Max: "
-     << PrettySize(Max()) << " Min: " << PrettySize(Min()) << "\n";
+  os << Name();
+  if (sample_size_ != 0u) {
+    os << ": Avg: " << PrettySize(Mean()) << " Max: "
+       << PrettySize(Max()) << " Min: " << PrettySize(Min()) << "\n";
+  } else {
+    os << ": <no data>\n";
+  }
 }
 
 template <class Value>
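
The new branch avoids computing statistics over an empty histogram, where the mean
would divide by a zero sample count. A simplified sketch of the guard using a
stand-in type (not ART's actual Histogram API):

    #include <cstddef>
    #include <ostream>
    #include <string>

    struct SimpleStats {
      std::string name;
      std::size_t sample_size = 0;
      double sum = 0.0;

      void PrintMemoryUse(std::ostream& os) const {
        os << name;
        if (sample_size != 0u) {
          os << ": Avg: " << (sum / sample_size) << "\n";  // safe: non-zero divisor
        } else {
          os << ": <no data>\n";  // without the guard this path divided by zero
        }
      }
    };
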
diff --git a/runtime/base/logging.h b/runtime/base/logging.h
index 3b5b8b5..6323eee 100644
--- a/runtime/base/logging.h
+++ b/runtime/base/logging.h
@@ -140,11 +140,11 @@
 
 // Helper for CHECK_STRxx(s1,s2) macros.
 #define CHECK_STROP(s1, s2, sense) \
-  if (UNLIKELY((strcmp(s1, s2) == 0) != sense)) \
+  if (UNLIKELY((strcmp(s1, s2) == 0) != (sense))) \
     LOG(::art::FATAL) << "Check failed: " \
-        << "\"" << s1 << "\"" \
-        << (sense ? " == " : " != ") \
-        << "\"" << s2 << "\""
+        << "\"" << (s1) << "\"" \
+        << ((sense) ? " == " : " != ") \
+        << "\"" << (s2) << "\""
 
 // Check for string (const char*) equality between s1 and s2, LOG(FATAL) if not.
 #define CHECK_STREQ(s1, s2) CHECK_STROP(s1, s2, true)
@@ -156,7 +156,7 @@
     int rc = call args; \
     if (rc != 0) { \
       errno = rc; \
-      PLOG(::art::FATAL) << # call << " failed for " << what; \
+      PLOG(::art::FATAL) << # call << " failed for " << (what); \
     } \
   } while (false)
 
@@ -198,14 +198,14 @@
 // types of LHS and RHS.
 template <typename LHS, typename RHS>
 struct EagerEvaluator {
-  EagerEvaluator(LHS l, RHS r) : lhs(l), rhs(r) { }
+  constexpr EagerEvaluator(LHS l, RHS r) : lhs(l), rhs(r) { }
   LHS lhs;
   RHS rhs;
 };
 
 // Helper function for CHECK_xx.
 template <typename LHS, typename RHS>
-static inline EagerEvaluator<LHS, RHS> MakeEagerEvaluator(LHS lhs, RHS rhs) {
+static inline constexpr EagerEvaluator<LHS, RHS> MakeEagerEvaluator(LHS lhs, RHS rhs) {
   return EagerEvaluator<LHS, RHS>(lhs, rhs);
 }
 
diff --git a/runtime/base/macros.h b/runtime/base/macros.h
index 7a293c7..3c43253 100644
--- a/runtime/base/macros.h
+++ b/runtime/base/macros.h
@@ -75,7 +75,7 @@
     ALWAYS_INLINE void* operator new(size_t, void* ptr) noexcept { return ptr; } \
     ALWAYS_INLINE void operator delete(void*, void*) noexcept { } \
   private: \
-    void* operator new(size_t) = delete
+    void* operator new(size_t) = delete // NOLINT
 
 // The arraysize(arr) macro returns the # of elements in an array arr.
 // The expression is a compile-time constant, and therefore can be
@@ -135,7 +135,7 @@
 #define ARRAYSIZE_UNSAFE(a) \
   ((sizeof(a) / sizeof(*(a))) / static_cast<size_t>(!(sizeof(a) % sizeof(*(a)))))
 
-#define SIZEOF_MEMBER(t, f) sizeof((reinterpret_cast<t*>(4096))->f)
+#define SIZEOF_MEMBER(t, f) sizeof((reinterpret_cast<t*>(4096))->f) // NOLINT
 
 #define OFFSETOF_MEMBER(t, f) \
   (reinterpret_cast<uintptr_t>(&reinterpret_cast<t*>(16)->f) - static_cast<uintptr_t>(16u)) // NOLINT
diff --git a/runtime/base/mutex.cc b/runtime/base/mutex.cc
index 620bf9c..6f689d7 100644
--- a/runtime/base/mutex.cc
+++ b/runtime/base/mutex.cc
@@ -49,7 +49,7 @@
 MutatorMutex* Locks::mutator_lock_ = nullptr;
 Mutex* Locks::profiler_lock_ = nullptr;
 ReaderWriterMutex* Locks::oat_file_manager_lock_ = nullptr;
-ReaderWriterMutex* Locks::oat_file_count_lock_ = nullptr;
+Mutex* Locks::host_dlopen_handles_lock_ = nullptr;
 Mutex* Locks::reference_processor_lock_ = nullptr;
 Mutex* Locks::reference_queue_cleared_references_lock_ = nullptr;
 Mutex* Locks::reference_queue_finalizer_references_lock_ = nullptr;
@@ -953,7 +953,7 @@
     DCHECK(deoptimization_lock_ != nullptr);
     DCHECK(heap_bitmap_lock_ != nullptr);
     DCHECK(oat_file_manager_lock_ != nullptr);
-    DCHECK(oat_file_count_lock_ != nullptr);
+    DCHECK(host_dlopen_handles_lock_ != nullptr);
     DCHECK(intern_table_lock_ != nullptr);
     DCHECK(jni_libraries_lock_ != nullptr);
     DCHECK(logging_lock_ != nullptr);
@@ -971,7 +971,7 @@
     instrument_entrypoints_lock_ = new Mutex("instrument entrypoint lock", current_lock_level);
 
     #define UPDATE_CURRENT_LOCK_LEVEL(new_level) \
-      if (new_level >= current_lock_level) { \
+      if ((new_level) >= current_lock_level) { \
         /* Do not use CHECKs or FATAL here, abort_lock_ is not setup yet. */ \
         fprintf(stderr, "New local level %d is not less than current level %d\n", \
                 new_level, current_lock_level); \
@@ -1042,9 +1042,9 @@
     DCHECK(oat_file_manager_lock_ == nullptr);
     oat_file_manager_lock_ = new ReaderWriterMutex("OatFile manager lock", current_lock_level);
 
-    UPDATE_CURRENT_LOCK_LEVEL(kOatFileCountLock);
-    DCHECK(oat_file_count_lock_ == nullptr);
-    oat_file_count_lock_ = new ReaderWriterMutex("OatFile count lock", current_lock_level);
+    UPDATE_CURRENT_LOCK_LEVEL(kHostDlOpenHandlesLock);
+    DCHECK(host_dlopen_handles_lock_ == nullptr);
+    host_dlopen_handles_lock_ = new Mutex("host dlopen handles lock", current_lock_level);
 
     UPDATE_CURRENT_LOCK_LEVEL(kInternTableLock);
     DCHECK(intern_table_lock_ == nullptr);
diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h
index 3dca12a..3d7624d 100644
--- a/runtime/base/mutex.h
+++ b/runtime/base/mutex.h
@@ -83,7 +83,7 @@
   kDexFileToMethodInlinerMapLock,
   kInternTableLock,
   kOatFileSecondaryLookupLock,
-  kOatFileCountLock,
+  kHostDlOpenHandlesLock,
   kOatFileManagerLock,
   kTracingUniqueMethodsLock,
   kTracingStreamingLock,
@@ -651,11 +651,11 @@
   // Guards opened oat files in OatFileManager.
   static ReaderWriterMutex* oat_file_manager_lock_ ACQUIRED_AFTER(modify_ldt_lock_);
 
-  // Guards opened oat files in OatFileManager.
-  static ReaderWriterMutex* oat_file_count_lock_ ACQUIRED_AFTER(oat_file_manager_lock_);
+  // Guards dlopen_handles_ in DlOpenOatFile.
+  static Mutex* host_dlopen_handles_lock_ ACQUIRED_AFTER(oat_file_manager_lock_);
 
   // Guards intern table.
-  static Mutex* intern_table_lock_ ACQUIRED_AFTER(oat_file_count_lock_);
+  static Mutex* intern_table_lock_ ACQUIRED_AFTER(host_dlopen_handles_lock_);
 
   // Guards reference processor.
   static Mutex* reference_processor_lock_ ACQUIRED_AFTER(intern_table_lock_);
diff --git a/runtime/check_jni.cc b/runtime/check_jni.cc
index 639f913..96fa53c 100644
--- a/runtime/check_jni.cc
+++ b/runtime/check_jni.cc
@@ -2429,19 +2429,20 @@
                                                      Primitive::kPrimDouble));
   }
 
+// NOLINT added to avoid an incorrect warning/fix from clang-tidy.
 #define PRIMITIVE_ARRAY_FUNCTIONS(ctype, name, ptype) \
-  static ctype* Get##name##ArrayElements(JNIEnv* env, ctype##Array array, jboolean* is_copy) { \
-    return reinterpret_cast<ctype*>( \
+  static ctype* Get##name##ArrayElements(JNIEnv* env, ctype##Array array, jboolean* is_copy) { /* NOLINT */ \
+    return reinterpret_cast<ctype*>( /* NOLINT */ \
         GetPrimitiveArrayElements(__FUNCTION__, ptype, env, array, is_copy)); \
   } \
   \
-  static void Release##name##ArrayElements(JNIEnv* env, ctype##Array array, ctype* elems, \
+  static void Release##name##ArrayElements(JNIEnv* env, ctype##Array array, ctype* elems, /* NOLINT */ \
                                            jint mode) { \
     ReleasePrimitiveArrayElements(__FUNCTION__, ptype, env, array, elems, mode); \
   } \
   \
   static void Get##name##ArrayRegion(JNIEnv* env, ctype##Array array, jsize start, jsize len, \
-                                     ctype* buf) { \
+                                     ctype* buf) { /* NOLINT */ \
     GetPrimitiveArrayRegion(__FUNCTION__, ptype, env, array, start, len, buf); \
   } \
   \
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index 1835c72..9ac27a1 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -52,6 +52,7 @@
 #include "gc/accounting/card_table-inl.h"
 #include "gc/accounting/heap_bitmap-inl.h"
 #include "gc/heap.h"
+#include "gc/scoped_gc_critical_section.h"
 #include "gc/space/image_space.h"
 #include "handle_scope-inl.h"
 #include "image-inl.h"
@@ -1347,7 +1348,8 @@
         for (int32_t j = 0; j < static_cast<int32_t>(num_types); j++) {
           // The image space is not yet added to the heap, avoid read barriers.
           mirror::Class* klass = types[j].Read();
-          if (klass != nullptr) {
+          // There may also be boot image classes.
+          if (space->HasAddress(klass)) {
             DCHECK_NE(klass->GetStatus(), mirror::Class::kStatusError);
             // Update the class loader from the one in the image class loader to the one that loaded
             // the app image.
@@ -1386,6 +1388,9 @@
                 VLOG(image) << PrettyMethod(&m);
               }
             }
+          } else {
+            DCHECK(klass == nullptr || heap->ObjectIsInBootImageSpace(klass))
+                << klass << " " << PrettyClass(klass);
           }
         }
       }
@@ -1393,10 +1398,10 @@
         for (int32_t j = 0; j < static_cast<int32_t>(num_types); j++) {
           // The image space is not yet added to the heap, avoid read barriers.
           mirror::Class* klass = types[j].Read();
-          if (klass != nullptr) {
+          if (space->HasAddress(klass)) {
             DCHECK_NE(klass->GetStatus(), mirror::Class::kStatusError);
             if (kIsDebugBuild) {
-              if (new_class_set != nullptr)   {
+              if (new_class_set != nullptr) {
                 auto it = new_class_set->Find(GcRoot<mirror::Class>(klass));
                 DCHECK(it != new_class_set->end());
                 DCHECK_EQ(it->Read(), klass);
@@ -1661,6 +1666,10 @@
     // resolve the same way, simply flatten the hierarchy in the way the resolution order would be,
     // and check that the dex file names are the same.
     for (mirror::ClassLoader* image_class_loader : image_class_loaders) {
+      if (IsBootClassLoader(soa, image_class_loader)) {
+        // The dex cache can reference types from the boot class loader.
+        continue;
+      }
       std::list<mirror::String*> image_dex_file_names;
       std::string temp_error_msg;
       if (!FlattenPathClassLoader(image_class_loader, &image_dex_file_names, &temp_error_msg)) {
@@ -5316,6 +5325,19 @@
   const DexFile::ClassDef& class_def = dex_file.GetClassDef(klass->GetDexClassDefIndex());
   uint16_t super_class_idx = class_def.superclass_idx_;
   if (super_class_idx != DexFile::kDexNoIndex16) {
+    // Check that a class does not inherit from itself directly.
+    //
+    // TODO: This is a cheap check to detect the straightforward case
+    // of a class extending itself (b/28685551), but we should do a
+    // proper cycle detection on loaded classes, to detect all cases
+    // of class circularity errors (b/28830038).
+    if (super_class_idx == class_def.class_idx_) {
+      ThrowClassCircularityError(klass.Get(),
+                                 "Class %s extends itself",
+                                 PrettyDescriptor(klass.Get()).c_str());
+      return false;
+    }
+
     mirror::Class* super_class = ResolveType(dex_file, super_class_idx, klass.Get());
     if (super_class == nullptr) {
       DCHECK(Thread::Current()->IsExceptionPending());
@@ -6968,8 +6990,13 @@
       }
     }
     // Put some random garbage in old methods to help find stale pointers.
-    if (methods != old_methods && old_methods != nullptr) {
-      WriterMutexLock mu(self, ClassTableForClassLoader(klass->GetClassLoader())->GetLock());
+    if (methods != old_methods && old_methods != nullptr && kIsDebugBuild) {
+      // Need to make sure the GC is not running since it could be scanning the methods we are
+      // about to overwrite.
+      ScopedThreadStateChange tsc(self, kSuspended);
+      gc::ScopedGCCriticalSection gcs(self,
+                                      gc::kGcCauseClassLinker,
+                                      gc::kCollectorTypeClassLinker);
       memset(old_methods, 0xFEu, old_size);
     }
   } else {
diff --git a/runtime/common_throws.cc b/runtime/common_throws.cc
index f8e32c4..75cce42 100644
--- a/runtime/common_throws.cc
+++ b/runtime/common_throws.cc
@@ -137,6 +137,13 @@
   ThrowException("Ljava/lang/ClassCircularityError;", c, msg.str().c_str());
 }
 
+void ThrowClassCircularityError(mirror::Class* c, const char* fmt, ...) {
+  va_list args;
+  va_start(args, fmt);
+  ThrowException("Ljava/lang/ClassCircularityError;", c, fmt, &args);
+  va_end(args);
+}
+
 // ClassFormatError
 
 void ThrowClassFormatError(mirror::Class* referrer, const char* fmt, ...) {
diff --git a/runtime/common_throws.h b/runtime/common_throws.h
index 39c4e52..c3a1f09 100644
--- a/runtime/common_throws.h
+++ b/runtime/common_throws.h
@@ -58,6 +58,9 @@
 void ThrowClassCircularityError(mirror::Class* c)
     SHARED_REQUIRES(Locks::mutator_lock_) COLD_ATTR;
 
+void ThrowClassCircularityError(mirror::Class* c, const char* fmt, ...)
+    SHARED_REQUIRES(Locks::mutator_lock_) COLD_ATTR;
+
 // ClassCastException
 
 void ThrowClassCastException(mirror::Class* dest_type, mirror::Class* src_type)
diff --git a/runtime/compiler_filter.cc b/runtime/compiler_filter.cc
index d617caf..dc197c1 100644
--- a/runtime/compiler_filter.cc
+++ b/runtime/compiler_filter.cc
@@ -20,7 +20,7 @@
 
 namespace art {
 
-bool CompilerFilter::IsCompilationEnabled(Filter filter) {
+bool CompilerFilter::IsBytecodeCompilationEnabled(Filter filter) {
   switch (filter) {
     case CompilerFilter::kVerifyNone:
     case CompilerFilter::kVerifyAtRuntime:
@@ -39,6 +39,25 @@
   UNREACHABLE();
 }
 
+bool CompilerFilter::IsJniCompilationEnabled(Filter filter) {
+  switch (filter) {
+    case CompilerFilter::kVerifyNone:
+    case CompilerFilter::kVerifyAtRuntime: return false;
+
+    case CompilerFilter::kVerifyProfile:
+    case CompilerFilter::kInterpretOnly:
+    case CompilerFilter::kSpaceProfile:
+    case CompilerFilter::kSpace:
+    case CompilerFilter::kBalanced:
+    case CompilerFilter::kTime:
+    case CompilerFilter::kSpeedProfile:
+    case CompilerFilter::kSpeed:
+    case CompilerFilter::kEverythingProfile:
+    case CompilerFilter::kEverything: return true;
+  }
+  UNREACHABLE();
+}
+
 bool CompilerFilter::IsVerificationEnabled(Filter filter) {
   switch (filter) {
     case CompilerFilter::kVerifyNone:
diff --git a/runtime/compiler_filter.h b/runtime/compiler_filter.h
index e8d74dd..37631cc 100644
--- a/runtime/compiler_filter.h
+++ b/runtime/compiler_filter.h
@@ -30,10 +30,10 @@
   // Note: Order here matters. Later filter choices are considered "as good
   // as" earlier filter choices.
   enum Filter {
-    kVerifyNone,          // Skip verification and compile nothing except JNI stubs.
-    kVerifyAtRuntime,     // Only compile JNI stubs and verify at runtime.
-    kVerifyProfile,       // Verify only the classes in the profile.
-    kInterpretOnly,       // Verify, and compile only JNI stubs.
+    kVerifyNone,          // Skip verification but mark all classes as verified anyway.
+    kVerifyAtRuntime,     // Delay verification to runtime, do not compile anything.
+    kVerifyProfile,       // Verify only the classes in the profile, compile only JNI stubs.
+    kInterpretOnly,       // Verify everything, compile only JNI stubs.
     kTime,                // Compile methods, but minimize compilation time.
     kSpaceProfile,        // Maximize space savings based on profile.
     kSpace,               // Maximize space savings.
@@ -47,8 +47,12 @@
   static const Filter kDefaultCompilerFilter = kSpeed;
 
   // Returns true if an oat file with this compiler filter contains
-  // compiled executable code.
-  static bool IsCompilationEnabled(Filter filter);
+  // compiled executable code for bytecode.
+  static bool IsBytecodeCompilationEnabled(Filter filter);
+
+  // Returns true if an oat file with this compiler filter contains
+  // compiled executable code for JNI methods.
+  static bool IsJniCompilationEnabled(Filter filter);
 
   // Returns true if this compiler filter requires running verification.
   static bool IsVerificationEnabled(Filter filter);
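
Splitting the predicate lets callers distinguish JNI-stub-only modes from full
compilation. A hedged usage sketch built on the two new functions (the helper itself
is illustrative, not part of the patch):

    // Summarizes what an oat file compiled with `filter` contains.
    const char* DescribeCompilation(CompilerFilter::Filter filter) {
      if (CompilerFilter::IsBytecodeCompilationEnabled(filter)) {
        return "dex bytecode and JNI stubs are compiled";
      }
      if (CompilerFilter::IsJniCompilationEnabled(filter)) {
        return "only JNI stubs are compiled; bytecode is interpreted";
      }
      return "nothing is compiled; native methods use the generic JNI stub";
    }

    // E.g.: kInterpretOnly   -> "only JNI stubs are compiled; bytecode is interpreted"
    //       kVerifyAtRuntime -> "nothing is compiled; native methods use the generic JNI stub"
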
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index 8005642..5b54f7d 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -644,8 +644,7 @@
 
   LOG(INFO) << "Debugger is no longer active";
 
-  // Suspend all threads and exclusively acquire the mutator lock. Set the state of the thread
-  // to kRunnable to avoid scoped object access transitions. Remove the debugger as a listener
+  // Suspend all threads and exclusively acquire the mutator lock. Remove the debugger as a listener
   // and clear the object registry.
   Runtime* runtime = Runtime::Current();
   Thread* self = Thread::Current();
@@ -655,7 +654,6 @@
                                     gc::kGcCauseInstrumentation,
                                     gc::kCollectorTypeInstrumentation);
     ScopedSuspendAll ssa(__FUNCTION__);
-    ThreadState old_state = self->SetStateUnsafe(kRunnable);
     // Debugger may not be active at this point.
     if (IsDebuggerActive()) {
       {
@@ -676,7 +674,6 @@
       }
       gDebuggerActive = false;
     }
-    CHECK_EQ(self->SetStateUnsafe(old_state), kRunnable);
   }
 
   {
diff --git a/runtime/dex_file.h b/runtime/dex_file.h
index ce7f62a..638821b 100644
--- a/runtime/dex_file.h
+++ b/runtime/dex_file.h
@@ -57,7 +57,11 @@
 // TODO: move all of the macro functionality into the DexCache class.
 class DexFile {
  public:
+  // First Dex format version supporting default methods.
   static const uint32_t kDefaultMethodsVersion = 37;
+  // First Dex format version enforcing class definition ordering rules.
+  static const uint32_t kClassDefinitionOrderEnforcedVersion = 37;
+
   static const uint8_t kDexMagic[];
   static constexpr size_t kNumDexVersions = 2;
   static constexpr size_t kDexVersionLen = 4;
diff --git a/runtime/dex_file_verifier.cc b/runtime/dex_file_verifier.cc
index bbffbbb..1d24349 100644
--- a/runtime/dex_file_verifier.cc
+++ b/runtime/dex_file_verifier.cc
@@ -101,31 +101,31 @@
 }
 
 // Helper macro to load string and return false on error.
-#define LOAD_STRING(var, idx, error)                  \
-  const char* var = CheckLoadStringByIdx(idx, error); \
-  if (UNLIKELY(var == nullptr)) {                     \
-    return false;                                     \
+#define LOAD_STRING(var, idx, error)                    \
+  const char* (var) = CheckLoadStringByIdx(idx, error); \
+  if (UNLIKELY((var) == nullptr)) {                     \
+    return false;                                       \
   }
 
 // Helper macro to load string by type idx and return false on error.
-#define LOAD_STRING_BY_TYPE(var, type_idx, error)              \
-  const char* var = CheckLoadStringByTypeIdx(type_idx, error); \
-  if (UNLIKELY(var == nullptr)) {                              \
-    return false;                                              \
+#define LOAD_STRING_BY_TYPE(var, type_idx, error)                \
+  const char* (var) = CheckLoadStringByTypeIdx(type_idx, error); \
+  if (UNLIKELY((var) == nullptr)) {                              \
+    return false;                                                \
   }
 
 // Helper macro to load method id. Return last parameter on error.
-#define LOAD_METHOD(var, idx, error_string, error_stmt)                 \
-  const DexFile::MethodId* var  = CheckLoadMethodId(idx, error_string); \
-  if (UNLIKELY(var == nullptr)) {                                       \
-    error_stmt;                                                         \
+#define LOAD_METHOD(var, idx, error_string, error_stmt)                   \
+  const DexFile::MethodId* (var)  = CheckLoadMethodId(idx, error_string); \
+  if (UNLIKELY((var) == nullptr)) {                                       \
+    error_stmt;                                                           \
   }
 
 // Helper macro to load method id. Return last parameter on error.
-#define LOAD_FIELD(var, idx, fmt, error_stmt)               \
-  const DexFile::FieldId* var = CheckLoadFieldId(idx, fmt); \
-  if (UNLIKELY(var == nullptr)) {                           \
-    error_stmt;                                             \
+#define LOAD_FIELD(var, idx, fmt, error_stmt)                 \
+  const DexFile::FieldId* (var) = CheckLoadFieldId(idx, fmt); \
+  if (UNLIKELY((var) == nullptr)) {                           \
+    error_stmt;                                               \
   }
 
 bool DexFileVerifier::Verify(const DexFile* dex_file, const uint8_t* begin, size_t size,
@@ -1956,6 +1956,31 @@
   }
 
   if (item->superclass_idx_ != DexFile::kDexNoIndex16) {
+    if (header_->GetVersion() >= DexFile::kClassDefinitionOrderEnforcedVersion) {
+      // Check that a class does not inherit from itself directly (by having
+      // the same type idx as its super class).
+      if (UNLIKELY(item->superclass_idx_ == item->class_idx_)) {
+        ErrorStringPrintf("Class with same type idx as its superclass: '%d'", item->class_idx_);
+        return false;
+      }
+
+      // Check that a class is defined after its super class (if the
+      // latter is defined in the same Dex file).
+      const DexFile::ClassDef* superclass_def = dex_file_->FindClassDef(item->superclass_idx_);
+      if (superclass_def != nullptr) {
+        // The superclass is defined in this Dex file.
+        if (superclass_def > item) {
+          // ClassDef item for super class appearing after the class' ClassDef item.
+          ErrorStringPrintf("Invalid class definition ordering:"
+                            " class with type idx: '%d' defined before"
+                            " superclass with type idx: '%d'",
+                            item->class_idx_,
+                            item->superclass_idx_);
+          return false;
+        }
+      }
+    }
+
     LOAD_STRING_BY_TYPE(superclass_descriptor, item->superclass_idx_,
                         "inter_class_def_item superclass_idx")
     if (UNLIKELY(!IsValidDescriptor(superclass_descriptor) || superclass_descriptor[0] != 'L')) {
@@ -1964,12 +1989,39 @@
     }
   }
 
+  // Check interfaces.
   const DexFile::TypeList* interfaces = dex_file_->GetInterfacesList(*item);
   if (interfaces != nullptr) {
     uint32_t size = interfaces->Size();
-
-    // Ensure that all interfaces refer to classes (not arrays or primitives).
     for (uint32_t i = 0; i < size; i++) {
+      if (header_->GetVersion() >= DexFile::kClassDefinitionOrderEnforcedVersion) {
+        // Check that a class does not implement itself directly (by having the
+        // same type idx as one of its immediate implemented interfaces).
+        if (UNLIKELY(interfaces->GetTypeItem(i).type_idx_ == item->class_idx_)) {
+          ErrorStringPrintf("Class with same type idx as implemented interface: '%d'",
+                            item->class_idx_);
+          return false;
+        }
+
+        // Check that a class is defined after the interfaces it implements
+        // (if they are defined in the same Dex file).
+        const DexFile::ClassDef* interface_def =
+            dex_file_->FindClassDef(interfaces->GetTypeItem(i).type_idx_);
+        if (interface_def != nullptr) {
+          // The interface is defined in this Dex file.
+          if (interface_def > item) {
+            // ClassDef item for interface appearing after the class' ClassDef item.
+            ErrorStringPrintf("Invalid class definition ordering:"
+                              " class with type idx: '%d' defined before"
+                              " implemented interface with type idx: '%d'",
+                              item->class_idx_,
+                              interfaces->GetTypeItem(i).type_idx_);
+            return false;
+          }
+        }
+      }
+
+      // Ensure that the interface refers to a class (not an array nor a primitive type).
       LOAD_STRING_BY_TYPE(inf_descriptor, interfaces->GetTypeItem(i).type_idx_,
                           "inter_class_def_item interface type_idx")
       if (UNLIKELY(!IsValidDescriptor(inf_descriptor) || inf_descriptor[0] != 'L')) {
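
The pointer comparisons above work because class_def items form one contiguous array
in the mapped dex file, so pointer order is definition order. A worked example of the
layout rule the verifier now enforces for version-037 files (illustrative):

    // Valid class_defs layout: local superclasses/interfaces precede their users.
    //
    //   class_defs[0]: LBase;  super = Ljava/lang/Object;  // external: unconstrained
    //   class_defs[1]: LMid;   super = LBase;              // LBase defined at [0]: ok
    //   class_defs[2]: LLeaf;  super = LMid;               // LMid defined at [1]: ok
    //
    // Swapping [1] and [2] would make superclass_def > item for LLeaf and trigger
    // "Invalid class definition ordering".
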
diff --git a/runtime/dex_file_verifier_test.cc b/runtime/dex_file_verifier_test.cc
index 3741c1e..4e53914 100644
--- a/runtime/dex_file_verifier_test.cc
+++ b/runtime/dex_file_verifier_test.cc
@@ -184,6 +184,12 @@
   return dex_file;
 }
 
+// To generate a base64 encoded Dex file (such as kGoodTestDex, below)
+// from Smali files, use:
+//
+//   smali -o classes.dex class1.smali [class2.smali ...]
+//   base64 classes.dex >classes.dex.base64
+
 // For reference.
 static const char kGoodTestDex[] =
     "ZGV4CjAzNQDrVbyVkxX1HljTznNf95AglkUAhQuFtmKkAgAAcAAAAHhWNBIAAAAAAAAAAAQCAAAN"
@@ -1521,4 +1527,174 @@
   }
 }
 
+// To generate a base64 encoded Dex file version 037 from Smali files, use:
+//
+//   smali --api-level 24 -o classes.dex class1.smali [class2.smali ...]
+//   base64 classes.dex >classes.dex.base64
+
+// Dex file version 037 generated from:
+//
+//   .class public LB28685551;
+//   .super LB28685551;
+
+static const char kClassExtendsItselfTestDex[] =
+    "ZGV4CjAzNwDeGbgRg1kb6swszpcTWrrOAALB++F4OPT0AAAAcAAAAHhWNBIAAAAAAAAAAKgAAAAB"
+    "AAAAcAAAAAEAAAB0AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAQAAAHgAAABcAAAAmAAAAJgA"
+    "AAAAAAAAAAAAAAEAAAAAAAAAAAAAAP////8AAAAAAAAAAAAAAAALTEIyODY4NTU1MTsAAAAABgAA"
+    "AAAAAAABAAAAAAAAAAEAAAABAAAAcAAAAAIAAAABAAAAdAAAAAYAAAABAAAAeAAAAAIgAAABAAAA"
+    "mAAAAAAQAAABAAAAqAAAAA==";
+
+TEST_F(DexFileVerifierTest, ClassExtendsItself) {
+  VerifyModification(
+      kClassExtendsItselfTestDex,
+      "class_extends_itself",
+      [](DexFile* dex_file ATTRIBUTE_UNUSED) { /* empty */ },
+      "Class with same type idx as its superclass: '0'");
+}
+
+// Dex file version 037 generated from:
+//
+//   .class public LFoo;
+//   .super LBar;
+//
+// and:
+//
+//    .class public LBar;
+//    .super LFoo;
+
+static const char kClassesExtendOneAnotherTestDex[] =
+    "ZGV4CjAzNwBXHSrwpDMwRBkg+L+JeQCuFNRLhQ86duEcAQAAcAAAAHhWNBIAAAAAAAAAANAAAAAC"
+    "AAAAcAAAAAIAAAB4AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAgAAAIAAAABcAAAAwAAAAMAA"
+    "AADHAAAAAAAAAAEAAAABAAAAAQAAAAAAAAAAAAAA/////wAAAAAAAAAAAAAAAAAAAAABAAAAAQAA"
+    "AAAAAAD/////AAAAAAAAAAAAAAAABUxCYXI7AAVMRm9vOwAAAAYAAAAAAAAAAQAAAAAAAAABAAAA"
+    "AgAAAHAAAAACAAAAAgAAAHgAAAAGAAAAAgAAAIAAAAACIAAAAgAAAMAAAAAAEAAAAQAAANAAAAA=";
+
+TEST_F(DexFileVerifierTest, ClassesExtendOneAnother) {
+  VerifyModification(
+      kClassesExtendOneAnotherTestDex,
+      "classes_extend_one_another",
+      [](DexFile* dex_file ATTRIBUTE_UNUSED) { /* empty */ },
+      "Invalid class definition ordering: class with type idx: '1' defined before"
+      " superclass with type idx: '0'");
+}
+
+// Dex file version 037 generated from:
+//
+//   .class public LAll;
+//   .super LYour;
+//
+// and:
+//
+//   .class public LYour;
+//   .super LBase;
+//
+// and:
+//
+//   .class public LBase;
+//   .super LAll;
+
+static const char kCircularClassInheritanceTestDex[] =
+    "ZGV4CjAzNwBMJxgP0SJz6oLXnKfl+J7lSEORLRwF5LNMAQAAcAAAAHhWNBIAAAAAAAAAAAABAAAD"
+    "AAAAcAAAAAMAAAB8AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAwAAAIgAAABkAAAA6AAAAOgA"
+    "AADvAAAA9wAAAAAAAAABAAAAAgAAAAEAAAABAAAAAAAAAAAAAAD/////AAAAAAAAAAAAAAAAAgAA"
+    "AAEAAAABAAAAAAAAAP////8AAAAAAAAAAAAAAAAAAAAAAQAAAAIAAAAAAAAA/////wAAAAAAAAAA"
+    "AAAAAAVMQWxsOwAGTEJhc2U7AAZMWW91cjsAAAYAAAAAAAAAAQAAAAAAAAABAAAAAwAAAHAAAAAC"
+    "AAAAAwAAAHwAAAAGAAAAAwAAAIgAAAACIAAAAwAAAOgAAAAAEAAAAQAAAAABAAA=";
+
+TEST_F(DexFileVerifierTest, CircularClassInheritance) {
+  VerifyModification(
+      kCircularClassInheritanceTestDex,
+      "circular_class_inheritance",
+      [](DexFile* dex_file ATTRIBUTE_UNUSED) { /* empty */ },
+      "Invalid class definition ordering: class with type idx: '1' defined before"
+      " superclass with type idx: '0'");
+}
+
+// Dex file version 037 generated from:
+//
+//   .class public abstract interface LInterfaceImplementsItself;
+//   .super Ljava/lang/Object;
+//   .implements LInterfaceImplementsItself;
+
+static const char kInterfaceImplementsItselfTestDex[] =
+    "ZGV4CjAzNwCKKrjatp8XbXl5S/bEVJnqaBhjZkQY4440AQAAcAAAAHhWNBIAAAAAAAAAANwAAAAC"
+    "AAAAcAAAAAIAAAB4AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAQAAAIAAAACUAAAAoAAAAKAA"
+    "AAC9AAAAAAAAAAEAAAAAAAAAAQYAAAEAAADUAAAA/////wAAAAAAAAAAAAAAABtMSW50ZXJmYWNl"
+    "SW1wbGVtZW50c0l0c2VsZjsAEkxqYXZhL2xhbmcvT2JqZWN0OwAAAAABAAAAAAAAAAcAAAAAAAAA"
+    "AQAAAAAAAAABAAAAAgAAAHAAAAACAAAAAgAAAHgAAAAGAAAAAQAAAIAAAAACIAAAAgAAAKAAAAAB"
+    "EAAAAQAAANQAAAAAEAAAAQAAANwAAAA=";
+
+TEST_F(DexFileVerifierTest, InterfaceImplementsItself) {
+  VerifyModification(
+      kInterfaceImplementsItselfTestDex,
+      "interface_implements_itself",
+      [](DexFile* dex_file ATTRIBUTE_UNUSED) { /* empty */ },
+      "Class with same type idx as implemented interface: '0'");
+}
+
+// Dex file version 037 generated from:
+//
+//   .class public abstract interface LPing;
+//   .super Ljava/lang/Object;
+//   .implements LPong;
+//
+// and:
+//
+//   .class public abstract interface LPong;
+//   .super Ljava/lang/Object;
+//   .implements LPing;
+
+static const char kInterfacesImplementOneAnotherTestDex[] =
+    "ZGV4CjAzNwD0Kk9sxlYdg3Dy1Cff0gQCuJAQfEP6ohZUAQAAcAAAAHhWNBIAAAAAAAAAAPwAAAAD"
+    "AAAAcAAAAAMAAAB8AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAgAAAIgAAACMAAAAyAAAAMgA"
+    "AADQAAAA2AAAAAAAAAABAAAAAgAAAAEAAAABBgAAAgAAAOwAAAD/////AAAAAAAAAAAAAAAAAAAA"
+    "AAEGAAACAAAA9AAAAP////8AAAAAAAAAAAAAAAAGTFBpbmc7AAZMUG9uZzsAEkxqYXZhL2xhbmcv"
+    "T2JqZWN0OwABAAAAAAAAAAEAAAABAAAABwAAAAAAAAABAAAAAAAAAAEAAAADAAAAcAAAAAIAAAAD"
+    "AAAAfAAAAAYAAAACAAAAiAAAAAIgAAADAAAAyAAAAAEQAAACAAAA7AAAAAAQAAABAAAA/AAAAA==";
+
+TEST_F(DexFileVerifierTest, InterfacesImplementOneAnother) {
+  VerifyModification(
+      kInterfacesImplementOneAnotherTestDex,
+      "interfaces_implement_one_another",
+      [](DexFile* dex_file ATTRIBUTE_UNUSED) { /* empty */ },
+      "Invalid class definition ordering: class with type idx: '1' defined before"
+      " implemented interface with type idx: '0'");
+}
+
+// Dex file version 037 generated from:
+//
+//   .class public abstract interface LA;
+//   .super Ljava/lang/Object;
+//   .implements LB;
+//
+// and:
+//
+//   .class public abstract interface LB;
+//   .super Ljava/lang/Object;
+//   .implements LC;
+//
+// and:
+//
+//   .class public abstract interface LC;
+//   .super Ljava/lang/Object;
+//   .implements LA;
+
+static const char kCircularInterfaceImplementationTestDex[] =
+    "ZGV4CjAzNwCzKmD5Fol6XAU6ichYHcUTIP7Z7MdTcEmEAQAAcAAAAHhWNBIAAAAAAAAAACwBAAAE"
+    "AAAAcAAAAAQAAACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAwAAAJAAAACUAAAA8AAAAPAA"
+    "AAD1AAAA+gAAAP8AAAAAAAAAAQAAAAIAAAADAAAAAgAAAAEGAAADAAAAHAEAAP////8AAAAAAAAA"
+    "AAAAAAABAAAAAQYAAAMAAAAUAQAA/////wAAAAAAAAAAAAAAAAAAAAABBgAAAwAAACQBAAD/////"
+    "AAAAAAAAAAAAAAAAA0xBOwADTEI7AANMQzsAEkxqYXZhL2xhbmcvT2JqZWN0OwAAAQAAAAIAAAAB"
+    "AAAAAAAAAAEAAAABAAAABwAAAAAAAAABAAAAAAAAAAEAAAAEAAAAcAAAAAIAAAAEAAAAgAAAAAYA"
+    "AAADAAAAkAAAAAIgAAAEAAAA8AAAAAEQAAADAAAAFAEAAAAQAAABAAAALAEAAA==";
+
+TEST_F(DexFileVerifierTest, CircularInterfaceImplementation) {
+  VerifyModification(
+      kCircularInterfaceImplementationTestDex,
+      "circular_interface_implementation",
+      [](DexFile* dex_file ATTRIBUTE_UNUSED) { /* empty */ },
+      "Invalid class definition ordering: class with type idx: '2' defined before"
+      " implemented interface with type idx: '0'");
+}
+
 }  // namespace art
diff --git a/runtime/dex_instruction.cc b/runtime/dex_instruction.cc
index 3f62124..300e618 100644
--- a/runtime/dex_instruction.cc
+++ b/runtime/dex_instruction.cc
@@ -69,11 +69,11 @@
 
 int const Instruction::kInstructionSizeInCodeUnits[] = {
 #define INSTRUCTION_SIZE(opcode, c, p, format, r, i, a, v) \
-    ((opcode == NOP)                        ? -1 : \
-     ((format >= k10x) && (format <= k10t)) ?  1 : \
-     ((format >= k20t) && (format <= k25x)) ?  2 : \
-     ((format >= k32x) && (format <= k3rc)) ?  3 : \
-      (format == k51l)                      ?  5 : -1),
+    (((opcode) == NOP)                        ? -1 :       \
+     (((format) >= k10x) && ((format) <= k10t)) ?  1 :     \
+     (((format) >= k20t) && ((format) <= k25x)) ?  2 :     \
+     (((format) >= k32x) && ((format) <= k3rc)) ?  3 :     \
+      ((format) == k51l)                      ?  5 : -1),
 #include "dex_instruction_list.h"
   DEX_INSTRUCTION_LIST(INSTRUCTION_SIZE)
 #undef DEX_INSTRUCTION_LIST
diff --git a/runtime/dex_instruction.h b/runtime/dex_instruction.h
index 035230e..89c3db6 100644
--- a/runtime/dex_instruction.h
+++ b/runtime/dex_instruction.h
@@ -80,7 +80,7 @@
   };
 
   enum Code {  // private marker to avoid generate-operator-out.py from processing.
-#define INSTRUCTION_ENUM(opcode, cname, p, f, r, i, a, v) cname = opcode,
+#define INSTRUCTION_ENUM(opcode, cname, p, f, r, i, a, v) cname = (opcode),
 #include "dex_instruction_list.h"
     DEX_INSTRUCTION_LIST(INSTRUCTION_ENUM)
 #undef DEX_INSTRUCTION_LIST
diff --git a/runtime/entrypoints/quick/quick_alloc_entrypoints.cc b/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
index 4e4f851..c3b3ac0 100644
--- a/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
@@ -32,7 +32,7 @@
     uint32_t type_idx, ArtMethod* method, Thread* self) \
     SHARED_REQUIRES(Locks::mutator_lock_) { \
   ScopedQuickEntrypointChecks sqec(self); \
-  if (kUseTlabFastPath && !instrumented_bool && allocator_type == gc::kAllocatorTypeTLAB) { \
+  if (kUseTlabFastPath && !(instrumented_bool) && (allocator_type) == gc::kAllocatorTypeTLAB) { \
     mirror::Class* klass = method->GetDexCacheResolvedType<false>(type_idx, sizeof(void*)); \
     if (LIKELY(klass != nullptr && klass->IsInitialized() && !klass->IsFinalizable())) { \
       size_t byte_count = klass->GetObjectSize(); \
@@ -59,7 +59,7 @@
     mirror::Class* klass, ArtMethod* method ATTRIBUTE_UNUSED, Thread* self) \
     SHARED_REQUIRES(Locks::mutator_lock_) { \
   ScopedQuickEntrypointChecks sqec(self); \
-  if (kUseTlabFastPath && !instrumented_bool && allocator_type == gc::kAllocatorTypeTLAB) { \
+  if (kUseTlabFastPath && !(instrumented_bool) && (allocator_type) == gc::kAllocatorTypeTLAB) { \
     if (LIKELY(klass->IsInitialized())) { \
       size_t byte_count = klass->GetObjectSize(); \
       byte_count = RoundUp(byte_count, gc::space::BumpPointerSpace::kAlignment); \
@@ -85,7 +85,7 @@
     mirror::Class* klass, ArtMethod* method ATTRIBUTE_UNUSED, Thread* self) \
     SHARED_REQUIRES(Locks::mutator_lock_) { \
   ScopedQuickEntrypointChecks sqec(self); \
-  if (kUseTlabFastPath && !instrumented_bool && allocator_type == gc::kAllocatorTypeTLAB) { \
+  if (kUseTlabFastPath && !(instrumented_bool) && (allocator_type) == gc::kAllocatorTypeTLAB) { \
     size_t byte_count = klass->GetObjectSize(); \
     byte_count = RoundUp(byte_count, gc::space::BumpPointerSpace::kAlignment); \
     mirror::Object* obj; \
@@ -136,7 +136,7 @@
     uint32_t type_idx, int32_t component_count, ArtMethod* method, Thread* self) \
     SHARED_REQUIRES(Locks::mutator_lock_) { \
   ScopedQuickEntrypointChecks sqec(self); \
-  if (!instrumented_bool) { \
+  if (!(instrumented_bool)) { \
     return CheckAndAllocArrayFromCode(type_idx, component_count, method, self, false, allocator_type); \
   } else { \
     return CheckAndAllocArrayFromCodeInstrumented(type_idx, component_count, method, self, false, allocator_type); \
@@ -146,7 +146,7 @@
     uint32_t type_idx, int32_t component_count, ArtMethod* method, Thread* self) \
     SHARED_REQUIRES(Locks::mutator_lock_) { \
   ScopedQuickEntrypointChecks sqec(self); \
-  if (!instrumented_bool) { \
+  if (!(instrumented_bool)) { \
     return CheckAndAllocArrayFromCode(type_idx, component_count, method, self, true, allocator_type); \
   } else { \
     return CheckAndAllocArrayFromCodeInstrumented(type_idx, component_count, method, self, true, allocator_type); \
@@ -170,7 +170,7 @@
   return mirror::String::AllocFromCharArray<instrumented_bool>(self, char_count, handle_array, \
                                                                offset, allocator_type); \
 } \
-extern "C" mirror::String* artAllocStringFromStringFromCode##suffix##suffix2( \
+extern "C" mirror::String* artAllocStringFromStringFromCode##suffix##suffix2( /* NOLINT */ \
     mirror::String* string, Thread* self) \
     SHARED_REQUIRES(Locks::mutator_lock_) { \
   StackHandleScope<1> hs(self); \
diff --git a/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc b/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc
index c019cae..f35c2fe 100644
--- a/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc
@@ -29,39 +29,51 @@
 
 namespace art {
 
-extern "C" NO_RETURN void artDeoptimize(Thread* self) SHARED_REQUIRES(Locks::mutator_lock_) {
-  ScopedQuickEntrypointChecks sqec(self);
-
+NO_RETURN static void artDeoptimizeImpl(Thread* self, bool single_frame)
+      SHARED_REQUIRES(Locks::mutator_lock_) {
   if (VLOG_IS_ON(deopt)) {
-    LOG(INFO) << "Deopting:";
-    self->Dump(LOG(INFO));
+    if (single_frame) {
+      // Deopt logging will be in DeoptimizeSingleFrame. It is there to take advantage of the
+      // specialized visitor that will show whether a method is Quick or Shadow.
+    } else {
+      LOG(INFO) << "Deopting:";
+      self->Dump(LOG(INFO));
+    }
   }
 
   self->AssertHasDeoptimizationContext();
-  self->SetException(Thread::GetDeoptimizationException());
-  self->QuickDeliverException();
+  QuickExceptionHandler exception_handler(self, true);
+  if (single_frame) {
+    exception_handler.DeoptimizeSingleFrame();
+  } else {
+    exception_handler.DeoptimizeStack();
+  }
+  uintptr_t return_pc = exception_handler.UpdateInstrumentationStack();
+  if (exception_handler.IsFullFragmentDone()) {
+    exception_handler.DoLongJump(true);
+  } else {
+    exception_handler.DeoptimizePartialFragmentFixup(return_pc);
+    // We cannot smash the caller-saves, as we need the ArtMethod in a parameter register that would
+    // be caller-saved. This has the downside that we cannot track incorrect register usage down the
+    // line.
+    exception_handler.DoLongJump(false);
+  }
 }
 
+extern "C" NO_RETURN void artDeoptimize(Thread* self) SHARED_REQUIRES(Locks::mutator_lock_) {
+  ScopedQuickEntrypointChecks sqec(self);
+  artDeoptimizeImpl(self, false);
+}
+
+// This is called directly from compiled code by an HDeoptimize instruction.
 extern "C" NO_RETURN void artDeoptimizeFromCompiledCode(Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
-
-  // Deopt logging will be in DeoptimizeSingleFrame. It is there to take advantage of the
-  // specialized visitor that will show whether a method is Quick or Shadow.
-
   // Before deoptimizing to interpreter, we must push the deoptimization context.
   JValue return_value;
   return_value.SetJ(0);  // we never deoptimize from compiled code with an invoke result.
   self->PushDeoptimizationContext(return_value, false, /* from_code */ true, self->GetException());
-
-  QuickExceptionHandler exception_handler(self, true);
-  exception_handler.DeoptimizeSingleFrame();
-  exception_handler.UpdateInstrumentationStack();
-  exception_handler.DeoptimizeSingleFrameArchDependentFixup();
-  // We cannot smash the caller-saves, as we need the ArtMethod in a parameter register that would
-  // be caller-saved. This has the downside that we cannot track incorrect register usage down the
-  // line.
-  exception_handler.DoLongJump(false);
+  artDeoptimizeImpl(self, true);
 }
 
 }  // namespace art
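
After the refactor the two entrypoints share one unwinding path and differ only in
how frames are deoptimized; the control flow, paraphrased from the code above as an
orientation sketch (no new API implied):

    // artDeoptimize(self)
    //   -> artDeoptimizeImpl(self, /* single_frame */ false)   // full-stack deopt
    //
    // artDeoptimizeFromCompiledCode(self)
    //   -> PushDeoptimizationContext(...)                      // from_code == true
    //   -> artDeoptimizeImpl(self, /* single_frame */ true)    // single-frame deopt
    //
    // artDeoptimizeImpl either long-jumps directly once the full managed fragment
    // is done, or first runs DeoptimizePartialFragmentFixup(return_pc) and then
    // long-jumps without smashing the caller-save registers.
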
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index e9cdbb7..25b0ef5 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -654,7 +654,7 @@
 
   JValue tmp_value;
   ShadowFrame* deopt_frame = self->PopStackedShadowFrame(
-      StackedShadowFrameType::kSingleFrameDeoptimizationShadowFrame, false);
+      StackedShadowFrameType::kDeoptimizationShadowFrame, false);
   ManagedStack fragment;
 
   DCHECK(!method->IsNative()) << PrettyMethod(method);
@@ -667,7 +667,7 @@
   JValue result;
 
   if (deopt_frame != nullptr) {
-    // Coming from single-frame deopt.
+    // Coming from partial-fragment deopt.
 
     if (kIsDebugBuild) {
       // Sanity-check: are the methods as expected? We check that the last shadow frame (the bottom
@@ -681,7 +681,7 @@
     }
 
     if (VLOG_IS_ON(deopt)) {
-      // Print out the stack to verify that it was a single-frame deopt.
+      // Print out the stack to verify that it was a partial-fragment deopt.
       LOG(INFO) << "Continue-ing from deopt. Stack is:";
       QuickExceptionHandler::DumpFramesWithType(self, true);
     }
@@ -689,7 +689,6 @@
     mirror::Throwable* pending_exception = nullptr;
     bool from_code = false;
     self->PopDeoptimizationContext(&result, &pending_exception, /* out */ &from_code);
-    CHECK(from_code);
 
     // Push a transition back into managed code onto the linked list in thread.
     self->PushManagedStackFragment(&fragment);
@@ -755,7 +754,9 @@
 
   // Request a stack deoptimization if needed
   ArtMethod* caller = QuickArgumentVisitor::GetCallingMethod(sp);
-  if (UNLIKELY(Dbg::IsForcedInterpreterNeededForUpcall(self, caller))) {
+  uintptr_t caller_pc = QuickArgumentVisitor::GetCallingPc(sp);
+  if (UNLIKELY(Dbg::IsForcedInterpreterNeededForUpcall(self, caller) &&
+               Runtime::Current()->IsDeoptimizeable(caller_pc))) {
     // Push the context of the deoptimization stack so we can restore the return value and the
     // exception before executing the deoptimized frames.
     self->PushDeoptimizationContext(
diff --git a/runtime/gc/collector/concurrent_copying-inl.h b/runtime/gc/collector/concurrent_copying-inl.h
index 26f5ad3..64fa434 100644
--- a/runtime/gc/collector/concurrent_copying-inl.h
+++ b/runtime/gc/collector/concurrent_copying-inl.h
@@ -28,6 +28,47 @@
 namespace gc {
 namespace collector {
 
+inline mirror::Object* ConcurrentCopying::MarkUnevacFromSpaceRegionOrImmuneSpace(
+    mirror::Object* ref, accounting::ContinuousSpaceBitmap* bitmap) {
+  // For the Baker-style RB, in a rare case, we could incorrectly change the object from white
+  // to gray even though the object has already been marked through. This happens if a mutator
+  // thread gets preempted before the AtomicSetReadBarrierPointer below, GC marks through the
+  // object (changes it from white to gray and back to white), and the thread runs and
+  // incorrectly changes it from white to gray. We need to detect such "false gray" cases and
+  // change the objects back to white at the end of marking.
+  if (kUseBakerReadBarrier) {
+    // Test the bitmap first to reduce the chance of false gray cases.
+    if (bitmap->Test(ref)) {
+      return ref;
+    }
+  }
+  // This may or may not succeed, which is ok because the object may already be gray.
+  bool cas_success = false;
+  if (kUseBakerReadBarrier) {
+    cas_success = ref->AtomicSetReadBarrierPointer(ReadBarrier::WhitePtr(),
+                                                   ReadBarrier::GrayPtr());
+  }
+  if (bitmap->AtomicTestAndSet(ref)) {
+    // Already marked.
+    if (kUseBakerReadBarrier &&
+        cas_success &&
+        // The object could be white here if a thread gets preempted after a success at the
+        // above AtomicSetReadBarrierPointer, GC has marked through it, and the thread runs up
+        // to this point.
+        ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr()) {
+      // Register a "false-gray" object to change it from gray to white at the end of marking.
+      PushOntoFalseGrayStack(ref);
+    }
+  } else {
+    // Newly marked.
+    if (kUseBakerReadBarrier) {
+      DCHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::GrayPtr());
+    }
+    PushOntoMarkStack(ref);
+  }
+  return ref;
+}
+
 inline mirror::Object* ConcurrentCopying::Mark(mirror::Object* from_ref) {
   if (from_ref == nullptr) {
     return nullptr;
@@ -68,21 +109,7 @@
       return to_ref;
     }
     case space::RegionSpace::RegionType::kRegionTypeUnevacFromSpace: {
-      // This may or may not succeed, which is ok.
-      if (kUseBakerReadBarrier) {
-        from_ref->AtomicSetReadBarrierPointer(ReadBarrier::WhitePtr(), ReadBarrier::GrayPtr());
-      }
-      mirror::Object* to_ref = from_ref;
-      if (region_space_bitmap_->AtomicTestAndSet(from_ref)) {
-        // Already marked.
-      } else {
-        // Newly marked.
-        if (kUseBakerReadBarrier) {
-          DCHECK_EQ(to_ref->GetReadBarrierPointer(), ReadBarrier::GrayPtr());
-        }
-        PushOntoMarkStack(to_ref);
-      }
-      return to_ref;
+      return MarkUnevacFromSpaceRegionOrImmuneSpace(from_ref, region_space_bitmap_);
     }
     case space::RegionSpace::RegionType::kRegionTypeNone:
       return MarkNonMoving(from_ref);
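
The false-gray race described in the new comment is easiest to follow as an
interleaving. A sketch for an object O that starts white, with GC the collector
thread and M a mutator (derived from the comments in the code above):

    // M : reads O's read barrier pointer (white); about to CAS white -> gray
    // M : <preempted>
    // GC: marks O through: white -> gray, scans its fields, gray -> white
    // M : <resumes>; the CAS white -> gray succeeds again, so O is "false gray"
    // M : bitmap->AtomicTestAndSet(O) reports already marked, so O is pushed onto
    //     false_gray_stack_ and flipped back to white by ProcessFalseGrayStack()
    //     at the end of marking.
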
diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc
index d393f0b..3f8f628 100644
--- a/runtime/gc/collector/concurrent_copying.cc
+++ b/runtime/gc/collector/concurrent_copying.cc
@@ -165,6 +165,10 @@
               << reinterpret_cast<void*>(region_space_->Limit());
   }
   CheckEmptyMarkStack();
+  if (kIsDebugBuild) {
+    MutexLock mu(Thread::Current(), mark_stack_lock_);
+    CHECK(false_gray_stack_.empty());
+  }
   immune_spaces_.Reset();
   bytes_moved_.StoreRelaxed(0);
   objects_moved_.StoreRelaxed(0);
@@ -247,6 +251,9 @@
     }
     cc->is_marking_ = true;
     cc->mark_stack_mode_.StoreRelaxed(ConcurrentCopying::kMarkStackModeThreadLocal);
+    if (kIsDebugBuild) {
+      cc->region_space_->AssertAllRegionLiveBytesZeroOrCleared();
+    }
     if (UNLIKELY(Runtime::Current()->IsActiveTransaction())) {
       CHECK(Runtime::Current()->IsAotCompiler());
       TimingLogger::ScopedTiming split2("(Paused)VisitTransactionRoots", cc->GetTimings());
@@ -314,17 +321,7 @@
       DCHECK(collector_->heap_->GetMarkBitmap()->Test(obj))
           << "Immune space object must be already marked";
     }
-    // This may or may not succeed, which is ok.
-    if (kUseBakerReadBarrier) {
-      obj->AtomicSetReadBarrierPointer(ReadBarrier::WhitePtr(), ReadBarrier::GrayPtr());
-    }
-    if (cc_bitmap->AtomicTestAndSet(obj)) {
-      // Already marked. Do nothing.
-    } else {
-      // Newly marked. Set the gray bit and push it onto the mark stack.
-      CHECK(!kUseBakerReadBarrier || obj->GetReadBarrierPointer() == ReadBarrier::GrayPtr());
-      collector_->PushOntoMarkStack(obj);
-    }
+    collector_->MarkUnevacFromSpaceRegionOrImmuneSpace(obj, cc_bitmap);
   }
 
  private:
@@ -459,6 +456,9 @@
     Runtime::Current()->GetClassLinker()->CleanupClassLoaders();
     // Marking is done. Disable marking.
     DisableMarking();
+    if (kUseBakerReadBarrier) {
+      ProcessFalseGrayStack();
+    }
     CheckEmptyMarkStack();
   }
 
@@ -548,6 +548,32 @@
   mark_stack_mode_.StoreSequentiallyConsistent(kMarkStackModeOff);
 }
 
+void ConcurrentCopying::PushOntoFalseGrayStack(mirror::Object* ref) {
+  CHECK(kUseBakerReadBarrier);
+  DCHECK(ref != nullptr);
+  MutexLock mu(Thread::Current(), mark_stack_lock_);
+  false_gray_stack_.push_back(ref);
+}
+
+void ConcurrentCopying::ProcessFalseGrayStack() {
+  CHECK(kUseBakerReadBarrier);
+  // Change the objects on the false gray stack from gray to white.
+  MutexLock mu(Thread::Current(), mark_stack_lock_);
+  for (mirror::Object* obj : false_gray_stack_) {
+    DCHECK(IsMarked(obj));
+    // The object could be white here if a thread got preempted after succeeding at the
+    // AtomicSetReadBarrierPointer in Mark(), the GC started marking through it (it was still
+    // gray when the thread registered it onto the false gray stack), and the GC has since
+    // finished marking it and turned it white.
+    if (obj->GetReadBarrierPointer() == ReadBarrier::GrayPtr()) {
+      bool success = obj->AtomicSetReadBarrierPointer(ReadBarrier::GrayPtr(),
+                                                      ReadBarrier::WhitePtr());
+      DCHECK(success);
+    }
+  }
+  false_gray_stack_.clear();
+}
+
 void ConcurrentCopying::IssueEmptyCheckpoint() {
   Thread* self = Thread::Current();
   EmptyCheckpoint check_point(this);
@@ -655,8 +681,8 @@
   return heap_->live_stack_.get();
 }
 
-// The following visitors are that used to verify that there's no
-// references to the from-space left after marking.
+// The following visitors are used to verify that there are no references to the from-space left
+// after marking.
 class ConcurrentCopyingVerifyNoFromSpaceRefsVisitor : public SingleRootVisitor {
  public:
   explicit ConcurrentCopyingVerifyNoFromSpaceRefsVisitor(ConcurrentCopying* collector)
@@ -670,20 +696,9 @@
     }
     collector_->AssertToSpaceInvariant(nullptr, MemberOffset(0), ref);
     if (kUseBakerReadBarrier) {
-      if (collector_->RegionSpace()->IsInToSpace(ref)) {
-        CHECK(ref->GetReadBarrierPointer() == nullptr)
-            << "To-space ref " << ref << " " << PrettyTypeOf(ref)
-            << " has non-white rb_ptr " << ref->GetReadBarrierPointer();
-      } else {
-        CHECK(ref->GetReadBarrierPointer() == ReadBarrier::BlackPtr() ||
-              (ref->GetReadBarrierPointer() == ReadBarrier::WhitePtr() &&
-               collector_->IsOnAllocStack(ref)))
-            << "Non-moving/unevac from space ref " << ref << " " << PrettyTypeOf(ref)
-            << " has non-black rb_ptr " << ref->GetReadBarrierPointer()
-            << " but isn't on the alloc stack (and has white rb_ptr)."
-            << " Is it in the non-moving space="
-            << (collector_->GetHeap()->GetNonMovingSpace()->HasAddress(ref));
-      }
+      CHECK(ref->GetReadBarrierPointer() == ReadBarrier::WhitePtr())
+          << "Ref " << ref << " " << PrettyTypeOf(ref)
+          << " has non-white rb_ptr " << ref->GetReadBarrierPointer();
     }
   }
 
@@ -749,18 +764,8 @@
     ConcurrentCopyingVerifyNoFromSpaceRefsFieldVisitor visitor(collector);
     obj->VisitReferences(visitor, visitor);
     if (kUseBakerReadBarrier) {
-      if (collector->RegionSpace()->IsInToSpace(obj)) {
-        CHECK(obj->GetReadBarrierPointer() == nullptr)
-            << "obj=" << obj << " non-white rb_ptr " << obj->GetReadBarrierPointer();
-      } else {
-        CHECK(obj->GetReadBarrierPointer() == ReadBarrier::BlackPtr() ||
-              (obj->GetReadBarrierPointer() == ReadBarrier::WhitePtr() &&
-               collector->IsOnAllocStack(obj)))
-            << "Non-moving space/unevac from space ref " << obj << " " << PrettyTypeOf(obj)
-            << " has non-black rb_ptr " << obj->GetReadBarrierPointer()
-            << " but isn't on the alloc stack (and has white rb_ptr). Is it in the non-moving space="
-            << (collector->GetHeap()->GetNonMovingSpace()->HasAddress(obj));
-      }
+      CHECK(obj->GetReadBarrierPointer() == ReadBarrier::WhitePtr())
+          << "obj=" << obj << " non-white rb_ptr " << obj->GetReadBarrierPointer();
     }
   }
 
@@ -1069,7 +1074,6 @@
   }
   // Scan ref fields.
   Scan(to_ref);
-  // Mark the gray ref as white or black.
   if (kUseBakerReadBarrier) {
     DCHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr())
         << " " << to_ref << " " << to_ref->GetReadBarrierPointer()
@@ -1079,41 +1083,34 @@
   if (UNLIKELY((to_ref->GetClass<kVerifyNone, kWithoutReadBarrier>()->IsTypeOfReferenceClass() &&
                 to_ref->AsReference()->GetReferent<kWithoutReadBarrier>() != nullptr &&
                 !IsInToSpace(to_ref->AsReference()->GetReferent<kWithoutReadBarrier>())))) {
-    // Leave this Reference gray in the queue so that GetReferent() will trigger a read barrier. We
-    // will change it to black or white later in ReferenceQueue::DequeuePendingReference().
+    // Leave this reference gray in the queue so that GetReferent() will trigger a read barrier. We
+    // will change it to white later in ReferenceQueue::DequeuePendingReference().
     DCHECK(to_ref->AsReference()->GetPendingNext() != nullptr) << "Left unenqueued ref gray " << to_ref;
   } else {
-    // We may occasionally leave a Reference black or white in the queue if its referent happens to
-    // be concurrently marked after the Scan() call above has enqueued the Reference, in which case
-    // the above IsInToSpace() evaluates to true and we change the color from gray to black or white
-    // here in this else block.
+    // We may occasionally leave a reference white in the queue if its referent happens to be
+    // concurrently marked after the Scan() call above has enqueued the Reference, in which case the
+    // above IsInToSpace() evaluates to true and we change the color from gray to white here in this
+    // else block.
     if (kUseBakerReadBarrier) {
-      if (region_space_->IsInToSpace(to_ref)) {
-        // If to-space, change from gray to white.
-        bool success = to_ref->AtomicSetReadBarrierPointer</*kCasRelease*/true>(
-            ReadBarrier::GrayPtr(),
-            ReadBarrier::WhitePtr());
-        DCHECK(success) << "Must succeed as we won the race.";
-        DCHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::WhitePtr());
-      } else {
-        // If non-moving space/unevac from space, change from gray
-        // to black. We can't change gray to white because it's not
-        // safe to use CAS if two threads change values in opposite
-        // directions (A->B and B->A). So, we change it to black to
-        // indicate non-moving objects that have been marked
-        // through. Note we'd need to change from black to white
-        // later (concurrently).
-        bool success = to_ref->AtomicSetReadBarrierPointer</*kCasRelease*/true>(
-            ReadBarrier::GrayPtr(),
-            ReadBarrier::BlackPtr());
-        DCHECK(success) << "Must succeed as we won the race.";
-        DCHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::BlackPtr());
-      }
+      bool success = to_ref->AtomicSetReadBarrierPointer</*kCasRelease*/true>(
+          ReadBarrier::GrayPtr(),
+          ReadBarrier::WhitePtr());
+      DCHECK(success) << "Must succeed as we won the race.";
     }
   }
 #else
   DCHECK(!kUseBakerReadBarrier);
 #endif
+
+  if (region_space_->IsInUnevacFromSpace(to_ref)) {
+    // Add the object's size to the live bytes of its unevac from-space region. Note this code
+    // is always run by the GC-running thread, so no synchronization is required.
+    DCHECK(region_space_bitmap_->Test(to_ref));
+    // Disable the read barrier in SizeOf for performance, which is safe.
+    size_t obj_size = to_ref->SizeOf<kDefaultVerifyFlags, kWithoutReadBarrier>();
+    size_t alloc_size = RoundUp(obj_size, space::RegionSpace::kAlignment);
+    region_space_->AddLiveBytes(to_ref, alloc_size);
+  }
   if (ReadBarrier::kEnableToSpaceInvariantChecks || kIsDebugBuild) {
     ConcurrentCopyingAssertToSpaceInvariantObjectVisitor visitor(this);
     visitor(to_ref);
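
The else-branch deleted above encoded a subtle rule: the old scheme never CASed gray
back to white on the non-moving path because two threads CASing the same word in
opposite directions (A->B and B->A) can both succeed and leave the object in either
state, so it detoured through black instead. A toy illustration of that hazard
(illustrative code, not ART's):

#include <atomic>
#include <thread>

std::atomic<int> color{1};  // 1 = gray, 0 = white

void GcWhitens() {
  int expected = 1;
  color.compare_exchange_strong(expected, 0);  // gray -> white
}

void MutatorGrays() {
  int expected = 0;
  color.compare_exchange_strong(expected, 1);  // white -> gray
}

int main() {
  std::thread t1(GcWhitens);
  std::thread t2(MutatorGrays);
  t1.join();
  t2.join();
  // Depending on interleaving, both CASes may succeed and the object ends up gray
  // again with nobody left to whiten it. Removing black makes gray -> white the
  // only such transition and funnels leftover gray objects through the false gray
  // stack instead.
  return color.load();
}
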
@@ -1226,61 +1223,6 @@
   RecordFreeLOS(heap_->GetLargeObjectsSpace()->Sweep(swap_bitmaps));
 }
 
-class ConcurrentCopyingClearBlackPtrsVisitor {
- public:
-  explicit ConcurrentCopyingClearBlackPtrsVisitor(ConcurrentCopying* cc)
-      : collector_(cc) {}
-  void operator()(mirror::Object* obj) const SHARED_REQUIRES(Locks::mutator_lock_)
-      SHARED_REQUIRES(Locks::heap_bitmap_lock_) {
-    DCHECK(obj != nullptr);
-    DCHECK(collector_->heap_->GetMarkBitmap()->Test(obj)) << obj;
-    DCHECK_EQ(obj->GetReadBarrierPointer(), ReadBarrier::BlackPtr()) << obj;
-    obj->AtomicSetReadBarrierPointer(ReadBarrier::BlackPtr(), ReadBarrier::WhitePtr());
-    DCHECK_EQ(obj->GetReadBarrierPointer(), ReadBarrier::WhitePtr()) << obj;
-  }
-
- private:
-  ConcurrentCopying* const collector_;
-};
-
-// Clear the black ptrs in non-moving objects back to white.
-void ConcurrentCopying::ClearBlackPtrs() {
-  CHECK(kUseBakerReadBarrier);
-  TimingLogger::ScopedTiming split("ClearBlackPtrs", GetTimings());
-  ConcurrentCopyingClearBlackPtrsVisitor visitor(this);
-  for (auto& space : heap_->GetContinuousSpaces()) {
-    if (space == region_space_) {
-      continue;
-    }
-    accounting::ContinuousSpaceBitmap* mark_bitmap = space->GetMarkBitmap();
-    if (kVerboseMode) {
-      LOG(INFO) << "ClearBlackPtrs: " << *space << " bitmap: " << *mark_bitmap;
-    }
-    mark_bitmap->VisitMarkedRange(reinterpret_cast<uintptr_t>(space->Begin()),
-                                  reinterpret_cast<uintptr_t>(space->Limit()),
-                                  visitor);
-  }
-  space::LargeObjectSpace* large_object_space = heap_->GetLargeObjectsSpace();
-  large_object_space->GetMarkBitmap()->VisitMarkedRange(
-      reinterpret_cast<uintptr_t>(large_object_space->Begin()),
-      reinterpret_cast<uintptr_t>(large_object_space->End()),
-      visitor);
-  // Objects on the allocation stack?
-  if (ReadBarrier::kEnableReadBarrierInvariantChecks || kIsDebugBuild) {
-    size_t count = GetAllocationStack()->Size();
-    auto* it = GetAllocationStack()->Begin();
-    auto* end = GetAllocationStack()->End();
-    for (size_t i = 0; i < count; ++i, ++it) {
-      CHECK_LT(it, end);
-      mirror::Object* obj = it->AsMirrorPtr();
-      if (obj != nullptr) {
-        // Must have been cleared above.
-        CHECK_EQ(obj->GetReadBarrierPointer(), ReadBarrier::WhitePtr()) << obj;
-      }
-    }
-  }
-}
-
 void ConcurrentCopying::ReclaimPhase() {
   TimingLogger::ScopedTiming split("ReclaimPhase", GetTimings());
   if (kVerboseMode) {
@@ -1338,20 +1280,12 @@
   }
 
   {
-    TimingLogger::ScopedTiming split3("ComputeUnevacFromSpaceLiveRatio", GetTimings());
-    ComputeUnevacFromSpaceLiveRatio();
-  }
-
-  {
     TimingLogger::ScopedTiming split4("ClearFromSpace", GetTimings());
     region_space_->ClearFromSpace();
   }
 
   {
     WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
-    if (kUseBakerReadBarrier) {
-      ClearBlackPtrs();
-    }
     Sweep(false);
     SwapBitmaps();
     heap_->UnBindBitmaps();
@@ -1373,39 +1307,6 @@
   }
 }
 
-class ConcurrentCopyingComputeUnevacFromSpaceLiveRatioVisitor {
- public:
-  explicit ConcurrentCopyingComputeUnevacFromSpaceLiveRatioVisitor(ConcurrentCopying* cc)
-      : collector_(cc) {}
-  void operator()(mirror::Object* ref) const SHARED_REQUIRES(Locks::mutator_lock_)
-      SHARED_REQUIRES(Locks::heap_bitmap_lock_) {
-    DCHECK(ref != nullptr);
-    DCHECK(collector_->region_space_bitmap_->Test(ref)) << ref;
-    DCHECK(collector_->region_space_->IsInUnevacFromSpace(ref)) << ref;
-    if (kUseBakerReadBarrier) {
-      DCHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::BlackPtr()) << ref;
-      // Clear the black ptr.
-      ref->AtomicSetReadBarrierPointer(ReadBarrier::BlackPtr(), ReadBarrier::WhitePtr());
-      DCHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::WhitePtr()) << ref;
-    }
-    size_t obj_size = ref->SizeOf();
-    size_t alloc_size = RoundUp(obj_size, space::RegionSpace::kAlignment);
-    collector_->region_space_->AddLiveBytes(ref, alloc_size);
-  }
-
- private:
-  ConcurrentCopying* const collector_;
-};
-
-// Compute how much live objects are left in regions.
-void ConcurrentCopying::ComputeUnevacFromSpaceLiveRatio() {
-  region_space_->AssertAllRegionLiveBytesZeroOrCleared();
-  ConcurrentCopyingComputeUnevacFromSpaceLiveRatioVisitor visitor(this);
-  region_space_bitmap_->VisitMarkedRange(reinterpret_cast<uintptr_t>(region_space_->Begin()),
-                                         reinterpret_cast<uintptr_t>(region_space_->Limit()),
-                                         visitor);
-}
-
 // Assert the to-space invariant.
 void ConcurrentCopying::AssertToSpaceInvariant(mirror::Object* obj, MemberOffset offset,
                                                mirror::Object* ref) {
@@ -1999,19 +1900,7 @@
       DCHECK(heap_mark_bitmap_->GetContinuousSpaceBitmap(ref)->Test(ref))
           << "Immune space object must be already marked";
     }
-    // This may or may not succeed, which is ok.
-    if (kUseBakerReadBarrier) {
-      ref->AtomicSetReadBarrierPointer(ReadBarrier::WhitePtr(), ReadBarrier::GrayPtr());
-    }
-    if (cc_bitmap->AtomicTestAndSet(ref)) {
-      // Already marked.
-    } else {
-      // Newly marked.
-      if (kUseBakerReadBarrier) {
-        DCHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::GrayPtr());
-      }
-      PushOntoMarkStack(ref);
-    }
+    MarkUnevacFromSpaceRegionOrImmuneSpace(ref, cc_bitmap);
   } else {
     // Use the mark bitmap.
     accounting::ContinuousSpaceBitmap* mark_bitmap =
@@ -2024,13 +1913,13 @@
       // Already marked.
       if (kUseBakerReadBarrier) {
         DCHECK(ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr() ||
-               ref->GetReadBarrierPointer() == ReadBarrier::BlackPtr());
+               ref->GetReadBarrierPointer() == ReadBarrier::WhitePtr());
       }
     } else if (is_los && los_bitmap->Test(ref)) {
       // Already marked in LOS.
       if (kUseBakerReadBarrier) {
         DCHECK(ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr() ||
-               ref->GetReadBarrierPointer() == ReadBarrier::BlackPtr());
+               ref->GetReadBarrierPointer() == ReadBarrier::WhitePtr());
       }
     } else {
       // Not marked.
@@ -2046,15 +1935,34 @@
           DCHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::WhitePtr());
         }
       } else {
+        // For the Baker-style RB, we need to handle 'false-gray' cases. See the
+        // kRegionTypeUnevacFromSpace-case comment in Mark().
+        if (kUseBakerReadBarrier) {
+          // Test the bitmap first to reduce the chance of false gray cases.
+          if ((!is_los && mark_bitmap->Test(ref)) ||
+              (is_los && los_bitmap->Test(ref))) {
+            return ref;
+          }
+        }
         // Not marked or on the allocation stack. Try to mark it.
         // This may or may not succeed, which is ok.
+        bool cas_success = false;
         if (kUseBakerReadBarrier) {
-          ref->AtomicSetReadBarrierPointer(ReadBarrier::WhitePtr(), ReadBarrier::GrayPtr());
+          cas_success = ref->AtomicSetReadBarrierPointer(ReadBarrier::WhitePtr(),
+                                                         ReadBarrier::GrayPtr());
         }
         if (!is_los && mark_bitmap->AtomicTestAndSet(ref)) {
           // Already marked.
+          if (kUseBakerReadBarrier && cas_success &&
+              ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr()) {
+            PushOntoFalseGrayStack(ref);
+          }
         } else if (is_los && los_bitmap->AtomicTestAndSet(ref)) {
           // Already marked in LOS.
+          if (kUseBakerReadBarrier && cas_success &&
+              ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr()) {
+            PushOntoFalseGrayStack(ref);
+          }
         } else {
           // Newly marked.
           if (kUseBakerReadBarrier) {
diff --git a/runtime/gc/collector/concurrent_copying.h b/runtime/gc/collector/concurrent_copying.h
index 76315fe..e9ff618 100644
--- a/runtime/gc/collector/concurrent_copying.h
+++ b/runtime/gc/collector/concurrent_copying.h
@@ -160,8 +160,6 @@
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_, !mark_stack_lock_);
   void SweepLargeObjects(bool swap_bitmaps)
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_);
-  void ClearBlackPtrs()
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_);
   void FillWithDummyObject(mirror::Object* dummy_obj, size_t byte_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
   mirror::Object* AllocateInSkippedBlock(size_t alloc_size)
@@ -185,10 +183,19 @@
   void ExpandGcMarkStack() SHARED_REQUIRES(Locks::mutator_lock_);
   mirror::Object* MarkNonMoving(mirror::Object* from_ref) SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_);
+  ALWAYS_INLINE mirror::Object* MarkUnevacFromSpaceRegionOrImmuneSpace(mirror::Object* from_ref,
+      accounting::SpaceBitmap<kObjectAlignment>* bitmap)
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_);
+  void PushOntoFalseGrayStack(mirror::Object* obj) SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!mark_stack_lock_);
+  void ProcessFalseGrayStack() SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!mark_stack_lock_);
 
   space::RegionSpace* region_space_;      // The underlying region space.
   std::unique_ptr<Barrier> gc_barrier_;
   std::unique_ptr<accounting::ObjectStack> gc_mark_stack_;
+  std::vector<mirror::Object*> false_gray_stack_ GUARDED_BY(mark_stack_lock_);
   Mutex mark_stack_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
   std::vector<accounting::ObjectStack*> revoked_mark_stacks_
       GUARDED_BY(mark_stack_lock_);
diff --git a/runtime/gc/collector_type.h b/runtime/gc/collector_type.h
index 4ffc8af..c602081 100644
--- a/runtime/gc/collector_type.h
+++ b/runtime/gc/collector_type.h
@@ -49,6 +49,8 @@
   // A homogeneous space compaction collector used in background transition
   // when both foreground and background collector are CMS.
   kCollectorTypeHomogeneousSpaceCompact,
+  // Class linker fake collector.
+  kCollectorTypeClassLinker,
 };
 std::ostream& operator<<(std::ostream& os, const CollectorType& collector_type);
 
diff --git a/runtime/gc/gc_cause.cc b/runtime/gc/gc_cause.cc
index 18e5703..ad9bb92 100644
--- a/runtime/gc/gc_cause.cc
+++ b/runtime/gc/gc_cause.cc
@@ -36,6 +36,7 @@
     case kGcCauseInstrumentation: return "Instrumentation";
     case kGcCauseAddRemoveAppImageSpace: return "AddRemoveAppImageSpace";
     case kGcCauseDebugger: return "Debugger";
+    case kGcCauseClassLinker: return "ClassLinker";
     default:
       LOG(FATAL) << "Unreachable";
       UNREACHABLE();
diff --git a/runtime/gc/gc_cause.h b/runtime/gc/gc_cause.h
index ad67eb7..797ec34 100644
--- a/runtime/gc/gc_cause.h
+++ b/runtime/gc/gc_cause.h
@@ -47,6 +47,8 @@
   kGcCauseDebugger,
   // GC triggered for background transition when both foreground and background collector are CMS.
   kGcCauseHomogeneousSpaceCompact,
+  // Class linker cause, used to guard filling ArtMethods with special values.
+  kGcCauseClassLinker,
 };
 
 const char* PrettyCause(GcCause cause);
diff --git a/runtime/gc/reference_queue.cc b/runtime/gc/reference_queue.cc
index 03ab9a1..6088a43 100644
--- a/runtime/gc/reference_queue.cc
+++ b/runtime/gc/reference_queue.cc
@@ -68,31 +68,19 @@
   Heap* heap = Runtime::Current()->GetHeap();
   if (kUseBakerOrBrooksReadBarrier && heap->CurrentCollectorType() == kCollectorTypeCC &&
       heap->ConcurrentCopyingCollector()->IsActive()) {
-    // Change the gray ptr we left in ConcurrentCopying::ProcessMarkStackRef() to black or white.
+    // Change the gray ptr we left in ConcurrentCopying::ProcessMarkStackRef() to white.
     // We check IsActive() above because we don't want to do this when the zygote compaction
     // collector (SemiSpace) is running.
     CHECK(ref != nullptr);
     collector::ConcurrentCopying* concurrent_copying = heap->ConcurrentCopyingCollector();
-    const bool is_moving = concurrent_copying->RegionSpace()->IsInToSpace(ref);
-    if (ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr()) {
-      if (is_moving) {
-        ref->AtomicSetReadBarrierPointer(ReadBarrier::GrayPtr(), ReadBarrier::WhitePtr());
-        CHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::WhitePtr());
-      } else {
-        ref->AtomicSetReadBarrierPointer(ReadBarrier::GrayPtr(), ReadBarrier::BlackPtr());
-        CHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::BlackPtr());
-      }
+    mirror::Object* rb_ptr = ref->GetReadBarrierPointer();
+    if (rb_ptr == ReadBarrier::GrayPtr()) {
+      ref->AtomicSetReadBarrierPointer(ReadBarrier::GrayPtr(), ReadBarrier::WhitePtr());
+      CHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::WhitePtr());
     } else {
-      // In ConcurrentCopying::ProcessMarkStackRef() we may leave a black or white Reference in the
-      // queue and find it here, which is OK. Check that the color makes sense depending on whether
-      // the Reference is moving or not and that the referent has been marked.
-      if (is_moving) {
-        CHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::WhitePtr())
-            << "ref=" << ref << " rb_ptr=" << ref->GetReadBarrierPointer();
-      } else {
-        CHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::BlackPtr())
-            << "ref=" << ref << " rb_ptr=" << ref->GetReadBarrierPointer();
-      }
+      // In ConcurrentCopying::ProcessMarkStackRef() we may leave a white reference in the queue and
+      // find it here, which is OK.
+      CHECK_EQ(rb_ptr, ReadBarrier::WhitePtr()) << "ref=" << ref << " rb_ptr=" << rb_ptr;
       mirror::Object* referent = ref->GetReferent<kWithoutReadBarrier>();
       // The referent could be null if it's cleared by a mutator (Reference.clear()).
       if (referent != nullptr) {
diff --git a/runtime/gc/scoped_gc_critical_section.cc b/runtime/gc/scoped_gc_critical_section.cc
index e7786a1..b5eb979 100644
--- a/runtime/gc/scoped_gc_critical_section.cc
+++ b/runtime/gc/scoped_gc_critical_section.cc
@@ -38,4 +38,3 @@
 
 }  // namespace gc
 }  // namespace art
-
diff --git a/runtime/gc/space/malloc_space.h b/runtime/gc/space/malloc_space.h
index 4e56c4a..c6b2870 100644
--- a/runtime/gc/space/malloc_space.h
+++ b/runtime/gc/space/malloc_space.h
@@ -39,7 +39,7 @@
     int rc = call args; \
     if (UNLIKELY(rc != 0)) { \
       errno = rc; \
-      PLOG(FATAL) << # call << " failed for " << what; \
+      PLOG(FATAL) << # call << " failed for " << (what); \
     } \
   } while (false)
 
diff --git a/runtime/gc/space/region_space.cc b/runtime/gc/space/region_space.cc
index 9a2d0c6..5d710bf 100644
--- a/runtime/gc/space/region_space.cc
+++ b/runtime/gc/space/region_space.cc
@@ -216,17 +216,6 @@
   evac_region_ = nullptr;
 }
 
-void RegionSpace::AssertAllRegionLiveBytesZeroOrCleared() {
-  if (kIsDebugBuild) {
-    MutexLock mu(Thread::Current(), region_lock_);
-    for (size_t i = 0; i < num_regions_; ++i) {
-      Region* r = &regions_[i];
-      size_t live_bytes = r->LiveBytes();
-      CHECK(live_bytes == 0U || live_bytes == static_cast<size_t>(-1)) << live_bytes;
-    }
-  }
-}
-
 void RegionSpace::LogFragmentationAllocFailure(std::ostream& os,
                                                size_t /* failed_alloc_bytes */) {
   size_t max_contiguous_allocation = 0;
diff --git a/runtime/gc/space/region_space.h b/runtime/gc/space/region_space.h
index 14e8005..4e8dfe8 100644
--- a/runtime/gc/space/region_space.h
+++ b/runtime/gc/space/region_space.h
@@ -215,7 +215,16 @@
     reg->AddLiveBytes(alloc_size);
   }
 
-  void AssertAllRegionLiveBytesZeroOrCleared() REQUIRES(!region_lock_);
+  void AssertAllRegionLiveBytesZeroOrCleared() REQUIRES(!region_lock_) {
+    if (kIsDebugBuild) {
+      MutexLock mu(Thread::Current(), region_lock_);
+      for (size_t i = 0; i < num_regions_; ++i) {
+        Region* r = &regions_[i];
+        size_t live_bytes = r->LiveBytes();
+        CHECK(live_bytes == 0U || live_bytes == static_cast<size_t>(-1)) << live_bytes;
+      }
+    }
+  }
 
   void RecordAlloc(mirror::Object* ref) REQUIRES(!region_lock_);
   bool AllocNewTlab(Thread* self) REQUIRES(!region_lock_);
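
AddLiveBytes above accumulates sizes that the caller first rounds up to the region
space alignment (see the new live-bytes code in ProcessMarkStackRef). A quick worked
sketch with a local RoundUp and an assumed 8-byte alignment (illustrative values,
not ART's headers):

#include <cstddef>
#include <cstdio>

constexpr size_t kAlignment = 8;  // assumed for illustration

constexpr size_t RoundUp(size_t x, size_t n) {  // n must be a power of two
  return (x + n - 1) & ~(n - 1);
}

int main() {
  // A 17-byte object occupies 24 bytes of region space; that is the amount added
  // to the region's live bytes, so live bytes track allocated bytes exactly.
  printf("%zu\n", RoundUp(17, kAlignment));  // prints 24
  return 0;
}
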
diff --git a/runtime/instrumentation.cc b/runtime/instrumentation.cc
index 61119f8..7dfc83f 100644
--- a/runtime/instrumentation.cc
+++ b/runtime/instrumentation.cc
@@ -1088,7 +1088,7 @@
   bool deoptimize = (visitor.caller != nullptr) &&
                     (interpreter_stubs_installed_ || IsDeoptimized(visitor.caller) ||
                     Dbg::IsForcedInterpreterNeededForUpcall(self, visitor.caller));
-  if (deoptimize) {
+  if (deoptimize && Runtime::Current()->IsDeoptimizeable(*return_pc)) {
     if (kVerboseInstrumentation) {
       LOG(INFO) << StringPrintf("Deoptimizing %s by returning from %s with result %#" PRIx64 " in ",
                                 PrettyMethod(visitor.caller).c_str(),
@@ -1110,7 +1110,7 @@
   }
 }
 
-void Instrumentation::PopMethodForUnwind(Thread* self, bool is_deoptimization) const {
+uintptr_t Instrumentation::PopMethodForUnwind(Thread* self, bool is_deoptimization) const {
   // Do the pop.
   std::deque<instrumentation::InstrumentationStackFrame>* stack = self->GetInstrumentationStack();
   CHECK_GT(stack->size(), 0U);
@@ -1134,6 +1134,7 @@
     uint32_t dex_pc = DexFile::kDexNoIndex;
     MethodUnwindEvent(self, instrumentation_frame.this_object_, method, dex_pc);
   }
+  return instrumentation_frame.return_pc_;
 }
 
 std::string InstrumentationStackFrame::Dump() const {
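
PopMethodForUnwind now hands the popped frame's return pc back to the caller,
mirroring the IsDeoptimizeable(*return_pc) gate added above. A standalone model of
the new contract (all names illustrative, not ART's API):

#include <cstdint>
#include <deque>

struct Frame { uintptr_t return_pc; };

// Pop the top frame and return its pc; previously the pc was simply dropped.
uintptr_t PopForUnwind(std::deque<Frame>* stack) {
  Frame frame = stack->front();
  stack->pop_front();
  return frame.return_pc;
}

// Illustrative stand-in: e.g. the pc must lie inside compiled-code bounds.
bool IsDeoptimizeable(uintptr_t pc) { return pc != 0; }

void Unwind(std::deque<Frame>* stack) {
  uintptr_t pc = PopForUnwind(stack);
  if (IsDeoptimizeable(pc)) {
    // ... safe to push a deoptimization context for this frame ...
  }
}

int main() {
  std::deque<Frame> stack = {{0x1000}};
  Unwind(&stack);
  return 0;
}
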
diff --git a/runtime/instrumentation.h b/runtime/instrumentation.h
index ce6ead4..49dd060 100644
--- a/runtime/instrumentation.h
+++ b/runtime/instrumentation.h
@@ -402,7 +402,8 @@
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!deoptimized_methods_lock_);
 
   // Pops an instrumentation frame from the current thread and generates an unwind event.
-  void PopMethodForUnwind(Thread* self, bool is_deoptimization) const
+  // Returns the return pc for the instrumentation frame that's popped.
+  uintptr_t PopMethodForUnwind(Thread* self, bool is_deoptimization) const
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Call back for configure stubs.
diff --git a/runtime/interpreter/interpreter.cc b/runtime/interpreter/interpreter.cc
index 1d0e600..8c42b3a 100644
--- a/runtime/interpreter/interpreter.cc
+++ b/runtime/interpreter/interpreter.cc
@@ -484,6 +484,36 @@
   self->PopShadowFrame();
 }
 
+static bool IsStringInit(const Instruction* instr, ArtMethod* caller)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  if (instr->Opcode() == Instruction::INVOKE_DIRECT ||
+      instr->Opcode() == Instruction::INVOKE_DIRECT_RANGE) {
+    // Instead of calling ResolveMethod(), which has a suspend point and can trigger
+    // GC, look up the callee method symbolically.
+    uint16_t callee_method_idx = (instr->Opcode() == Instruction::INVOKE_DIRECT_RANGE) ?
+        instr->VRegB_3rc() : instr->VRegB_35c();
+    const DexFile* dex_file = caller->GetDexFile();
+    const DexFile::MethodId& method_id = dex_file->GetMethodId(callee_method_idx);
+    const char* class_name = dex_file->StringByTypeIdx(method_id.class_idx_);
+    const char* method_name = dex_file->GetMethodName(method_id);
+    // Compare the method's class name and method name against the string init.
+    // This is ok since it's not allowed to define your own java/lang/String class.
+    // TODO: verify that assumption.
+    if ((strcmp(class_name, "Ljava/lang/String;") == 0) &&
+        (strcmp(method_name, "<init>") == 0)) {
+      return true;
+    }
+  }
+  return false;
+}
+
+static int16_t GetReceiverRegisterForStringInit(const Instruction* instr) {
+  DCHECK(instr->Opcode() == Instruction::INVOKE_DIRECT_RANGE ||
+         instr->Opcode() == Instruction::INVOKE_DIRECT);
+  return (instr->Opcode() == Instruction::INVOKE_DIRECT_RANGE) ?
+      instr->VRegC_3rc() : instr->VRegC_35c();
+}
+
 void EnterInterpreterFromDeoptimize(Thread* self,
                                     ShadowFrame* shadow_frame,
                                     bool from_code,
@@ -519,22 +549,38 @@
       // TODO: should be tested more once b/17586779 is fixed.
       const Instruction* instr = Instruction::At(&code_item->insns_[dex_pc]);
       if (instr->IsInvoke()) {
+        if (IsStringInit(instr, shadow_frame->GetMethod())) {
+          uint16_t this_obj_vreg = GetReceiverRegisterForStringInit(instr);
+          // Move the StringFactory.newStringFromChars() result into the register representing
+          // "this object" when invoking the string constructor in the original dex instruction.
+          // Also move the result into all aliases.
+          DCHECK(value.GetL()->IsString());
+          SetStringInitValueToAllAliases(shadow_frame, this_obj_vreg, value);
+          // Calling the string constructor in the original dex code doesn't generate a
+          // result value.
+          value.SetJ(0);
+        }
         new_dex_pc = dex_pc + instr->SizeInCodeUnits();
       } else if (instr->Opcode() == Instruction::NEW_INSTANCE) {
        // It's possible to deoptimize at a NEW_INSTANCE dex instruction that's for a
        // java string, which is turned into a call to StringFactory.newEmptyString();
+        // Move the StringFactory.newEmptyString() result into the destination register.
+        DCHECK(value.GetL()->IsString());
+        shadow_frame->SetVRegReference(instr->VRegA_21c(), value.GetL());
+        // new-instance doesn't generate a result value.
+        value.SetJ(0);
+        // Skip the dex instruction since we essentially come back from an invocation.
+        new_dex_pc = dex_pc + instr->SizeInCodeUnits();
         if (kIsDebugBuild) {
           ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+          // This is a suspend point. But it's ok since value has been set into shadow_frame.
           mirror::Class* klass = class_linker->ResolveType(
               instr->VRegB_21c(), shadow_frame->GetMethod());
           DCHECK(klass->IsStringClass());
         }
-        // Skip the dex instruction since we essentially come back from an invocation.
-        new_dex_pc = dex_pc + instr->SizeInCodeUnits();
       } else {
-        DCHECK(false) << "Unexpected instruction opcode " << instr->Opcode()
-                      << " at dex_pc " << dex_pc
-                      << " of method: " << PrettyMethod(shadow_frame->GetMethod(), false);
+        CHECK(false) << "Unexpected instruction opcode " << instr->Opcode()
+                     << " at dex_pc " << dex_pc
+                     << " of method: " << PrettyMethod(shadow_frame->GetMethod(), false);
       }
     } else {
       // Nothing to do, the dex_pc is the one at which the code requested
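
The invoke case above relies on SetStringInitValueToAllAliases (defined in the next
file) to redirect every register that aliases the receiver of the string
constructor. A toy model over a plain register file shows the intended effect
(illustrative types, not ART's ShadowFrame):

#include <cstdint>
#include <vector>

using ObjRef = const char*;  // stand-in for mirror::Object*

void SetAllAliases(std::vector<ObjRef>* vregs, uint16_t this_obj_vreg, ObjRef result) {
  ObjRef existing = (*vregs)[this_obj_vreg];
  if (existing == nullptr) {
    // Deoptimized from compiled code: the compiler proved there are no aliases.
    (*vregs)[this_obj_vreg] = result;
    return;
  }
  for (ObjRef& slot : *vregs) {
    if (slot == existing) {  // several vregs may alias the receiver
      slot = result;
    }
  }
}

int main() {
  ObjRef fake = "fake-string";  // result of the fake NEW_INSTANCE allocation
  // v0 = new-instance String; v1 = move-object v0; invoke-direct {v0}, String.<init>
  std::vector<ObjRef> vregs = {fake, fake, nullptr};
  SetAllAliases(&vregs, /*this_obj_vreg=*/0, "factory-result");
  // vregs[0] and vregs[1] now both reference the StringFactory result.
  return 0;
}
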
diff --git a/runtime/interpreter/interpreter_common.cc b/runtime/interpreter/interpreter_common.cc
index 12d70c5..53d5e43 100644
--- a/runtime/interpreter/interpreter_common.cc
+++ b/runtime/interpreter/interpreter_common.cc
@@ -540,6 +540,30 @@
                  result, method->GetInterfaceMethodIfProxy(sizeof(void*))->GetShorty());
 }
 
+void SetStringInitValueToAllAliases(ShadowFrame* shadow_frame,
+                                    uint16_t this_obj_vreg,
+                                    JValue result)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  Object* existing = shadow_frame->GetVRegReference(this_obj_vreg);
+  if (existing == nullptr) {
+    // If it's null, we come from compiled code that was deoptimized. There are no aliases
+    // to update, as the compiler verified there was none; just set the new string result
+    // of the StringFactory.
+    shadow_frame->SetVRegReference(this_obj_vreg, result.GetL());
+    return;
+  }
+  // Set the string init result into all aliases.
+  for (uint32_t i = 0, e = shadow_frame->NumberOfVRegs(); i < e; ++i) {
+    if (shadow_frame->GetVRegReference(i) == existing) {
+      DCHECK_EQ(shadow_frame->GetVRegReference(i),
+                reinterpret_cast<mirror::Object*>(shadow_frame->GetVReg(i)));
+      shadow_frame->SetVRegReference(i, result.GetL());
+      DCHECK_EQ(shadow_frame->GetVRegReference(i),
+                reinterpret_cast<mirror::Object*>(shadow_frame->GetVReg(i)));
+    }
+  }
+}
+
 template <bool is_range,
           bool do_assignability_check,
           size_t kVarArgMax>
@@ -739,24 +763,7 @@
   }
 
   if (string_init && !self->IsExceptionPending()) {
-    mirror::Object* existing = shadow_frame.GetVRegReference(string_init_vreg_this);
-    if (existing == nullptr) {
-      // If it's null, we come from compiled code that was deoptimized. Nothing to do,
-      // as the compiler verified there was no alias.
-      // Set the new string result of the StringFactory.
-      shadow_frame.SetVRegReference(string_init_vreg_this, result->GetL());
-    } else {
-      // Replace the fake string that was allocated with the StringFactory result.
-      for (uint32_t i = 0; i < shadow_frame.NumberOfVRegs(); ++i) {
-        if (shadow_frame.GetVRegReference(i) == existing) {
-          DCHECK_EQ(shadow_frame.GetVRegReference(i),
-                    reinterpret_cast<mirror::Object*>(shadow_frame.GetVReg(i)));
-          shadow_frame.SetVRegReference(i, result->GetL());
-          DCHECK_EQ(shadow_frame.GetVRegReference(i),
-                    reinterpret_cast<mirror::Object*>(shadow_frame.GetVReg(i)));
-        }
-      }
-    }
+    SetStringInitValueToAllAliases(&shadow_frame, string_init_vreg_this, *result);
   }
 
   return !self->IsExceptionPending();
diff --git a/runtime/interpreter/interpreter_common.h b/runtime/interpreter/interpreter_common.h
index 69376fd..cc470f3 100644
--- a/runtime/interpreter/interpreter_common.h
+++ b/runtime/interpreter/interpreter_common.h
@@ -1021,6 +1021,12 @@
                                         ShadowFrame* shadow_frame,
                                         JValue* result);
 
+// Set the string value created by StringFactory.newStringFromXXX() into all aliases of the
+// StringFactory.newEmptyString() result.
+void SetStringInitValueToAllAliases(ShadowFrame* shadow_frame,
+                                    uint16_t this_obj_vreg,
+                                    JValue result);
+
 // Explicitly instantiate all DoInvoke functions.
 #define EXPLICIT_DO_INVOKE_TEMPLATE_DECL(_type, _is_range, _do_check)                      \
   template SHARED_REQUIRES(Locks::mutator_lock_)                                     \
diff --git a/runtime/jit/jit.cc b/runtime/jit/jit.cc
index b6b7eb1..ae5a0f6 100644
--- a/runtime/jit/jit.cc
+++ b/runtime/jit/jit.cc
@@ -444,6 +444,13 @@
       return false;
     }
 
+    // Before allowing the jump, make sure the debugger is not active to avoid jumping from
+    // interpreter to OSR while e.g. single stepping. Note that we could selectively disable
+    // OSR when single stepping, but whether we are single stepping is currently hard to know
+    // at this point.
+    if (Dbg::IsDebuggerActive()) {
+      return false;
+    }
+
     // We found a stack map, now fill the frame with dex register values from the interpreter's
     // shadow frame.
     DexRegisterMap vreg_map =
diff --git a/runtime/jit/profile_saver.cc b/runtime/jit/profile_saver.cc
index 8358ce3..9822f6e 100644
--- a/runtime/jit/profile_saver.cc
+++ b/runtime/jit/profile_saver.cc
@@ -35,6 +35,8 @@
 // with all profile savers running at the same time.
 static constexpr const uint64_t kMinSavePeriodNs = MsToNs(20 * 1000);  // 20 seconds
 static constexpr const uint64_t kSaveResolvedClassesDelayMs = 2 * 1000;  // 2 seconds
+// Minimum number of JIT samples during launch to include a method in the profile.
+static constexpr const size_t kStartupMethodSamples = 1;
 
 static constexpr const uint32_t kMinimumNumberOfMethodsToSave = 10;
 static constexpr const uint32_t kMinimumNumberOfClassesToSave = 10;
@@ -97,26 +99,48 @@
   // classes save (unless they started before the initial saving was done).
   {
     MutexLock mu(self, wait_lock_);
-    period_condition_.TimedWait(self, kSaveResolvedClassesDelayMs, 0);
+    constexpr uint64_t kSleepTime = kSaveResolvedClassesDelayMs;
+    const uint64_t end_time = NanoTime() + MsToNs(kSleepTime);
+    while (true) {
+      const uint64_t current_time = NanoTime();
+      if (current_time >= end_time) {
+        break;
+      }
+      period_condition_.TimedWait(self, NsToMs(end_time - current_time), 0);
+    }
     total_ms_of_sleep_ += kSaveResolvedClassesDelayMs;
   }
-  FetchAndCacheResolvedClasses();
+  FetchAndCacheResolvedClassesAndMethods();
 
   // Loop for the profiled methods.
   while (!ShuttingDown(self)) {
     uint64_t sleep_start = NanoTime();
     {
-      MutexLock mu(self, wait_lock_);
-      period_condition_.Wait(self);
+      uint64_t sleep_time = 0;
+      {
+        MutexLock mu(self, wait_lock_);
+        period_condition_.Wait(self);
+        sleep_time = NanoTime() - sleep_start;
+      }
+      // Check if the thread was woken up for shutdown.
+      if (ShuttingDown(self)) {
+        break;
+      }
       total_number_of_wake_ups_++;
       // We might have been woken up by a huge number of notifications to guarantee saving.
       // If we didn't meet the minimum saving period go back to sleep (only if missed by
       // a reasonable margin).
-      uint64_t sleep_time = NanoTime() - last_time_ns_saver_woke_up_;
-      while (kMinSavePeriodNs - sleep_time > (kMinSavePeriodNs / 10)) {
-        period_condition_.TimedWait(self, NsToMs(kMinSavePeriodNs - sleep_time), 0);
+      while (kMinSavePeriodNs * 0.9 > sleep_time) {
+        {
+          MutexLock mu(self, wait_lock_);
+          period_condition_.TimedWait(self, NsToMs(kMinSavePeriodNs - sleep_time), 0);
+          sleep_time = NanoTime() - sleep_start;
+        }
+        // Check if the thread was woken up for shutdown.
+        if (ShuttingDown(self)) {
+          break;
+        }
         total_number_of_wake_ups_++;
-        sleep_time = NanoTime() - last_time_ns_saver_woke_up_;
       }
     }
     total_ms_of_sleep_ += NsToMs(NanoTime() - sleep_start);
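
Both waits above now loop against a fixed deadline so that early or spurious wakeups
cannot shorten the sleep. The same pattern in portable C++, with std primitives
standing in for ART's Mutex and ConditionVariable:

#include <chrono>
#include <condition_variable>
#include <mutex>

std::mutex wait_lock;
std::condition_variable period_condition;

void SleepUntilDeadline(std::chrono::milliseconds delay) {
  const auto end_time = std::chrono::steady_clock::now() + delay;
  std::unique_lock<std::mutex> lock(wait_lock);
  while (std::chrono::steady_clock::now() < end_time) {
    // May wake early or spuriously; the loop re-checks the remaining time.
    period_condition.wait_until(lock, end_time);
  }
}
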
@@ -182,11 +206,48 @@
   return &info_it->second;
 }
 
-void ProfileSaver::FetchAndCacheResolvedClasses() {
+// Get resolved methods that have a profiling info or at least kStartupMethodSamples samples.
+// Excludes native methods and classes in the boot image.
+class GetMethodsVisitor : public ClassVisitor {
+ public:
+  explicit GetMethodsVisitor(std::vector<MethodReference>* methods) : methods_(methods) {}
+
+  virtual bool operator()(mirror::Class* klass) SHARED_REQUIRES(Locks::mutator_lock_) {
+    if (Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(klass)) {
+      return true;
+    }
+    for (ArtMethod& method : klass->GetMethods(sizeof(void*))) {
+      if (!method.IsNative()) {
+        if (method.GetCounter() >= kStartupMethodSamples ||
+            method.GetProfilingInfo(sizeof(void*)) != nullptr) {
+          // Have samples, add to profile.
+          const DexFile* dex_file = method.GetInterfaceMethodIfProxy(sizeof(void*))->GetDexFile();
+          methods_->push_back(MethodReference(dex_file, method.GetDexMethodIndex()));
+        }
+      }
+    }
+    return true;
+  }
+
+ private:
+  std::vector<MethodReference>* const methods_;
+};
+
+void ProfileSaver::FetchAndCacheResolvedClassesAndMethods() {
   ScopedTrace trace(__PRETTY_FUNCTION__);
   ClassLinker* const class_linker = Runtime::Current()->GetClassLinker();
   std::set<DexCacheResolvedClasses> resolved_classes =
       class_linker->GetResolvedClasses(/*ignore boot classes*/ true);
+
+  std::vector<MethodReference> methods;
+  {
+    ScopedTrace trace2("Get hot methods");
+    GetMethodsVisitor visitor(&methods);
+    ScopedObjectAccess soa(Thread::Current());
+    class_linker->VisitClasses(&visitor);
+    VLOG(profiler) << "Methods with samples greater than "
+                   << kStartupMethodSamples << " = " << methods.size();
+  }
   MutexLock mu(Thread::Current(), *Locks::profiler_lock_);
   uint64_t total_number_of_profile_entries_cached = 0;
 
@@ -194,11 +255,16 @@
     std::set<DexCacheResolvedClasses> resolved_classes_for_location;
     const std::string& filename = it.first;
     const std::set<std::string>& locations = it.second;
-
+    std::vector<MethodReference> methods_for_location;
+    for (const MethodReference& ref : methods) {
+      if (locations.find(ref.dex_file->GetBaseLocation()) != locations.end()) {
+        methods_for_location.push_back(ref);
+      }
+    }
     for (const DexCacheResolvedClasses& classes : resolved_classes) {
       if (locations.find(classes.GetBaseLocation()) != locations.end()) {
-        VLOG(profiler) << "Added classes for location " << classes.GetBaseLocation()
-                       << " (" << classes.GetDexLocation() << ")";
+        VLOG(profiler) << "Added " << classes.GetClasses().size() << " classes for location "
+                       << classes.GetBaseLocation() << " (" << classes.GetDexLocation() << ")";
         resolved_classes_for_location.insert(classes);
       } else {
         VLOG(profiler) << "Location not found " << classes.GetBaseLocation()
@@ -206,7 +272,7 @@
       }
     }
     ProfileCompilationInfo* info = GetCachedProfiledInfo(filename);
-    info->AddMethodsAndClasses(std::vector<MethodReference>(), resolved_classes_for_location);
+    info->AddMethodsAndClasses(methods_for_location, resolved_classes_for_location);
     total_number_of_profile_entries_cached += resolved_classes_for_location.size();
   }
   max_number_of_profile_entries_cached_ = std::max(
diff --git a/runtime/jit/profile_saver.h b/runtime/jit/profile_saver.h
index c6da959..9c6d0fa 100644
--- a/runtime/jit/profile_saver.h
+++ b/runtime/jit/profile_saver.h
@@ -95,9 +95,9 @@
   // If no entry exists, a new empty one will be created, added to the cache and
   // then returned.
   ProfileCompilationInfo* GetCachedProfiledInfo(const std::string& filename);
-  // Fetches the current resolved classes from the ClassLinker and stores them
-  // in the profile_cache_ for later save.
-  void FetchAndCacheResolvedClasses();
+  // Fetches the current resolved classes and methods from the ClassLinker and stores them in the
+  // profile_cache_ for later save.
+  void FetchAndCacheResolvedClassesAndMethods();
 
   static bool MaybeRecordDexUseInternal(
       const std::string& dex_location,
diff --git a/runtime/jni_internal.cc b/runtime/jni_internal.cc
index 7bd85ec..8cdf96d 100644
--- a/runtime/jni_internal.cc
+++ b/runtime/jni_internal.cc
@@ -301,13 +301,13 @@
     CHECK_NON_NULL_ARGUMENT_FN_NAME(__FUNCTION__, value, return_val)
 
 #define CHECK_NON_NULL_ARGUMENT_FN_NAME(name, value, return_val) \
-  if (UNLIKELY(value == nullptr)) { \
+  if (UNLIKELY((value) == nullptr)) { \
     JavaVmExtFromEnv(env)->JniAbortF(name, #value " == null"); \
     return return_val; \
   }
 
 #define CHECK_NON_NULL_MEMCPY_ARGUMENT(length, value) \
-  if (UNLIKELY(length != 0 && value == nullptr)) { \
+  if (UNLIKELY((length) != 0 && (value) == nullptr)) { \
     JavaVmExtFromEnv(env)->JniAbortF(__FUNCTION__, #value " == null"); \
     return; \
   }
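
The parentheses added here (and in the malloc_space.h hunk earlier) guard macro
parameters against low-precedence argument expressions. A minimal demonstration with
a hypothetical macro:

#include <cassert>

#define IS_NULL_BAD(value) ((value == nullptr))      // unparenthesized parameter
#define IS_NULL_GOOD(value) (((value) == nullptr))   // parenthesized parameter

int main() {
  int x = 1;
  int* p = &x;
  int* q = nullptr;
  bool flag = false;
  // IS_NULL_BAD(flag ? p : q) expands to (flag ? p : (q == nullptr)) because ==
  // binds tighter than ?:, and does not even compile (int* vs bool ternary arms).
  bool b = IS_NULL_GOOD(flag ? p : q);  // tests the pointer the ternary selects
  assert(b);  // flag is false, so q was selected, and q == nullptr
  return 0;
}
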
diff --git a/runtime/lambda/shorty_field_type.h b/runtime/lambda/shorty_field_type.h
index 46ddaa9..c314fd2 100644
--- a/runtime/lambda/shorty_field_type.h
+++ b/runtime/lambda/shorty_field_type.h
@@ -391,7 +391,7 @@
 
  private:
 #define IS_VALID_TYPE_SPECIALIZATION(type, name) \
-  static inline constexpr bool Is ## name ## TypeImpl(type* const  = 0) { \
+  static inline constexpr bool Is ## name ## TypeImpl(type* const  = 0) { /*NOLINT*/ \
     return true; \
   } \
   \
diff --git a/runtime/mirror/class-inl.h b/runtime/mirror/class-inl.h
index dfb728f..fcdfc88 100644
--- a/runtime/mirror/class-inl.h
+++ b/runtime/mirror/class-inl.h
@@ -419,8 +419,6 @@
       }
       return false;
     }
-    DCHECK_EQ(this->CanAccessMember(access_to, method->GetAccessFlags()),
-              this->CanAccessMember(dex_access_to, method->GetAccessFlags()));
   }
   if (LIKELY(this->CanAccessMember(access_to, method->GetAccessFlags()))) {
     return true;
diff --git a/runtime/mirror/dex_cache-inl.h b/runtime/mirror/dex_cache-inl.h
index 2da3d84..2894b68 100644
--- a/runtime/mirror/dex_cache-inl.h
+++ b/runtime/mirror/dex_cache-inl.h
@@ -148,9 +148,7 @@
   for (size_t i = 0, count = NumStrings(); i < count; ++i) {
     mirror::String* source = src[i].Read<kReadBarrierOption>();
     mirror::String* new_source = visitor(source);
-    if (source != new_source) {
-      dest[i] = GcRoot<mirror::String>(new_source);
-    }
+    dest[i] = GcRoot<mirror::String>(new_source);
   }
 }
 
@@ -160,9 +158,7 @@
   for (size_t i = 0, count = NumResolvedTypes(); i < count; ++i) {
     mirror::Class* source = src[i].Read<kReadBarrierOption>();
     mirror::Class* new_source = visitor(source);
-    if (source != new_source) {
-      dest[i] = GcRoot<mirror::Class>(new_source);
-    }
+    dest[i] = GcRoot<mirror::Class>(new_source);
   }
 }
 
diff --git a/runtime/monitor.cc b/runtime/monitor.cc
index f4bc222..71c866f 100644
--- a/runtime/monitor.cc
+++ b/runtime/monitor.cc
@@ -964,17 +964,13 @@
           if (!kUseReadBarrier) {
             DCHECK_EQ(new_lw.ReadBarrierState(), 0U);
             h_obj->SetLockWord(new_lw, true);
-            if (ATRACE_ENABLED()) {
-              ATRACE_END();
-            }
+            AtraceMonitorUnlock();
             // Success!
             return true;
           } else {
             // Use CAS to preserve the read barrier state.
             if (h_obj->CasLockWordWeakSequentiallyConsistent(lock_word, new_lw)) {
-              if (ATRACE_ENABLED()) {
-                ATRACE_END();
-              }
+              AtraceMonitorUnlock();
               // Success!
               return true;
             }
diff --git a/runtime/native/dalvik_system_DexFile.cc b/runtime/native/dalvik_system_DexFile.cc
index 0126b4d..f30f7a6 100644
--- a/runtime/native/dalvik_system_DexFile.cc
+++ b/runtime/native/dalvik_system_DexFile.cc
@@ -475,15 +475,22 @@
 
 // public API
 static jboolean DexFile_isDexOptNeeded(JNIEnv* env, jclass, jstring javaFilename) {
-  const char* instruction_set = GetInstructionSetString(kRuntimeISA);
-  ScopedUtfChars filename(env, javaFilename);
-  jint status = GetDexOptNeeded(
-      env,
-      filename.c_str(),
-      instruction_set,
-      "speed-profile",
-      /*profile_changed*/false);
-  return (status != OatFileAssistant::kNoDexOptNeeded) ? JNI_TRUE : JNI_FALSE;
+  ScopedUtfChars filename_utf(env, javaFilename);
+  if (env->ExceptionCheck()) {
+    return JNI_FALSE;
+  }
+
+  const char* filename = filename_utf.c_str();
+  if ((filename == nullptr) || !OS::FileExists(filename)) {
+    LOG(ERROR) << "DexFile_isDexOptNeeded file '" << filename << "' does not exist";
+    ScopedLocalRef<jclass> fnfe(env, env->FindClass("java/io/FileNotFoundException"));
+    const char* message = (filename == nullptr) ? "<empty file name>" : filename;
+    env->ThrowNew(fnfe.get(), message);
+    return JNI_FALSE;
+  }
+
+  OatFileAssistant oat_file_assistant(filename, kRuntimeISA, false, false);
+  return oat_file_assistant.IsUpToDate() ? JNI_FALSE : JNI_TRUE;
 }
 
 static jboolean DexFile_isValidCompilerFilter(JNIEnv* env,
diff --git a/runtime/nth_caller_visitor.h b/runtime/nth_caller_visitor.h
index 2295cb4..e9b0d3c 100644
--- a/runtime/nth_caller_visitor.h
+++ b/runtime/nth_caller_visitor.h
@@ -46,6 +46,7 @@
       DCHECK(caller == nullptr);
       if (count == n) {
         caller = m;
+        caller_pc = GetCurrentQuickFramePc();
         return false;
       }
       count++;
@@ -57,6 +58,7 @@
   const bool include_runtime_and_upcalls_;
   size_t count;
   ArtMethod* caller;
+  uintptr_t caller_pc;
 };
 
 }  // namespace art
diff --git a/runtime/oat.cc b/runtime/oat.cc
index 80231f3..aab0e81 100644
--- a/runtime/oat.cc
+++ b/runtime/oat.cc
@@ -182,8 +182,12 @@
 
 void OatHeader::UpdateChecksum(const void* data, size_t length) {
   DCHECK(IsValid());
-  const uint8_t* bytes = reinterpret_cast<const uint8_t*>(data);
-  adler32_checksum_ = adler32(adler32_checksum_, bytes, length);
+  if (data != nullptr) {
+    const uint8_t* bytes = reinterpret_cast<const uint8_t*>(data);
+    adler32_checksum_ = adler32(adler32_checksum_, bytes, length);
+  } else {
+    DCHECK_EQ(0U, length);
+  }
 }
 
 InstructionSet OatHeader::GetInstructionSet() const {
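
The null guard matches zlib's convention: adler32(0L, Z_NULL, 0) yields the initial
checksum value, while a null buffer with a nonzero length is invalid, which the new
DCHECK_EQ(0U, length) asserts. A short sketch of incremental adler32 (standard zlib
API; link with -lz):

#include <zlib.h>
#include <cstdio>

int main() {
  uLong checksum = adler32(0L, Z_NULL, 0);  // obtain the initial value (1)
  const unsigned char part1[] = "oat";
  const unsigned char part2[] = "data";
  checksum = adler32(checksum, part1, sizeof(part1) - 1);  // fold in one chunk
  checksum = adler32(checksum, part2, sizeof(part2) - 1);  // then the next
  printf("%lx\n", checksum);
  return 0;
}
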
diff --git a/runtime/oat_file.cc b/runtime/oat_file.cc
index ae84019..995ea99 100644
--- a/runtime/oat_file.cc
+++ b/runtime/oat_file.cc
@@ -98,6 +98,8 @@
   virtual const uint8_t* FindDynamicSymbolAddress(const std::string& symbol_name,
                                                   std::string* error_msg) const = 0;
 
+  virtual void PreLoad() = 0;
+
   virtual bool Load(const std::string& elf_filename,
                     uint8_t* oat_file_begin,
                     bool writable,
@@ -138,6 +140,9 @@
                                       const char* abs_dex_location,
                                       std::string* error_msg) {
   std::unique_ptr<OatFileBase> ret(new kOatFileBaseSubType(location, executable));
+
+  ret->PreLoad();
+
   if (!ret->Load(elf_filename,
                  oat_file_begin,
                  writable,
@@ -150,6 +155,7 @@
   if (!ret->ComputeFields(requested_base, elf_filename, error_msg)) {
     return nullptr;
   }
+
   ret->PreSetup(elf_filename);
 
   if (!ret->Setup(abs_dex_location, error_msg)) {
@@ -484,39 +490,23 @@
 // OatFile via dlopen //
 ////////////////////////
 
-static bool RegisterOatFileLocation(const std::string& location) {
-  if (!kIsTargetBuild) {
-    Runtime* const runtime = Runtime::Current();
-    if (runtime != nullptr && !runtime->IsAotCompiler()) {
-      return runtime->GetOatFileManager().RegisterOatFileLocation(location);
-    }
-    return false;
-  }
-  return true;
-}
-
-static void UnregisterOatFileLocation(const std::string& location) {
-  if (!kIsTargetBuild) {
-    Runtime* const runtime = Runtime::Current();
-    if (runtime != nullptr && !runtime->IsAotCompiler()) {
-      runtime->GetOatFileManager().UnRegisterOatFileLocation(location);
-    }
-  }
-}
-
 class DlOpenOatFile FINAL : public OatFileBase {
  public:
   DlOpenOatFile(const std::string& filename, bool executable)
       : OatFileBase(filename, executable),
         dlopen_handle_(nullptr),
-        first_oat_(RegisterOatFileLocation(filename)) {
+        shared_objects_before_(0) {
   }
 
   ~DlOpenOatFile() {
     if (dlopen_handle_ != nullptr) {
       dlclose(dlopen_handle_);
+
+      if (!kIsTargetBuild) {
+        MutexLock mu(Thread::Current(), *Locks::host_dlopen_handles_lock_);
+        host_dlopen_handles_.erase(dlopen_handle_);
+      }
     }
-    UnregisterOatFileLocation(GetLocation());
   }
 
  protected:
@@ -530,6 +520,8 @@
     return ptr;
   }
 
+  void PreLoad() OVERRIDE;
+
   bool Load(const std::string& elf_filename,
             uint8_t* oat_file_begin,
             bool writable,
@@ -545,18 +537,54 @@
               uint8_t* oat_file_begin,
               std::string* error_msg);
 
+  // On the host, if the same library is loaded again with dlopen, the same
+  // handle is returned. This differs from the behavior of dlopen on the
+  // target, where dlopen reloads the library at a different address every
+  // time it is loaded. The runtime relies on the target behavior to ensure
+  // that each instance of the loaded library has a unique dex cache. To
+  // avoid problems, we fall back to our own linker when the same library
+  // is opened multiple times on the host. host_dlopen_handles_ is used to
+  // detect that case.
+  // Guarded by host_dlopen_handles_lock_.
+  static std::unordered_set<void*> host_dlopen_handles_;
+
   // dlopen handle during runtime.
   void* dlopen_handle_;  // TODO: Unique_ptr with custom deleter.
 
   // Dummy memory map objects corresponding to the regions mapped by dlopen.
   std::vector<std::unique_ptr<MemMap>> dlopen_mmaps_;
 
-  // Track the registration status (= was this the first oat file) for the location.
-  const bool first_oat_;
+  // The number of shared objects the linker told us about before loading. Used to
+  // (optimistically) optimize the PreSetup stage (see comment there).
+  size_t shared_objects_before_;
 
   DISALLOW_COPY_AND_ASSIGN(DlOpenOatFile);
 };
 
+std::unordered_set<void*> DlOpenOatFile::host_dlopen_handles_;
+
+void DlOpenOatFile::PreLoad() {
+#ifdef __APPLE__
+  UNUSED(shared_objects_before_);
+  LOG(FATAL) << "Should not reach here.";
+  UNREACHABLE();
+#else
+  // Count the shared objects dl_iterate_phdr reports at this point in time.
+  struct dl_iterate_context {
+    static int callback(struct dl_phdr_info *info ATTRIBUTE_UNUSED,
+                        size_t size ATTRIBUTE_UNUSED,
+                        void *data) {
+      reinterpret_cast<dl_iterate_context*>(data)->count++;
+      return 0;  // Continue iteration.
+    }
+    size_t count = 0;
+  } context;
+
+  dl_iterate_phdr(dl_iterate_context::callback, &context);
+  shared_objects_before_ = context.count;
+#endif
+}
+
 bool DlOpenOatFile::Load(const std::string& elf_filename,
                          uint8_t* oat_file_begin,
                          bool writable,
@@ -593,12 +621,6 @@
       *error_msg = "DlOpen disabled for host.";
       return false;
     }
-    // For RAII, tracking multiple loads is done in the constructor and destructor. The result is
-    // stored in the first_oat_ flag.
-    if (!first_oat_) {
-      *error_msg = "Loading oat files multiple times with dlopen not supported on host.";
-      return false;
-    }
   }
 
   bool success = Dlopen(elf_filename, oat_file_begin, error_msg);
@@ -636,8 +658,18 @@
     }                                                           //   (pic boot image).
     dlopen_handle_ = android_dlopen_ext(absolute_path.get(), RTLD_NOW, &extinfo);
 #else
-    dlopen_handle_ = dlopen(absolute_path.get(), RTLD_NOW);
     UNUSED(oat_file_begin);
+    static_assert(!kIsTargetBuild, "host_dlopen_handles_ will leak handles");
+    dlopen_handle_ = dlopen(absolute_path.get(), RTLD_NOW);
+    if (dlopen_handle_ != nullptr) {
+      MutexLock mu(Thread::Current(), *Locks::host_dlopen_handles_lock_);
+      if (!host_dlopen_handles_.insert(dlopen_handle_).second) {
+        dlclose(dlopen_handle_);
+        dlopen_handle_ = nullptr;
+        *error_msg = StringPrintf("host dlopen re-opened '%s'", elf_filename.c_str());
+        return false;
+      }
+    }
 #endif  // ART_TARGET_ANDROID
   }
   if (dlopen_handle_ == nullptr) {
@@ -657,6 +689,14 @@
   struct dl_iterate_context {
     static int callback(struct dl_phdr_info *info, size_t /* size */, void *data) {
       auto* context = reinterpret_cast<dl_iterate_context*>(data);
+      context->shared_objects_seen++;
+      if (context->shared_objects_seen < context->shared_objects_before) {
+        // We haven't yet reached any shared object we hadn't already seen before loading.
+        // Just continue.
+        // Note: this is aggressively optimistic. If another thread was concurrently unloading
+        //       a library, we may skip the entry we are looking for here. However, this does
+        //       not happen often in practice.
+        return 0;
+      }
+
       // See whether this callback corresponds to the file which we have just loaded.
       bool contains_begin = false;
       for (int i = 0; i < info->dlpi_phnum; i++) {
@@ -687,11 +727,22 @@
     }
     const uint8_t* const begin_;
     std::vector<std::unique_ptr<MemMap>>* const dlopen_mmaps_;
-  } context = { Begin(), &dlopen_mmaps_ };
+    const size_t shared_objects_before;
+    size_t shared_objects_seen;
+  };
+  dl_iterate_context context = { Begin(), &dlopen_mmaps_, shared_objects_before_, 0};
 
   if (dl_iterate_phdr(dl_iterate_context::callback, &context) == 0) {
-    PrintFileToLog("/proc/self/maps", LogSeverity::WARNING);
-    LOG(ERROR) << "File " << elf_filename << " loaded with dlopen but cannot find its mmaps.";
+    // Hmm, maybe our optimization went wrong. Try again with shared_objects_before == 0
+    // before giving up. This should be unusual.
+    VLOG(oat) << "Need a second run in PreSetup, didn't find with shared_objects_before="
+              << shared_objects_before_;
+    dl_iterate_context context0 = { Begin(), &dlopen_mmaps_, 0, 0};
+    if (dl_iterate_phdr(dl_iterate_context::callback, &context0) == 0) {
+      // OK, give up and print an error.
+      PrintFileToLog("/proc/self/maps", LogSeverity::WARNING);
+      LOG(ERROR) << "File " << elf_filename << " loaded with dlopen but cannot find its mmaps.";
+    }
   }
 #endif
 }
@@ -728,6 +779,9 @@
     return ptr;
   }
 
+  void PreLoad() OVERRIDE {
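+    // Nothing to do here; only the dlopen-based loader needs the pre-load snapshot.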
+  }
+
   bool Load(const std::string& elf_filename,
             uint8_t* oat_file_begin,  // Override where the file is loaded to if not null
             bool writable,
@@ -897,7 +951,12 @@
   ScopedTrace trace("Open oat file " + location);
   CHECK(!filename.empty()) << location;
   CheckLocation(location);
-  std::unique_ptr<OatFile> ret;
+
+  // Fast-fail if the file does not even exist.
+  if (!OS::FileExists(filename.c_str())) {
+    *error_msg = StringPrintf("File %s does not exist.", filename.c_str());
+    return nullptr;
+  }
 
   // Try dlopen first, as it is required for native debuggability. This will fail fast if dlopen is
   // disabled.
diff --git a/runtime/oat_file_assistant.cc b/runtime/oat_file_assistant.cc
index fba10ca..218c490 100644
--- a/runtime/oat_file_assistant.cc
+++ b/runtime/oat_file_assistant.cc
@@ -153,7 +153,7 @@
 }
 
 OatFileAssistant::DexOptNeeded OatFileAssistant::GetDexOptNeeded(CompilerFilter::Filter target) {
-  bool compilation_desired = CompilerFilter::IsCompilationEnabled(target);
+  bool compilation_desired = CompilerFilter::IsBytecodeCompilationEnabled(target);
 
   // See if the oat file is in good shape as is.
   bool oat_okay = OatFileCompilerFilterIsOkay(target);
@@ -220,6 +220,10 @@
   return true;
 }
 
+bool OatFileAssistant::IsUpToDate() {
+  return OatFileIsUpToDate() || OdexFileIsUpToDate();
+}
+
 OatFileAssistant::ResultOfAttemptToUpdate
 OatFileAssistant::MakeUpToDate(std::string* error_msg) {
   CompilerFilter::Filter target;
@@ -600,7 +604,7 @@
 
   CompilerFilter::Filter current_compiler_filter = file.GetCompilerFilter();
 
-  if (CompilerFilter::IsCompilationEnabled(current_compiler_filter)) {
+  if (CompilerFilter::IsBytecodeCompilationEnabled(current_compiler_filter)) {
     if (!file.IsPic()) {
       const ImageInfo* image_info = GetImageInfo();
       if (image_info == nullptr) {
diff --git a/runtime/oat_file_assistant.h b/runtime/oat_file_assistant.h
index f48cdf3..bb7b408 100644
--- a/runtime/oat_file_assistant.h
+++ b/runtime/oat_file_assistant.h
@@ -149,6 +149,10 @@
   // given compiler filter.
   DexOptNeeded GetDexOptNeeded(CompilerFilter::Filter target_compiler_filter);
 
+  // Returns true if there is up-to-date code for this dex location,
+  // irrespective of the compiler filter of the up-to-date code.
+  bool IsUpToDate();
+
   // Return code used when attempting to generate updated code.
   enum ResultOfAttemptToUpdate {
     kUpdateFailed,        // We tried making the code up to date, but
diff --git a/runtime/oat_file_assistant_test.cc b/runtime/oat_file_assistant_test.cc
index 15a1aa4..c79a9a6 100644
--- a/runtime/oat_file_assistant_test.cc
+++ b/runtime/oat_file_assistant_test.cc
@@ -233,7 +233,7 @@
     EXPECT_TRUE(odex_file->HasPatchInfo());
     EXPECT_EQ(filter, odex_file->GetCompilerFilter());
 
-    if (CompilerFilter::IsCompilationEnabled(filter)) {
+    if (CompilerFilter::IsBytecodeCompilationEnabled(filter)) {
       const std::vector<gc::space::ImageSpace*> image_spaces =
         runtime->GetHeap()->GetBootImageSpaces();
       ASSERT_TRUE(!image_spaces.empty() && image_spaces[0] != nullptr);
diff --git a/runtime/oat_file_manager.cc b/runtime/oat_file_manager.cc
index 0af6716..d7d2b5c 100644
--- a/runtime/oat_file_manager.cc
+++ b/runtime/oat_file_manager.cc
@@ -586,23 +586,25 @@
 
   const OatFile* source_oat_file = nullptr;
 
-  // Update the oat file on disk if we can, based on the --compiler-filter
-  // option derived from the current runtime options.
-  // This may fail, but that's okay. Best effort is all that matters here.
-  switch (oat_file_assistant.MakeUpToDate(/*out*/ &error_msg)) {
-    case OatFileAssistant::kUpdateFailed:
-      LOG(WARNING) << error_msg;
-      break;
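+  // Fast path: if up-to-date code already exists for this location, skip the
+  // update attempt entirely.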
+  if (!oat_file_assistant.IsUpToDate()) {
+    // Update the oat file on disk if we can, based on the --compiler-filter
+    // option derived from the current runtime options.
+    // This may fail, but that's okay. Best effort is all that matters here.
+    switch (oat_file_assistant.MakeUpToDate(/*out*/ &error_msg)) {
+      case OatFileAssistant::kUpdateFailed:
+        LOG(WARNING) << error_msg;
+        break;
 
-    case OatFileAssistant::kUpdateNotAttempted:
-      // Avoid spamming the logs if we decided not to attempt making the oat
-      // file up to date.
-      VLOG(oat) << error_msg;
-      break;
+      case OatFileAssistant::kUpdateNotAttempted:
+        // Avoid spamming the logs if we decided not to attempt making the oat
+        // file up to date.
+        VLOG(oat) << error_msg;
+        break;
 
-    case OatFileAssistant::kUpdateSucceeded:
-      // Nothing to do.
-      break;
+      case OatFileAssistant::kUpdateSucceeded:
+        // Nothing to do.
+        break;
+    }
   }
 
   // Get the oat file on disk.
@@ -728,28 +730,6 @@
   return dex_files;
 }
 
-bool OatFileManager::RegisterOatFileLocation(const std::string& oat_location) {
-  WriterMutexLock mu(Thread::Current(), *Locks::oat_file_count_lock_);
-  auto it = oat_file_count_.find(oat_location);
-  if (it != oat_file_count_.end()) {
-    ++it->second;
-    return false;
-  }
-  oat_file_count_.insert(std::pair<std::string, size_t>(oat_location, 1u));
-  return true;
-}
-
-void OatFileManager::UnRegisterOatFileLocation(const std::string& oat_location) {
-  WriterMutexLock mu(Thread::Current(), *Locks::oat_file_count_lock_);
-  auto it = oat_file_count_.find(oat_location);
-  if (it != oat_file_count_.end()) {
-    --it->second;
-    if (it->second == 0) {
-      oat_file_count_.erase(it);
-    }
-  }
-}
-
 void OatFileManager::DumpForSigQuit(std::ostream& os) {
   ReaderMutexLock mu(Thread::Current(), *Locks::oat_file_manager_lock_);
   std::vector<const OatFile*> boot_oat_files = GetBootOatFiles();
diff --git a/runtime/oat_file_manager.h b/runtime/oat_file_manager.h
index a1d1275..45ac4b7 100644
--- a/runtime/oat_file_manager.h
+++ b/runtime/oat_file_manager.h
@@ -64,16 +64,6 @@
   const OatFile* FindOpenedOatFileFromDexLocation(const std::string& dex_base_location) const
       REQUIRES(!Locks::oat_file_manager_lock_);
 
-  // Attempt to reserve a location, returns false if it is already reserved or already in used by
-  // an oat file.
-  bool RegisterOatFileLocation(const std::string& oat_location)
-      REQUIRES(!Locks::oat_file_count_lock_);
-
-  // Unreserve oat file location, should only be used for error cases since RegisterOatFile will
-  // remove the reserved location.
-  void UnRegisterOatFileLocation(const std::string& oat_location)
-      REQUIRES(!Locks::oat_file_count_lock_);
-
   // Returns true if we have a non pic oat file.
   bool HaveNonPicOatFile() const {
     return have_non_pic_oat_file_;
@@ -132,7 +122,6 @@
       REQUIRES(Locks::oat_file_manager_lock_);
 
   std::set<std::unique_ptr<const OatFile>> oat_files_ GUARDED_BY(Locks::oat_file_manager_lock_);
-  std::unordered_map<std::string, size_t> oat_file_count_ GUARDED_BY(Locks::oat_file_count_lock_);
   bool have_non_pic_oat_file_;
 
   DISALLOW_COPY_AND_ASSIGN(OatFileManager);
diff --git a/runtime/openjdkjvm/OpenjdkJvm.cc b/runtime/openjdkjvm/OpenjdkJvm.cc
index ca5efe5..54ec5d3 100644
--- a/runtime/openjdkjvm/OpenjdkJvm.cc
+++ b/runtime/openjdkjvm/OpenjdkJvm.cc
@@ -58,11 +58,6 @@
 #include <sys/socket.h>
 #include <sys/ioctl.h>
 
-#ifdef ART_TARGET_ANDROID
-// This function is provided by android linker.
-extern "C" void android_update_LD_LIBRARY_PATH(const char* ld_library_path);
-#endif  // ART_TARGET_ANDROID
-
 #undef LOG_TAG
 #define LOG_TAG "artopenjdk"
 
@@ -74,11 +69,15 @@
 /* posix open() with extensions; used by e.g. ZipFile */
 JNIEXPORT jint JVM_Open(const char* fname, jint flags, jint mode) {
     /*
-     * The call is expected to handle JVM_O_DELETE, which causes the file
-     * to be removed after it is opened.  Also, some code seems to
-     * want the special return value JVM_EEXIST if the file open fails
-     * due to O_EXCL.
+     * Some code seems to want the special return value JVM_EEXIST if the
+     * file open fails due to O_EXCL.
      */
+    // Don't use JVM_O_DELETE; it's problematic with FUSE, see b/28901232.
+    if (flags & JVM_O_DELETE) {
+        LOG(FATAL) << "JVM_O_DELETE option is not supported (while opening: '"
+                   << fname << "')";
+    }
+
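+    // JVM_O_DELETE is rejected above; it is still masked out of the flags below.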
     int fd = TEMP_FAILURE_RETRY(open(fname, flags & ~JVM_O_DELETE, mode));
     if (fd < 0) {
         int err = errno;
@@ -89,12 +88,6 @@
         }
     }
 
-    if (flags & JVM_O_DELETE) {
-        if (unlink(fname) != 0) {
-            LOG(WARNING) << "Post-open deletion of '" << fname << "' failed: " << strerror(errno);
-        }
-    }
-
     return fd;
 }
 
@@ -324,22 +317,6 @@
   exit(status);
 }
 
-static void SetLdLibraryPath(JNIEnv* env, jstring javaLdLibraryPath) {
-#ifdef ART_TARGET_ANDROID
-  if (javaLdLibraryPath != nullptr) {
-    ScopedUtfChars ldLibraryPath(env, javaLdLibraryPath);
-    if (ldLibraryPath.c_str() != nullptr) {
-      android_update_LD_LIBRARY_PATH(ldLibraryPath.c_str());
-    }
-  }
-
-#else
-  LOG(WARNING) << "android_update_LD_LIBRARY_PATH not found; .so dependencies will not work!";
-  UNUSED(javaLdLibraryPath, env);
-#endif
-}
-
-
 JNIEXPORT jstring JVM_NativeLoad(JNIEnv* env,
                                  jstring javaFilename,
                                  jobject javaLoader,
@@ -349,17 +326,6 @@
     return NULL;
   }
 
-  int32_t target_sdk_version = art::Runtime::Current()->GetTargetSdkVersion();
-
-  // Starting with N nativeLoad uses classloader local
-  // linker namespace instead of global LD_LIBRARY_PATH
-  // (23 is Marshmallow). This call is here to preserve
-  // backwards compatibility for the apps targeting sdk
-  // version <= 23
-  if (target_sdk_version == 0) {
-    SetLdLibraryPath(env, javaLibrarySearchPath);
-  }
-
   std::string error_msg;
   {
     art::JavaVMExt* vm = art::Runtime::Current()->GetJavaVM();
diff --git a/runtime/quick_exception_handler.cc b/runtime/quick_exception_handler.cc
index a3e1f00..e9dd7aa 100644
--- a/runtime/quick_exception_handler.cc
+++ b/runtime/quick_exception_handler.cc
@@ -50,7 +50,8 @@
       handler_method_(nullptr),
       handler_dex_pc_(0),
       clear_exception_(false),
-      handler_frame_depth_(kInvalidFrameDepth) {}
+      handler_frame_depth_(kInvalidFrameDepth),
+      full_fragment_done_(false) {}
 
 // Finds catch handler.
 class CatchBlockStackVisitor FINAL : public StackVisitor {
@@ -290,7 +291,8 @@
         single_frame_deopt_(single_frame),
         single_frame_done_(false),
         single_frame_deopt_method_(nullptr),
-        single_frame_deopt_quick_method_header_(nullptr) {
+        single_frame_deopt_quick_method_header_(nullptr),
+        callee_method_(nullptr) {
   }
 
   ArtMethod* GetSingleFrameDeoptMethod() const {
@@ -301,23 +303,34 @@
     return single_frame_deopt_quick_method_header_;
   }
 
+  void FinishStackWalk() SHARED_REQUIRES(Locks::mutator_lock_) {
+    // This is the upcall, or the next full frame in single-frame deopt, or the
+    // code isn't deoptimizeable. We remember the frame and last pc so that we
+    // may long jump to them.
+    exception_handler_->SetHandlerQuickFramePc(GetCurrentQuickFramePc());
+    exception_handler_->SetHandlerQuickFrame(GetCurrentQuickFrame());
+    exception_handler_->SetHandlerMethodHeader(GetCurrentOatQuickMethodHeader());
+    if (!stacked_shadow_frame_pushed_) {
+      // In case there is no deoptimized shadow frame for this upcall, we still
+      // need to push a nullptr to the stack since there is always a matching pop after
+      // the long jump.
+      GetThread()->PushStackedShadowFrame(nullptr,
+                                          StackedShadowFrameType::kDeoptimizationShadowFrame);
+      stacked_shadow_frame_pushed_ = true;
+    }
+    if (GetMethod() == nullptr) {
+      exception_handler_->SetFullFragmentDone(true);
+    } else {
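+      // Partial fragment: record the callee method so the long jump can pass it
+      // along as quick-frame argument 0.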
+      CHECK(callee_method_ != nullptr) << art::PrettyMethod(GetMethod(), false);
+      exception_handler_->SetHandlerQuickArg0(reinterpret_cast<uintptr_t>(callee_method_));
+    }
+  }
+
   bool VisitFrame() OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
     exception_handler_->SetHandlerFrameDepth(GetFrameDepth());
     ArtMethod* method = GetMethod();
     if (method == nullptr || single_frame_done_) {
-      // This is the upcall (or the next full frame in single-frame deopt), we remember the frame
-      // and last pc so that we may long jump to them.
-      exception_handler_->SetHandlerQuickFramePc(GetCurrentQuickFramePc());
-      exception_handler_->SetHandlerQuickFrame(GetCurrentQuickFrame());
-      exception_handler_->SetHandlerMethodHeader(GetCurrentOatQuickMethodHeader());
-      if (!stacked_shadow_frame_pushed_) {
-        // In case there is no deoptimized shadow frame for this upcall, we still
-        // need to push a nullptr to the stack since there is always a matching pop after
-        // the long jump.
-        GetThread()->PushStackedShadowFrame(nullptr,
-                                            StackedShadowFrameType::kDeoptimizationShadowFrame);
-        stacked_shadow_frame_pushed_ = true;
-      }
+      FinishStackWalk();
       return false;  // End stack walk.
     } else if (method->IsRuntimeMethod()) {
       // Ignore callee save method.
@@ -328,7 +341,14 @@
       // the native method.
       // The top method is a runtime method, the native method comes next.
       CHECK_EQ(GetFrameDepth(), 1U);
+      callee_method_ = method;
       return true;
+    } else if (!single_frame_deopt_ &&
+               !Runtime::Current()->IsDeoptimizeable(GetCurrentQuickFramePc())) {
+      // We hit some code that's not deoptimizeable. However, single-frame deoptimization triggered
+      // from compiled code is always allowed since HDeoptimize always saves the full environment.
+      FinishStackWalk();
+      return false;  // End stack walk.
     } else {
       // Check if a shadow frame already exists for debugger's set-local-value purpose.
       const size_t frame_id = GetFrameId();
@@ -356,20 +376,17 @@
         // right before interpreter::EnterInterpreterFromDeoptimize().
         stacked_shadow_frame_pushed_ = true;
         GetThread()->PushStackedShadowFrame(
-            new_frame,
-            single_frame_deopt_
-                ? StackedShadowFrameType::kSingleFrameDeoptimizationShadowFrame
-                : StackedShadowFrameType::kDeoptimizationShadowFrame);
+            new_frame, StackedShadowFrameType::kDeoptimizationShadowFrame);
       }
       prev_shadow_frame_ = new_frame;
 
       if (single_frame_deopt_ && !IsInInlinedFrame()) {
         // Single-frame deopt ends at the first non-inlined frame and needs to store that method.
-        exception_handler_->SetHandlerQuickArg0(reinterpret_cast<uintptr_t>(method));
         single_frame_done_ = true;
         single_frame_deopt_method_ = method;
         single_frame_deopt_quick_method_header_ = GetCurrentOatQuickMethodHeader();
       }
+      callee_method_ = method;
       return true;
     }
   }
@@ -478,10 +495,30 @@
   bool single_frame_done_;
   ArtMethod* single_frame_deopt_method_;
   const OatQuickMethodHeader* single_frame_deopt_quick_method_header_;
+  ArtMethod* callee_method_;
 
   DISALLOW_COPY_AND_ASSIGN(DeoptimizeStackVisitor);
 };
 
+void QuickExceptionHandler::PrepareForLongJumpToInvokeStubOrInterpreterBridge() {
+  if (full_fragment_done_) {
+    // Restore deoptimization exception. When returning from the invoke stub,
+    // ArtMethod::Invoke() will see the special exception to know deoptimization
+    // is needed.
+    self_->SetException(Thread::GetDeoptimizationException());
+  } else {
+    // PC needs to be of the quick-to-interpreter bridge.
+    int32_t offset;
+    #ifdef __LP64__
+        offset = GetThreadOffset<8>(kQuickQuickToInterpreterBridge).Int32Value();
+    #else
+        offset = GetThreadOffset<4>(kQuickQuickToInterpreterBridge).Int32Value();
+    #endif
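+    // Read the quick-to-interpreter bridge entrypoint out of the Thread object;
+    // the long jump will land there and re-enter the interpreter.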
+    handler_quick_frame_pc_ = *reinterpret_cast<uintptr_t*>(
+        reinterpret_cast<uint8_t*>(self_) + offset);
+  }
+}
+
 void QuickExceptionHandler::DeoptimizeStack() {
   DCHECK(is_deoptimization_);
   if (kDebugExceptionDelivery) {
@@ -490,9 +527,7 @@
 
   DeoptimizeStackVisitor visitor(self_, context_, this, false);
   visitor.WalkStack(true);
-
-  // Restore deoptimization exception
-  self_->SetException(Thread::GetDeoptimizationException());
+  PrepareForLongJumpToInvokeStubOrInterpreterBridge();
 }
 
 void QuickExceptionHandler::DeoptimizeSingleFrame() {
@@ -518,20 +553,21 @@
         deopt_method, GetQuickToInterpreterBridge());
   }
 
-  // PC needs to be of the quick-to-interpreter bridge.
-  int32_t offset;
-  #ifdef __LP64__
-      offset = GetThreadOffset<8>(kQuickQuickToInterpreterBridge).Int32Value();
-  #else
-      offset = GetThreadOffset<4>(kQuickQuickToInterpreterBridge).Int32Value();
-  #endif
-  handler_quick_frame_pc_ = *reinterpret_cast<uintptr_t*>(
-      reinterpret_cast<uint8_t*>(self_) + offset);
+  PrepareForLongJumpToInvokeStubOrInterpreterBridge();
 }
 
-void QuickExceptionHandler::DeoptimizeSingleFrameArchDependentFixup() {
-  // Architecture-dependent work. This is to get the LR right for x86 and x86-64.
+void QuickExceptionHandler::DeoptimizePartialFragmentFixup(uintptr_t return_pc) {
+  // At this point, the instrumentation stack has been updated. We need to install
+  // the real return pc on the stack, in case the instrumentation stub is stored
+  // there, so that the interpreter bridge code can return to the right place.
+  if (return_pc != 0) {
+    uintptr_t* pc_addr = reinterpret_cast<uintptr_t*>(handler_quick_frame_);
+    CHECK(pc_addr != nullptr);
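+    // The return pc slot sits one word below the quick frame pointer.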
+    pc_addr--;
+    *reinterpret_cast<uintptr_t*>(pc_addr) = return_pc;
+  }
 
+  // Architecture-dependent work. This is to get the LR right for x86 and x86-64.
   if (kRuntimeISA == InstructionSet::kX86 || kRuntimeISA == InstructionSet::kX86_64) {
     // On x86, the return address is on the stack, so just reuse it. Otherwise we would have to
     // change how longjump works.
@@ -581,7 +617,8 @@
   DISALLOW_COPY_AND_ASSIGN(InstrumentationStackVisitor);
 };
 
-void QuickExceptionHandler::UpdateInstrumentationStack() {
+uintptr_t QuickExceptionHandler::UpdateInstrumentationStack() {
+  uintptr_t return_pc = 0;
   if (method_tracing_active_) {
     InstrumentationStackVisitor visitor(self_, handler_frame_depth_);
     visitor.WalkStack(true);
@@ -589,9 +626,10 @@
     size_t instrumentation_frames_to_pop = visitor.GetInstrumentationFramesToPop();
     instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
     for (size_t i = 0; i < instrumentation_frames_to_pop; ++i) {
-      instrumentation->PopMethodForUnwind(self_, is_deoptimization_);
+      return_pc = instrumentation->PopMethodForUnwind(self_, is_deoptimization_);
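+      // Only the return pc of the last popped frame is kept and returned.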
     }
   }
+  return return_pc;
 }
 
 void QuickExceptionHandler::DoLongJump(bool smash_caller_saves) {
diff --git a/runtime/quick_exception_handler.h b/runtime/quick_exception_handler.h
index eedf83f..74b7d0d 100644
--- a/runtime/quick_exception_handler.h
+++ b/runtime/quick_exception_handler.h
@@ -46,15 +46,29 @@
   // Find the catch handler for the given exception.
   void FindCatch(mirror::Throwable* exception) SHARED_REQUIRES(Locks::mutator_lock_);
 
-  // Deoptimize the stack to the upcall. For every compiled frame, we create a "copy"
-  // shadow frame that will be executed with the interpreter.
+  // Deoptimize the stack to the upcall, or to some code that's not deoptimizeable. For
+  // every compiled frame, we create a "copy" shadow frame that will be executed
+  // with the interpreter.
   void DeoptimizeStack() SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Deoptimize a single frame. It's directly triggered from compiled code. It
+  // has the following properties:
+  // - It deoptimizes a single frame, which can include multiple inlined frames.
+  // - It doesn't have a return result or a pending exception at the deoptimization point.
+  // - It always deoptimizes, even if IsDeoptimizeable() returns false for the
+  //   code, since HDeoptimize always saves the full environment. So it overrides
+  //   the result of IsDeoptimizeable().
+  // - It can be either full-fragment, or partial-fragment deoptimization, depending
+  //   on whether that single frame covers full or partial fragment.
   void DeoptimizeSingleFrame() SHARED_REQUIRES(Locks::mutator_lock_);
-  void DeoptimizeSingleFrameArchDependentFixup() SHARED_REQUIRES(Locks::mutator_lock_);
+
+  void DeoptimizePartialFragmentFixup(uintptr_t return_pc)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Update the instrumentation stack by removing all methods that will be unwound
   // by the exception being thrown.
-  void UpdateInstrumentationStack() SHARED_REQUIRES(Locks::mutator_lock_);
+  // Return the return pc of the last frame that's unwound.
+  uintptr_t UpdateInstrumentationStack() SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Set up environment before delivering an exception to optimized code.
   void SetCatchEnvironmentForOptimizedHandler(StackVisitor* stack_visitor)
@@ -103,8 +117,16 @@
     handler_frame_depth_ = frame_depth;
   }
 
+  bool IsFullFragmentDone() const {
+    return full_fragment_done_;
+  }
+
+  void SetFullFragmentDone(bool full_fragment_done) {
+    full_fragment_done_ = full_fragment_done;
+  }
+
   // Walk the stack frames of the given thread, printing out non-runtime methods with their types
-  // of frames. Helps to verify that single-frame deopt really only deopted one frame.
+  // of frames. Helps to verify that partial-fragment deopt really works as expected.
   static void DumpFramesWithType(Thread* self, bool details = false)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
@@ -131,6 +153,13 @@
   bool clear_exception_;
   // Frame depth of the catch handler or the upcall.
   size_t handler_frame_depth_;
+  // Does the handler successfully walk the full fragment (not stopped
+  // by some code that's not deoptimizeable)? Even single-frame deoptimization
+  // can set this to true if the fragment contains only one quick frame.
+  bool full_fragment_done_;
+
+  void PrepareForLongJumpToInvokeStubOrInterpreterBridge()
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   DISALLOW_COPY_AND_ASSIGN(QuickExceptionHandler);
 };
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 63976d0..caf5545 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -1972,6 +1972,11 @@
   return verify_ == verifier::VerifyMode::kSoftFail;
 }
 
+bool Runtime::IsDeoptimizeable(uintptr_t code) const
+    SHARED_REQUIRES(Locks::mutator_lock_) {
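+  // Anything in the boot image oat file is treated as non-deoptimizeable;
+  // all other code is assumed to be deoptimizeable.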
+  return !heap_->IsInBootImageOatFile(reinterpret_cast<void *>(code));
+}
+
 LinearAlloc* Runtime::CreateLinearAlloc() {
   // For 64 bit compilers, it needs to be in low 4GB in the case where we are cross compiling for a
   // 32 bit target. In this case, we have 32 bit pointers in the dex cache arrays which can't hold
diff --git a/runtime/runtime.h b/runtime/runtime.h
index 1394462..b7f377d 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -648,6 +648,10 @@
     return zygote_no_threads_;
   }
 
+  // Returns whether the code can be deoptimized. Code may be compiled with some
+  // optimization that makes it impossible to deoptimize.
+  bool IsDeoptimizeable(uintptr_t code) const SHARED_REQUIRES(Locks::mutator_lock_);
+
  private:
   static void InitPlatformSignalHandlers();
 
diff --git a/runtime/runtime_options.h b/runtime/runtime_options.h
index 4610f6f..ab69d4f 100644
--- a/runtime/runtime_options.h
+++ b/runtime/runtime_options.h
@@ -73,7 +73,7 @@
     using Key = RuntimeArgumentMapKey<TValue>;
 
     // List of key declarations, shorthand for 'static const Key<T> Name'
-#define RUNTIME_OPTIONS_KEY(Type, Name, ...) static const Key<Type> Name;
+#define RUNTIME_OPTIONS_KEY(Type, Name, ...) static const Key<Type> (Name);
 #include "runtime_options.def"
   };
 
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 4248944..f1f4a12 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -55,6 +55,7 @@
 #include "mirror/object_array-inl.h"
 #include "mirror/stack_trace_element.h"
 #include "monitor.h"
+#include "nth_caller_visitor.h"
 #include "oat_quick_method_header.h"
 #include "object_lock.h"
 #include "quick_exception_handler.h"
@@ -84,6 +85,8 @@
 
 namespace art {
 
+extern "C" NO_RETURN void artDeoptimize(Thread* self);
+
 bool Thread::is_started_ = false;
 pthread_key_t Thread::pthread_key_self_;
 ConditionVariable* Thread::resume_cond_ = nullptr;
@@ -270,7 +273,6 @@
   StackedShadowFrameRecord* record = tlsPtr_.stacked_shadow_frame_record;
   if (must_be_present) {
     DCHECK(record != nullptr);
-    DCHECK_EQ(record->GetType(), type);
   } else {
     if (record == nullptr || record->GetType() != type) {
       return nullptr;
@@ -2411,8 +2413,8 @@
 template<size_t ptr_size>
 void Thread::DumpThreadOffset(std::ostream& os, uint32_t offset) {
 #define DO_THREAD_OFFSET(x, y) \
-    if (offset == x.Uint32Value()) { \
-      os << y; \
+    if (offset == (x).Uint32Value()) { \
+      os << (y); \
       return; \
     }
   DO_THREAD_OFFSET(ThreadFlagsOffset<ptr_size>(), "state_and_flags")
@@ -2583,38 +2585,42 @@
   // Get exception from thread.
   mirror::Throwable* exception = GetException();
   CHECK(exception != nullptr);
-  bool is_deoptimization = (exception == GetDeoptimizationException());
-  if (!is_deoptimization) {
-    // This is a real exception: let the instrumentation know about it.
-    instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
-    if (instrumentation->HasExceptionCaughtListeners() &&
-        IsExceptionThrownByCurrentMethod(exception)) {
-      // Instrumentation may cause GC so keep the exception object safe.
-      StackHandleScope<1> hs(this);
-      HandleWrapper<mirror::Throwable> h_exception(hs.NewHandleWrapper(&exception));
-      instrumentation->ExceptionCaughtEvent(this, exception);
-    }
-    // Does instrumentation need to deoptimize the stack?
-    // Note: we do this *after* reporting the exception to instrumentation in case it
-    // now requires deoptimization. It may happen if a debugger is attached and requests
-    // new events (single-step, breakpoint, ...) when the exception is reported.
-    is_deoptimization = Dbg::IsForcedInterpreterNeededForException(this);
-    if (is_deoptimization) {
+  if (exception == GetDeoptimizationException()) {
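+    // The pending "exception" is the deoptimization marker, not a real throwable:
+    // hand control to the deoptimization entrypoint instead of finding a handler.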
+    artDeoptimize(this);
+    UNREACHABLE();
+  }
+
+  // This is a real exception: let the instrumentation know about it.
+  instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
+  if (instrumentation->HasExceptionCaughtListeners() &&
+      IsExceptionThrownByCurrentMethod(exception)) {
+    // Instrumentation may cause GC so keep the exception object safe.
+    StackHandleScope<1> hs(this);
+    HandleWrapper<mirror::Throwable> h_exception(hs.NewHandleWrapper(&exception));
+    instrumentation->ExceptionCaughtEvent(this, exception);
+  }
+  // Does instrumentation need to deoptimize the stack?
+  // Note: we do this *after* reporting the exception to instrumentation in case it
+  // now requires deoptimization. It may happen if a debugger is attached and requests
+  // new events (single-step, breakpoint, ...) when the exception is reported.
+  if (Dbg::IsForcedInterpreterNeededForException(this)) {
+    NthCallerVisitor visitor(this, 0, false);
+    visitor.WalkStack();
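+    // Only force interpretation if the code we would deoptimize supports it.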
+    if (Runtime::Current()->IsDeoptimizeable(visitor.caller_pc)) {
       // Save the exception into the deoptimization context so it can be restored
       // before entering the interpreter.
       PushDeoptimizationContext(
+          JValue(), /* is_reference */ false, /* from_code */ false, exception);
+      artDeoptimize(this);
+      UNREACHABLE();
     }
   }
+
   // Don't leave exception visible while we try to find the handler, which may cause class
   // resolution.
   ClearException();
-  QuickExceptionHandler exception_handler(this, is_deoptimization);
-  if (is_deoptimization) {
-    exception_handler.DeoptimizeStack();
-  } else {
-    exception_handler.FindCatch(exception);
-  }
+  QuickExceptionHandler exception_handler(this, false);
+  exception_handler.FindCatch(exception);
   exception_handler.UpdateInstrumentationStack();
   exception_handler.DoLongJump();
 }
@@ -3024,7 +3030,6 @@
   mirror::Throwable* pending_exception = nullptr;
   bool from_code = false;
   PopDeoptimizationContext(result, &pending_exception, &from_code);
-  CHECK(!from_code) << "Deoptimizing from code should be done with single frame deoptimization";
   SetTopOfStack(nullptr);
   SetTopOfShadowStack(shadow_frame);
 
diff --git a/runtime/thread.h b/runtime/thread.h
index 582a0cd..3c367ee 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -110,7 +110,6 @@
 enum class StackedShadowFrameType {
   kShadowFrameUnderConstruction,
   kDeoptimizationShadowFrame,
-  kSingleFrameDeoptimizationShadowFrame
 };
 
 // This should match RosAlloc::kNumThreadLocalSizeBrackets.
@@ -1119,7 +1118,7 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Avoid use, callers should use SetState. Used only by SignalCatcher::HandleSigQuit, ~Thread and
-  // Dbg::Disconnected.
+  // Dbg::ManageDeoptimization.
   ThreadState SetStateUnsafe(ThreadState new_state) {
     ThreadState old_state = GetState();
     if (old_state == kRunnable && new_state != kRunnable) {
diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc
index 2b96328..b2be770 100644
--- a/runtime/verifier/method_verifier.cc
+++ b/runtime/verifier/method_verifier.cc
@@ -4652,7 +4652,7 @@
       if (field->IsFinal() && field->GetDeclaringClass() != GetDeclaringClass().GetClass()) {
         Fail(VERIFY_ERROR_ACCESS_FIELD) << "cannot modify final field " << PrettyField(field)
                                         << " from other class " << GetDeclaringClass();
-        return;
+        // Keep hunting for possible hard fails.
       }
     }
 
diff --git a/test/082-inline-execute/src/Main.java b/test/082-inline-execute/src/Main.java
index 9aaed9d..bf561e9 100644
--- a/test/082-inline-execute/src/Main.java
+++ b/test/082-inline-execute/src/Main.java
@@ -809,6 +809,7 @@
     Assert.assertEquals(Math.round(-3.0d), -3l);
     Assert.assertEquals(Math.round(0.49999999999999994d), 0l);
     Assert.assertEquals(Math.round(9007199254740991.0d), 9007199254740991l);  // 2^53 - 1
+    Assert.assertEquals(Math.round(-9007199254740991.0d), -9007199254740991l);  // -(2^53 - 1)
     Assert.assertEquals(Math.round(Double.NaN), (long)+0.0d);
     Assert.assertEquals(Math.round(Long.MAX_VALUE + 1.0d), Long.MAX_VALUE);
     Assert.assertEquals(Math.round(Long.MIN_VALUE - 1.0d), Long.MIN_VALUE);
@@ -832,7 +833,16 @@
     Assert.assertEquals(Math.round(-3.0f), -3);
     // 0.4999999701976776123046875
     Assert.assertEquals(Math.round(Float.intBitsToFloat(0x3EFFFFFF)), (int)+0.0f);
+    Assert.assertEquals(Math.round(8388607.0f), 8388607);  // 2^23 - 1
+    Assert.assertEquals(Math.round(8388607.5f), 8388608);  // 2^23 - 0.5
+    Assert.assertEquals(Math.round(8388608.0f), 8388608);  // 2^23
+    Assert.assertEquals(Math.round(-8388607.0f), -8388607);  // -(2^23 - 1)
+    Assert.assertEquals(Math.round(-8388607.5f), -8388607);  // -(2^23 - 0.5)
+    Assert.assertEquals(Math.round(-8388608.0f), -8388608);  // -2^23
     Assert.assertEquals(Math.round(16777215.0f), 16777215);  // 2^24 - 1
+    Assert.assertEquals(Math.round(16777216.0f), 16777216);  // 2^24
+    Assert.assertEquals(Math.round(-16777215.0f), -16777215);  // -(2^24 - 1)
+    Assert.assertEquals(Math.round(-16777216.0f), -16777216);  // -2^24
     Assert.assertEquals(Math.round(Float.NaN), (int)+0.0f);
     Assert.assertEquals(Math.round(Integer.MAX_VALUE + 1.0f), Integer.MAX_VALUE);
     Assert.assertEquals(Math.round(Integer.MIN_VALUE - 1.0f), Integer.MIN_VALUE);
@@ -1144,6 +1154,7 @@
     Assert.assertEquals(StrictMath.round(-3.0d), -3l);
     Assert.assertEquals(StrictMath.round(0.49999999999999994d), 0l);
     Assert.assertEquals(StrictMath.round(9007199254740991.0d), 9007199254740991l);  // 2^53 - 1
+    Assert.assertEquals(StrictMath.round(-9007199254740991.0d), -9007199254740991l);  // -(2^53 - 1)
     Assert.assertEquals(StrictMath.round(Double.NaN), (long)+0.0d);
     Assert.assertEquals(StrictMath.round(Long.MAX_VALUE + 1.0d), Long.MAX_VALUE);
     Assert.assertEquals(StrictMath.round(Long.MIN_VALUE - 1.0d), Long.MIN_VALUE);
@@ -1167,7 +1178,16 @@
     Assert.assertEquals(StrictMath.round(-3.0f), -3);
     // 0.4999999701976776123046875
     Assert.assertEquals(StrictMath.round(Float.intBitsToFloat(0x3EFFFFFF)), (int)+0.0f);
+    Assert.assertEquals(StrictMath.round(8388607.0f), 8388607);  // 2^23 - 1
+    Assert.assertEquals(StrictMath.round(8388607.5f), 8388608);  // 2^23 - 0.5
+    Assert.assertEquals(StrictMath.round(8388608.0f), 8388608);  // 2^23
+    Assert.assertEquals(StrictMath.round(-8388607.0f), -8388607);  // -(2^23 - 1)
+    Assert.assertEquals(StrictMath.round(-8388607.5f), -8388607);  // -(2^23 - 0.5)
+    Assert.assertEquals(StrictMath.round(-8388608.0f), -8388608);  // -2^23
     Assert.assertEquals(StrictMath.round(16777215.0f), 16777215);  // 2^24 - 1
+    Assert.assertEquals(StrictMath.round(16777216.0f), 16777216);  // 2^24
+    Assert.assertEquals(StrictMath.round(-16777215.0f), -16777215);  // -(2^24 - 1)
+    Assert.assertEquals(StrictMath.round(-16777216.0f), -16777216);  // -2^24
     Assert.assertEquals(StrictMath.round(Float.NaN), (int)+0.0f);
     Assert.assertEquals(StrictMath.round(Integer.MAX_VALUE + 1.0f), Integer.MAX_VALUE);
     Assert.assertEquals(StrictMath.round(Integer.MIN_VALUE - 1.0f), Integer.MIN_VALUE);
diff --git a/test/117-nopatchoat/nopatchoat.cc b/test/117-nopatchoat/nopatchoat.cc
index 0dab400..c6a2e9a 100644
--- a/test/117-nopatchoat/nopatchoat.cc
+++ b/test/117-nopatchoat/nopatchoat.cc
@@ -55,7 +55,7 @@
 
     const OatFile* oat_file = oat_dex_file->GetOatFile();
     return !oat_file->IsPic()
-        && CompilerFilter::IsCompilationEnabled(oat_file->GetCompilerFilter());
+        && CompilerFilter::IsBytecodeCompilationEnabled(oat_file->GetCompilerFilter());
   }
 };
 
diff --git a/test/536-checker-intrinsic-optimization/src/Main.java b/test/536-checker-intrinsic-optimization/src/Main.java
index 15a9504..24ed2fe 100644
--- a/test/536-checker-intrinsic-optimization/src/Main.java
+++ b/test/536-checker-intrinsic-optimization/src/Main.java
@@ -107,8 +107,28 @@
   }
 
   /// CHECK-START-X86: boolean Main.stringArgumentNotNull(java.lang.Object) disassembly (after)
-  /// CHECK:          InvokeVirtual {{.*\.equals.*}}
+  /// CHECK:          InvokeVirtual {{.*\.equals.*}} intrinsic:StringEquals
   /// CHECK-NOT:      test
+
+  /// CHECK-START-X86_64: boolean Main.stringArgumentNotNull(java.lang.Object) disassembly (after)
+  /// CHECK:          InvokeVirtual {{.*\.equals.*}} intrinsic:StringEquals
+  /// CHECK-NOT:      test
+
+  /// CHECK-START-ARM: boolean Main.stringArgumentNotNull(java.lang.Object) disassembly (after)
+  /// CHECK:          InvokeVirtual {{.*\.equals.*}} intrinsic:StringEquals
+  // CompareAndBranchIfZero() may emit either CBZ or CMP+BEQ.
+  /// CHECK-NOT:      cbz
+  /// CHECK-NOT:      cmp {{r\d+}}, #0
+  // Terminate the scope for the CHECK-NOT search at the reference or length comparison,
+  // whichever comes first.
+  /// CHECK:          cmp {{r\d+}}, {{r\d+}}
+
+  /// CHECK-START-ARM64: boolean Main.stringArgumentNotNull(java.lang.Object) disassembly (after)
+  /// CHECK:          InvokeVirtual {{.*\.equals.*}} intrinsic:StringEquals
+  /// CHECK-NOT:      cbz
+  // Terminate the scope for the CHECK-NOT search at the reference or length comparison,
+  // whichever comes first.
+  /// CHECK:          cmp {{w.*,}} {{w.*}}
   public static boolean stringArgumentNotNull(Object obj) {
     obj.getClass();
     return "foo".equals(obj);
@@ -116,12 +136,53 @@
 
   // Test is very brittle as it depends on the order we emit instructions.
   /// CHECK-START-X86: boolean Main.stringArgumentIsString() disassembly (after)
-  /// CHECK:      InvokeVirtual
-  /// CHECK:      test
-  /// CHECK:      jz/eq
+  /// CHECK:          InvokeVirtual intrinsic:StringEquals
+  /// CHECK:          test
+  /// CHECK:          jz/eq
   // Check that we don't try to compare the classes.
-  /// CHECK-NOT:  mov
-  /// CHECK:      cmp
+  /// CHECK-NOT:      mov
+  /// CHECK:          cmp
+
+  // Test is very brittle as it depends on the order we emit instructions.
+  /// CHECK-START-X86_64: boolean Main.stringArgumentIsString() disassembly (after)
+  /// CHECK:          InvokeVirtual intrinsic:StringEquals
+  /// CHECK:          test
+  /// CHECK:          jz/eq
+  // Check that we don't try to compare the classes.
+  /// CHECK-NOT:      mov
+  /// CHECK:          cmp
+
+  // Test is brittle as it depends on the class offset being 0.
+  /// CHECK-START-ARM: boolean Main.stringArgumentIsString() disassembly (after)
+  /// CHECK:          InvokeVirtual intrinsic:StringEquals
+  /// CHECK:          {{cbz|cmp}}
+  // Check that we don't try to compare the classes.
+  // The disassembler currently explicitly emits the offset 0 but don't rely on it.
+  // We want to terminate the CHECK-NOT search after two CMPs, one for reference
+  // equality and one for length comparison but these may be emitted in different order,
+  // so repeat the check twice.
+  /// CHECK-NOT:      ldr{{(|.w)}} {{r\d+}}, [{{r\d+}}]
+  /// CHECK-NOT:      ldr{{(|.w)}} {{r\d+}}, [{{r\d+}}, #0]
+  /// CHECK:          cmp {{r\d+}}, {{r\d+}}
+  /// CHECK-NOT:      ldr{{(|.w)}} {{r\d+}}, [{{r\d+}}]
+  /// CHECK-NOT:      ldr{{(|.w)}} {{r\d+}}, [{{r\d+}}, #0]
+  /// CHECK:          cmp {{r\d+}}, {{r\d+}}
+
+  // Test is brittle as it depends on the class offset being 0.
+  /// CHECK-START-ARM64: boolean Main.stringArgumentIsString() disassembly (after)
+  /// CHECK:          InvokeVirtual intrinsic:StringEquals
+  /// CHECK:          cbz
+  // Check that we don't try to compare the classes.
+  // The disassembler currently does not explicitly emit the offset 0 but don't rely on it.
+  // We want to terminate the CHECK-NOT search after two CMPs, one for reference
+  // equality and one for length comparison but these may be emitted in different order,
+  // so repeat the check twice.
+  /// CHECK-NOT:      ldr {{w\d+}}, [{{x\d+}}]
+  /// CHECK-NOT:      ldr {{w\d+}}, [{{x\d+}}, #0]
+  /// CHECK:          cmp {{w\d+}}, {{w\d+}}
+  /// CHECK-NOT:      ldr {{w\d+}}, [{{x\d+}}]
+  /// CHECK-NOT:      ldr {{w\d+}}, [{{x\d+}}, #0]
+  /// CHECK:          cmp {{w\d+}}, {{w\d+}}
   public static boolean stringArgumentIsString() {
     return "foo".equals(myString);
   }
diff --git a/test/597-deopt-new-string/src/Main.java b/test/597-deopt-new-string/src/Main.java
index 1224e40..e78f0d3 100644
--- a/test/597-deopt-new-string/src/Main.java
+++ b/test/597-deopt-new-string/src/Main.java
@@ -48,7 +48,12 @@
             throw new Error();
         }
         char[] arr = {'a', 'b', 'c'};
-        return new String(arr, 0, arr.length);
+        String str = new String(arr, 0, arr.length);
+        if (!str.equals("abc")) {
+            System.out.println("Failure 1! " + str);
+            System.exit(0);
+        }
+        return str;
     }
 
     public void run() {
@@ -68,7 +73,11 @@
         } else {
             // This thread keeps doing new String() from a char array.
             while (!done) {
-                $noinline$run0();
+                String str = $noinline$run0();
+                if (!str.equals("abc")) {
+                    System.out.println("Failure 2! " + str);
+                    System.exit(0);
+                }
             }
         }
     }
diff --git a/test/600-verifier-fails/expected.txt b/test/600-verifier-fails/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/600-verifier-fails/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/600-verifier-fails/info.txt b/test/600-verifier-fails/info.txt
new file mode 100644
index 0000000..478dd9b
--- /dev/null
+++ b/test/600-verifier-fails/info.txt
@@ -0,0 +1,4 @@
+The situation in this test was discovered by running dexfuzz on
+another randomly generated Java test. The soft verification
+failure (on the final field modification) should not hide the hard
+verification failure (on the type mismatch); otherwise a crash may follow later on.
diff --git a/test/600-verifier-fails/smali/sput.smali b/test/600-verifier-fails/smali/sput.smali
new file mode 100644
index 0000000..87f3799
--- /dev/null
+++ b/test/600-verifier-fails/smali/sput.smali
@@ -0,0 +1,23 @@
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LA;
+.super Ljava/lang/Object;
+
+.method public foo(I)V
+.registers 2
+    sput v1, LMain;->staticField:Ljava/lang/String;
+    return-void
+.end method
diff --git a/test/600-verifier-fails/src/Main.java b/test/600-verifier-fails/src/Main.java
new file mode 100644
index 0000000..ba4cc31
--- /dev/null
+++ b/test/600-verifier-fails/src/Main.java
@@ -0,0 +1,30 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Method;
+
+public class Main {
+
+  public static final String staticField = null;
+
+  public static void main(String[] args) throws Exception {
+    try {
+      Class<?> a = Class.forName("A");
+    } catch (java.lang.VerifyError e) {
+      System.out.println("passed");
+    }
+  }
+}
diff --git a/test/601-method-access/expected.txt b/test/601-method-access/expected.txt
new file mode 100644
index 0000000..90fbab8
--- /dev/null
+++ b/test/601-method-access/expected.txt
@@ -0,0 +1 @@
+Got expected failure
diff --git a/test/601-method-access/info.txt b/test/601-method-access/info.txt
new file mode 100644
index 0000000..e38a336
--- /dev/null
+++ b/test/601-method-access/info.txt
@@ -0,0 +1 @@
+Regression test for method access checks.
diff --git a/test/601-method-access/smali/SubClassUsingInaccessibleMethod.smali b/test/601-method-access/smali/SubClassUsingInaccessibleMethod.smali
new file mode 100644
index 0000000..7a896a2
--- /dev/null
+++ b/test/601-method-access/smali/SubClassUsingInaccessibleMethod.smali
@@ -0,0 +1,33 @@
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LSubClassUsingInaccessibleMethod;
+
+.super Lother/PublicClass;
+
+.method public constructor <init>()V
+    .registers 1
+    invoke-direct {p0}, Lother/PublicClass;-><init>()V
+    return-void
+.end method
+
+# Regression test for compiler DCHECK() failure (bogus check) when referencing
+# a package-private method from an indirectly inherited package-private class,
+# using this very class as the declaring class in the MethodId, bug: 28771056.
+.method public test()I
+    .registers 2
+    invoke-virtual {p0}, LSubClassUsingInaccessibleMethod;->otherProtectedClassPackageIntInstanceMethod()I
+    move-result v0
+    return v0
+.end method
diff --git a/test/601-method-access/src/Main.java b/test/601-method-access/src/Main.java
new file mode 100644
index 0000000..838080a
--- /dev/null
+++ b/test/601-method-access/src/Main.java
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.InvocationTargetException;
+
+/*
+ * Test method access through reflection.
+ */
+public class Main {
+  public static void main(String[] args) {
+    try {
+      Class c = Class.forName("SubClassUsingInaccessibleMethod");
+      Object o = c.newInstance();
+      c.getMethod("test").invoke(o, null);
+    } catch (InvocationTargetException ite) {
+      if (ite.getCause() instanceof IllegalAccessError) {
+        System.out.println("Got expected failure");
+      } else {
+        System.out.println("Got unexpected failure " + ite.getCause());
+      }
+    } catch (Exception e) {
+      System.out.println("Got unexpected failure " + e);
+    }
+  }
+}
diff --git a/test/601-method-access/src/other/ProtectedClass.java b/test/601-method-access/src/other/ProtectedClass.java
new file mode 100644
index 0000000..9426884
--- /dev/null
+++ b/test/601-method-access/src/other/ProtectedClass.java
@@ -0,0 +1,24 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package other;
+
+// Class that cannot be accessed outside of this package.
+class ProtectedClass {
+ /* package */ int otherProtectedClassPackageIntInstanceMethod() {
+   return 28;
+ }
+}
diff --git a/test/601-method-access/src/other/PublicClass.java b/test/601-method-access/src/other/PublicClass.java
new file mode 100644
index 0000000..d9f7961
--- /dev/null
+++ b/test/601-method-access/src/other/PublicClass.java
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package other;
+
+// Class that makes the ProtectedClass sub-classable by classes outside of package other.
+public class PublicClass extends ProtectedClass {
+}
diff --git a/test/602-deoptimizeable/expected.txt b/test/602-deoptimizeable/expected.txt
new file mode 100644
index 0000000..f993efc
--- /dev/null
+++ b/test/602-deoptimizeable/expected.txt
@@ -0,0 +1,2 @@
+JNI_OnLoad called
+Finishing
diff --git a/test/602-deoptimizeable/info.txt b/test/602-deoptimizeable/info.txt
new file mode 100644
index 0000000..d0952f9
--- /dev/null
+++ b/test/602-deoptimizeable/info.txt
@@ -0,0 +1 @@
+Test various cases for full/partial-fragment deoptimization.
diff --git a/test/602-deoptimizeable/src/Main.java b/test/602-deoptimizeable/src/Main.java
new file mode 100644
index 0000000..743a579
--- /dev/null
+++ b/test/602-deoptimizeable/src/Main.java
@@ -0,0 +1,219 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Method;
+import java.util.Arrays;
+import java.util.Comparator;
+import java.util.HashMap;
+
+class DummyObject {
+    public static boolean sHashCodeInvoked = false;
+    private int i;
+
+    public DummyObject(int i) {
+        this.i = i;
+    }
+
+    public boolean equals(Object obj) {
+        return (obj instanceof DummyObject) && (i == ((DummyObject)obj).i);
+    }
+
+    public int hashCode() {
+        sHashCodeInvoked = true;
+        Main.assertIsManaged();
+        Main.deoptimizeAll();
+        Main.assertIsInterpreted();
+        Main.assertCallerIsManaged();  // Caller is framework code (HashMap).
+        return i % 64;
+    }
+}
+
+public class Main {
+    static boolean sFlag = false;
+
+    public static native void deoptimizeAll();
+    public static native void undeoptimizeAll();
+    public static native void assertIsInterpreted();
+    public static native void assertIsManaged();
+    public static native void assertCallerIsInterpreted();
+    public static native void assertCallerIsManaged();
+    public static native void disableStackFrameAsserts();
+    public static native boolean hasOatFile();
+    public static native boolean isInterpreted();
+
+    public static void execute(Runnable runnable) throws Exception {
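+      // Run each scenario on its own short-lived thread so every test starts
+      // from a clean managed stack.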
+      Thread t = new Thread(runnable);
+      t.start();
+      t.join();
+    }
+
+    public static void main(String[] args) throws Exception {
+        System.loadLibrary(args[0]);
+        // Only test stack frames in compiled mode.
+        if (!hasOatFile() || isInterpreted()) {
+          disableStackFrameAsserts();
+        }
+        final HashMap<DummyObject, Long> map = new HashMap<DummyObject, Long>();
+
+        // Single-frame deoptimization that covers partial fragment.
+        execute(new Runnable() {
+            public void run() {
+                int[] arr = new int[3];
+                assertIsManaged();
+                int res = $noinline$run1(arr);
+                assertIsManaged();  // Only single frame is deoptimized.
+                if (res != 79) {
+                    System.out.println("Failure 1!");
+                    System.exit(0);
+                }
+            }
+        });
+
+        // Single-frame deoptimization that covers a full fragment.
+        execute(new Runnable() {
+            public void run() {
+                try {
+                    int[] arr = new int[3];
+                    assertIsManaged();
+                    // Use reflection to call $noinline$run2 so that it does
+                    // full-fragment deoptimization since that is an upcall.
+                    Class<?> cls = Class.forName("Main");
+                    Method method = cls.getDeclaredMethod("$noinline$run2", int[].class);
+                    double res = (double)method.invoke(Main.class, arr);
+                    assertIsManaged();  // Only single frame is deoptimized.
+                    if (res != 79.3d) {
+                        System.out.println("Failure 2!");
+                        System.exit(0);
+                    }
+                } catch (Exception e) {
+                    e.printStackTrace();
+                }
+            }
+        });
+
+        // Full-fragment deoptimization.
+        execute(new Runnable() {
+            public void run() {
+                assertIsManaged();
+                float res = $noinline$run3B();
+                assertIsInterpreted();  // Every deoptimizeable method is deoptimized.
+                if (res != 0.034f) {
+                    System.out.println("Failure 3!");
+                    System.exit(0);
+                }
+            }
+        });
+
+        undeoptimizeAll();  // Make compiled code usable again.
+
+        // Partial-fragment deoptimization.
+        execute(new Runnable() {
+            public void run() {
+                try {
+                    assertIsManaged();
+                    map.put(new DummyObject(10), Long.valueOf(100));
+                    assertIsInterpreted();  // Every deoptimizeable method is deoptimized.
+                } catch (Exception e) {
+                    e.printStackTrace();
+                }
+            }
+        });
+
+        undeoptimizeAll();  // Make compiled code usable again.
+
+        if (!DummyObject.sHashCodeInvoked) {
+            System.out.println("hashCode() method not invoked!");
+        }
+        if (map.get(new DummyObject(10)) != 100) {
+            System.out.println("Wrong hashmap value!");
+        }
+        System.out.println("Finishing");
+    }
+
+    public static int $noinline$run1(int[] arr) {
+        assertIsManaged();
+        // Prevent inlining.
+        if (sFlag) {
+            throw new Error();
+        }
+        boolean caught = false;
+        // BCE will use deoptimization for the code below.
+        try {
+            arr[0] = 1;
+            arr[1] = 1;
+            arr[2] = 1;
+            // This causes AIOOBE and triggers deoptimization from compiled code.
+            arr[3] = 1;
+        } catch (ArrayIndexOutOfBoundsException e) {
+            assertIsInterpreted(); // Single-frame deoptimization triggered.
+            caught = true;
+        }
+        if (!caught) {
+            System.out.println("Expected exception");
+        }
+        assertIsInterpreted();
+        return 79;
+    }
+
+    public static double $noinline$run2(int[] arr) {
+        assertIsManaged();
+        // Prevent inlining.
+        if (sFlag) {
+            throw new Error();
+        }
+        boolean caught = false;
+        // BCE will use deoptimization for the code below.
+        try {
+            arr[0] = 1;
+            arr[1] = 1;
+            arr[2] = 1;
+            // This causes AIOOBE and triggers deoptimization from compiled code.
+            arr[3] = 1;
+        } catch (ArrayIndexOutOfBoundsException e) {
+            assertIsInterpreted();  // Single-frame deoptimization triggered.
+            caught = true;
+        }
+        if (!caught) {
+            System.out.println("Expected exception");
+        }
+        assertIsInterpreted();
+        return 79.3d;
+    }
+
+    public static float $noinline$run3A() {
+        assertIsManaged();
+        // Prevent inlining.
+        if (sFlag) {
+            throw new Error();
+        }
+        // Deoptimize callers.
+        deoptimizeAll();
+        assertIsInterpreted();
+        assertCallerIsInterpreted();  // $noinline$run3B is deoptimizeable.
+        return 0.034f;
+    }
+
+    public static float $noinline$run3B() {
+        assertIsManaged();
+        // Prevent inlining.
+        if (sFlag) {
+            throw new Error();
+        }
+        float res = $noinline$run3A();
+        assertIsInterpreted();
+        return res;
+    }
+}
diff --git a/test/804-class-extends-itself/expected.txt b/test/804-class-extends-itself/expected.txt
new file mode 100644
index 0000000..b98f963
--- /dev/null
+++ b/test/804-class-extends-itself/expected.txt
@@ -0,0 +1,2 @@
+Caught ClassCircularityError
+Done!
diff --git a/test/804-class-extends-itself/info.txt b/test/804-class-extends-itself/info.txt
new file mode 100644
index 0000000..c48934c
--- /dev/null
+++ b/test/804-class-extends-itself/info.txt
@@ -0,0 +1 @@
+Exercise class linker check for classes extending themselves (b/28685551).
diff --git a/test/804-class-extends-itself/smali/Main.smali b/test/804-class-extends-itself/smali/Main.smali
new file mode 100644
index 0000000..5c349ed
--- /dev/null
+++ b/test/804-class-extends-itself/smali/Main.smali
@@ -0,0 +1,57 @@
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# We cannot implement Main in Java, as this would require running
+# dexmerger (to merge the Dex file produced from Smali code and the
+# Dex file produced from Java code), which loops indefinitely when
+# processing class B28685551, as this class inherits from itself.  As
+# a workaround, implement Main in Smali (we could also have used
+# multidex, but that requires a custom build script).
+
+.class public LMain;
+.super Ljava/lang/Object;
+
+.method public static main([Ljava/lang/String;)V
+    .registers 3
+    .param p0, "args"
+
+    invoke-static {}, LMain;->test()V
+    sget-object v0, Ljava/lang/System;->out:Ljava/io/PrintStream;
+    const-string v1, "Done!"
+    invoke-virtual {v0, v1}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
+    return-void
+.end method
+
+.method static test()V
+    .registers 4
+
+    :try_start
+    const-string v2, "B28685551"
+    invoke-static {v2}, Ljava/lang/Class;->forName(Ljava/lang/String;)Ljava/lang/Class;
+    :try_end
+    .catch Ljava/lang/ClassCircularityError; {:try_start .. :try_end} :catch
+
+    move-result-object v0
+
+    :goto_7
+    return-void
+
+    :catch
+    move-exception v1
+    .local v1, "e":Ljava/lang/ClassCircularityError;
+    sget-object v2, Ljava/lang/System;->out:Ljava/io/PrintStream;
+    const-string v3, "Caught ClassCircularityError"
+    invoke-virtual {v2, v3}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
+    goto :goto_7
+.end method
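For readability, here is a rough Java equivalent of Main.smali (a sketch only; as the header comment explains, the test cannot actually be built from Java source because dexmerger loops on the self-inheriting class):

    // Rough Java equivalent of the smali above, for readability only.
    public class Main {
        public static void main(String[] args) {
            test();
            System.out.println("Done!");
        }

        static void test() {
            try {
                Class.forName("B28685551");
            } catch (ClassCircularityError e) {
                System.out.println("Caught ClassCircularityError");
            } catch (ClassNotFoundException e) {
                // Checked in Java; the smali only needs the circularity catch.
            }
        }
    }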
diff --git a/test/804-class-extends-itself/smali/b_28685551.smali b/test/804-class-extends-itself/smali/b_28685551.smali
new file mode 100644
index 0000000..d98c6e3
--- /dev/null
+++ b/test/804-class-extends-itself/smali/b_28685551.smali
@@ -0,0 +1,18 @@
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Regression test for a class inheriting from itself. Such a cycle cannot
+# be expressed in Java source (javac rejects cyclic inheritance), so the
+# class is defined directly in smali.
+
+.class public LB28685551;
+.super LB28685551;
diff --git a/test/common/stack_inspect.cc b/test/common/stack_inspect.cc
index 922eae6..85ea1c8 100644
--- a/test/common/stack_inspect.cc
+++ b/test/common/stack_inspect.cc
@@ -37,17 +37,20 @@
   asserts_enabled = false;
 }
 
-
-// public static native boolean isInterpreted();
-
-extern "C" JNIEXPORT jboolean JNICALL Java_Main_isInterpreted(JNIEnv* env, jclass) {
+static jboolean IsInterpreted(JNIEnv* env, jclass, size_t level) {
   ScopedObjectAccess soa(env);
-  NthCallerVisitor caller(soa.Self(), 1, false);
+  NthCallerVisitor caller(soa.Self(), level, false);
   caller.WalkStack();
   CHECK(caller.caller != nullptr);
   return caller.GetCurrentShadowFrame() != nullptr ? JNI_TRUE : JNI_FALSE;
 }
 
+// public static native boolean isInterpreted();
+
+extern "C" JNIEXPORT jboolean JNICALL Java_Main_isInterpreted(JNIEnv* env, jclass klass) {
+  return IsInterpreted(env, klass, 1);
+}
+
 // public static native void assertIsInterpreted();
 
 extern "C" JNIEXPORT void JNICALL Java_Main_assertIsInterpreted(JNIEnv* env, jclass klass) {
@@ -56,10 +59,7 @@
   }
 }
 
-
-// public static native boolean isManaged();
-
-extern "C" JNIEXPORT jboolean JNICALL Java_Main_isManaged(JNIEnv* env, jclass cls) {
+static jboolean IsManaged(JNIEnv* env, jclass cls, size_t level) {
   ScopedObjectAccess soa(env);
 
   mirror::Class* klass = soa.Decode<mirror::Class*>(cls);
@@ -71,13 +71,19 @@
     return JNI_FALSE;
   }
 
-  NthCallerVisitor caller(soa.Self(), 1, false);
+  NthCallerVisitor caller(soa.Self(), level, false);
   caller.WalkStack();
   CHECK(caller.caller != nullptr);
 
   return caller.GetCurrentShadowFrame() != nullptr ? JNI_FALSE : JNI_TRUE;
 }
 
+// public static native boolean isManaged();
+
+extern "C" JNIEXPORT jboolean JNICALL Java_Main_isManaged(JNIEnv* env, jclass cls) {
+  return IsManaged(env, cls, 1);
+}
+
 // public static native void assertIsManaged();
 
 extern "C" JNIEXPORT void JNICALL Java_Main_assertIsManaged(JNIEnv* env, jclass cls) {
@@ -86,4 +92,32 @@
   }
 }
 
+// public static native boolean isCallerInterpreted();
+
+extern "C" JNIEXPORT jboolean JNICALL Java_Main_isCallerInterpreted(JNIEnv* env, jclass klass) {
+  return IsInterpreted(env, klass, 2);
+}
+
+// public static native void assertCallerIsInterpreted();
+
+extern "C" JNIEXPORT void JNICALL Java_Main_assertCallerIsInterpreted(JNIEnv* env, jclass klass) {
+  if (asserts_enabled) {
+    CHECK(Java_Main_isCallerInterpreted(env, klass));
+  }
+}
+
+// public static native boolean isCallerManaged();
+
+extern "C" JNIEXPORT jboolean JNICALL Java_Main_isCallerManaged(JNIEnv* env, jclass cls) {
+  return IsManaged(env, cls, 2);
+}
+
+// public static native void assertCallerIsManaged();
+
+extern "C" JNIEXPORT void JNICALL Java_Main_assertCallerIsManaged(JNIEnv* env, jclass cls) {
+  if (asserts_enabled) {
+    CHECK(Java_Main_isCallerManaged(env, cls));
+  }
+}
+
 }  // namespace art
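On the Java side, these JNI entry points back the native declarations quoted in the comments above. A minimal sketch of a test class declaring and using the new caller-level assertions (the helper method name is illustrative; actual test Main classes declare these natives themselves):

    // Sketch of the Java-side declarations matching the JNI functions above.
    public class Main {
        public static native boolean isInterpreted();
        public static native void assertIsInterpreted();
        public static native boolean isManaged();
        public static native void assertIsManaged();
        // New in this change: walk one frame further (level 2) to inspect the
        // caller of the calling method rather than the calling method itself.
        public static native boolean isCallerInterpreted();
        public static native void assertCallerIsInterpreted();
        public static native boolean isCallerManaged();
        public static native void assertCallerIsManaged();

        static void $noinline$checkMyCaller() {
            // Passes when the method that invoked $noinline$checkMyCaller()
            // is itself running in the interpreter.
            assertCallerIsInterpreted();
        }
    }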
diff --git a/test/etc/run-test-jar b/test/etc/run-test-jar
index aa45d40..4e99702 100755
--- a/test/etc/run-test-jar
+++ b/test/etc/run-test-jar
@@ -42,9 +42,9 @@
 TIME_OUT="gdb"  # "n" (disabled), "timeout" (use timeout), "gdb" (use gdb)
 # Value in seconds
 if [ "$ART_USE_READ_BARRIER" = "true" ]; then
-  TIME_OUT_VALUE=900  # 15 minutes.
+  TIME_OUT_VALUE=1800  # 30 minutes.
 else
-  TIME_OUT_VALUE=600  # 10 minutes.
+  TIME_OUT_VALUE=1200  # 20 minutes.
 fi
 USE_GDB="n"
 USE_JVM="n"
@@ -199,6 +199,10 @@
         shift
         INSTRUCTION_SET_FEATURES="$1"
         shift
+    elif [ "x$1" = "x--timeout" ]; then
+        shift
+        TIME_OUT_VALUE="$1"
+        shift
     elif [ "x$1" = "x--" ]; then
         shift
         break
diff --git a/test/run-test b/test/run-test
index 2710ea3..d4a02f3 100755
--- a/test/run-test
+++ b/test/run-test
@@ -241,7 +241,7 @@
         shift
     elif [ "x$1" = "x--strace" ]; then
         strace="yes"
-        run_args="${run_args} --invoke-with strace --invoke-with -o --invoke-with $tmp_dir/$strace_output"
+        run_args="${run_args} --timeout 1800 --invoke-with strace --invoke-with -o --invoke-with $tmp_dir/$strace_output"
         shift
     elif [ "x$1" = "x--zygote" ]; then
         run_args="${run_args} --zygote"
diff --git a/tools/dexfuzz/src/dexfuzz/executors/Device.java b/tools/dexfuzz/src/dexfuzz/executors/Device.java
index 4a53957..45538fe 100644
--- a/tools/dexfuzz/src/dexfuzz/executors/Device.java
+++ b/tools/dexfuzz/src/dexfuzz/executors/Device.java
@@ -68,7 +68,13 @@
     return envVars.get(key);
   }
 
-  private String getHostCoreImagePath() {
+  private String getHostCoreImagePathWithArch() {
+    // TODO: Using host currently implies x86 (see Options.java); change this when generalized.
+    assert(Options.useArchX86);
+    return androidHostOut + "/framework/x86/core.art";
+  }
+
+  private String getHostCoreImagePathNoArch() {
     return androidHostOut + "/framework/core.art";
   }
 
@@ -80,7 +86,7 @@
     androidHostOut = checkForEnvVar(envVars, "ANDROID_HOST_OUT");
 
     if (Options.executeOnHost) {
-      File coreImage = new File(getHostCoreImagePath());
+      File coreImage = new File(getHostCoreImagePathWithArch());
       if (!coreImage.exists()) {
         Log.errorAndQuit("Host core image not found at " + coreImage.getPath()
             + ". Did you forget to build it?");
@@ -156,7 +162,7 @@
    * Get any extra flags required to execute ART on the host.
    */
   public String getHostExecutionFlags() {
-    return String.format("-Xnorelocate -Ximage:%s", getHostCoreImagePath());
+    return String.format("-Xnorelocate -Ximage:%s", getHostCoreImagePathNoArch());
   }
 
   public String getAndroidHostOut() {
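The split between the two path getters matters because the file that exists on disk is per-ISA, while the -Ximage argument stays ISA-agnostic (the runtime resolves the ISA subdirectory itself when locating the image). A small sketch of the distinction, assuming x86 as the TODO above does (not dexfuzz code):

    import java.io.File;

    // The existence check needs the concrete per-ISA file, while -Ximage is
    // passed without the ISA because the runtime appends it during lookup.
    public class CoreImagePathsSketch {
        public static void main(String[] args) {
            String androidHostOut = System.getenv("ANDROID_HOST_OUT");
            File onDisk = new File(androidHostOut + "/framework/x86/core.art");
            System.out.println("host core image present: " + onDisk.exists());
            System.out.println("host flags: -Xnorelocate -Ximage:"
                    + androidHostOut + "/framework/core.art");
        }
    }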
diff --git a/tools/run-jdwp-tests.sh b/tools/run-jdwp-tests.sh
index b6a19b7..976e1d8 100755
--- a/tools/run-jdwp-tests.sh
+++ b/tools/run-jdwp-tests.sh
@@ -24,7 +24,7 @@
 
 if [ ! -f $test_jack ]; then
   echo "Before running, you must build jdwp tests and vogar:" \
-       "make apache-harmony-jdwp-tests-hostdex vogar vogar.jar"
+       "make apache-harmony-jdwp-tests-hostdex vogar"
   exit 1
 fi
 
@@ -44,6 +44,8 @@
 # By default, we run the whole JDWP test suite.
 test="org.apache.harmony.jpda.tests.share.AllTests"
 host="no"
+# Use the JIT compiler by default.
+use_jit=true
 
 while true; do
   if [[ "$1" == "--mode=host" ]]; then
@@ -62,6 +64,11 @@
   elif [[ $1 == -Ximage:* ]]; then
     image="$1"
     shift
+  elif [[ "$1" == "--no-jit" ]]; then
+    use_jit=false
+    # Remove the --no-jit from the arguments.
+    args=${args/$1}
+    shift
   elif [[ $1 == "--debug" ]]; then
     debug="yes"
     # Remove the --debug from the arguments.
@@ -90,8 +97,12 @@
 if [[ "$image" != "" ]]; then
   vm_args="--vm-arg $image"
 fi
-vm_args="$vm_args --vm-arg -Xusejit:true"
-debuggee_args="$debuggee_args -Xusejit:true"
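+# When the JIT is enabled, restrict AOT to interpret-only so that compiling
+# bytecode is left to the JIT; -Xusejit below switches the JIT itself on or off.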
+if $use_jit; then
+  vm_args="$vm_args --vm-arg -Xcompiler-option --vm-arg --compiler-filter=interpret-only"
+  debuggee_args="$debuggee_args -Xcompiler-option --compiler-filter=interpret-only"
+fi
+vm_args="$vm_args --vm-arg -Xusejit:$use_jit"
+debuggee_args="$debuggee_args -Xusejit:$use_jit"
 if [[ $debug == "yes" ]]; then
   art="$art -d"
   art_debugee="$art_debugee -d"
@@ -111,7 +122,6 @@
       $image_compiler_option \
       --timeout 800 \
       --vm-arg -Djpda.settings.verbose=true \
-      --vm-arg -Djpda.settings.syncPort=34016 \
       --vm-arg -Djpda.settings.transportAddress=127.0.0.1:55107 \
       --vm-arg -Djpda.settings.debuggeeJavaPath="$art_debugee $image $debuggee_args" \
       --classpath $test_jack \
diff --git a/tools/run-libcore-tests.sh b/tools/run-libcore-tests.sh
index 00bb3c5..3e2a512 100755
--- a/tools/run-libcore-tests.sh
+++ b/tools/run-libcore-tests.sh
@@ -28,7 +28,7 @@
 
 if [ ! -f $test_jack ]; then
   echo "Before running, you must build core-tests, jsr166-tests and vogar: \
-        make core-tests jsr166-tests vogar vogar.jar"
+        make core-tests jsr166-tests vogar"
   exit 1
 fi
 
@@ -43,6 +43,9 @@
   emulator="yes"
 fi
 
+# Use the JIT compiler by default.
+use_jit=true
+
 # Packages that currently work correctly with the expectation files.
 working_packages=("dalvik.system"
                   "libcore.icu"
@@ -91,6 +94,11 @@
     # classpath/resources differences when compiling the boot image.
     vogar_args="$vogar_args --vm-arg -Ximage:/non/existent/vogar.art"
     shift
+  elif [[ "$1" == "--no-jit" ]]; then
+    # Remove the --no-jit from the arguments.
+    vogar_args=${vogar_args/$1}
+    use_jit=false
+    shift
   elif [[ "$1" == "--debug" ]]; then
     # Remove the --debug from the arguments.
     vogar_args=${vogar_args/$1}
@@ -111,7 +119,13 @@
 # Use Jack with "1.8" configuration.
 vogar_args="$vogar_args --toolchain jack --language JN"
 
+# JIT settings: when the JIT is on, restrict AOT to interpret-only so that
+# compiling is left to the JIT.
+if $use_jit; then
+  vogar_args="$vogar_args --vm-arg -Xcompiler-option --vm-arg --compiler-filter=interpret-only"
+fi
+vogar_args="$vogar_args --vm-arg -Xusejit:$use_jit"
+
 # Run the tests using vogar.
 echo "Running tests for the following test packages:"
 echo ${working_packages[@]} | tr " " "\n"
-vogar $vogar_args --vm-arg -Xusejit:true $expectations --classpath $jsr166_test_jack --classpath $test_jack ${working_packages[@]}
+vogar $vogar_args $expectations --classpath $jsr166_test_jack --classpath $test_jack ${working_packages[@]}