Merge "RFC: Generate select instruction for conditional returns."
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index ba4581c..11808c1 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -22,6 +22,8 @@
 #include <unordered_set>
 #include <vector>
 
+#include "android-base/strings.h"
+
 #include "arch/instruction_set.h"
 #include "base/array_ref.h"
 #include "base/bit_utils.h"
@@ -379,6 +381,14 @@
 
   bool CanAssumeVerified(ClassReference ref) const;
 
+  // Is `boot_image_filename` the name of a core image (small boot
+  // image used for ART testing only)?
+  static bool IsCoreImageFilename(const std::string& boot_image_filename) {
+    // TODO: This is under-approximating...
+    return android::base::EndsWith(boot_image_filename, "core.art")
+        || android::base::EndsWith(boot_image_filename, "core-optimizing.art");
+  }
+
  private:
   void PreCompile(jobject class_loader,
                   const std::vector<const DexFile*>& dex_files,
diff --git a/compiler/driver/compiler_options.cc b/compiler/driver/compiler_options.cc
index 3cacc2c..538845d 100644
--- a/compiler/driver/compiler_options.cc
+++ b/compiler/driver/compiler_options.cc
@@ -18,6 +18,8 @@
 
 #include <fstream>
 
+#include "runtime.h"
+
 namespace art {
 
 CompilerOptions::CompilerOptions()
@@ -30,6 +32,7 @@
       inline_max_code_units_(kUnsetInlineMaxCodeUnits),
       no_inline_from_(nullptr),
       boot_image_(false),
+      core_image_(false),
       app_image_(false),
       top_k_profile_threshold_(kDefaultTopKProfileThreshold),
       debuggable_(false),
@@ -55,6 +58,19 @@
   // because we don't want to include the PassManagerOptions definition from the header file.
 }
 
+bool CompilerOptions::EmitRunTimeChecksInDebugMode() const {
+  // Run-time checks (e.g. Marking Register checks) are only emitted
+  // in debug mode, and
+  // - when running on device; or
+  // - when running on host, but only
+  //   - when compiling the core image (which is used only for testing); or
+  //   - when JIT compiling (only relevant for non-native methods).
+  // This is to prevent these checks from being emitted into the pre-opted
+  // boot image or pre-opted apps, which are compiled with dex2oatd.
+  return kIsDebugBuild &&
+      (kIsTargetBuild || IsCoreImage() || Runtime::Current()->UseJitCompilation());
+}
+
 void CompilerOptions::ParseHugeMethodMax(const StringPiece& option, UsageFn Usage) {
   ParseUintOption(option, "--huge-method-max", &huge_method_threshold_, Usage);
 }
diff --git a/compiler/driver/compiler_options.h b/compiler/driver/compiler_options.h
index b99263d..1e05c4e 100644
--- a/compiler/driver/compiler_options.h
+++ b/compiler/driver/compiler_options.h
@@ -161,6 +161,9 @@
     return generate_mini_debug_info_;
   }
 
+  // Should run-time checks be emitted in debug mode?
+  bool EmitRunTimeChecksInDebugMode() const;
+
   bool GetGenerateBuildId() const {
     return generate_build_id_;
   }
@@ -177,10 +180,19 @@
     return implicit_suspend_checks_;
   }
 
+  // Are we compiling a boot image?
   bool IsBootImage() const {
     return boot_image_;
   }
 
+  // Are we compiling a core image (small boot image only used for ART testing)?
+  bool IsCoreImage() const {
+    // Ensure that `core_image_` => `boot_image_`.
+    DCHECK(!core_image_ || boot_image_);
+    return core_image_;
+  }
+
+  // Are we compiling an app image?
   bool IsAppImage() const {
     return app_image_;
   }
@@ -266,6 +278,7 @@
   const std::vector<const DexFile*>* no_inline_from_;
 
   bool boot_image_;
+  bool core_image_;
   bool app_image_;
   // When using a profile file only the top K% of the profiled samples will be compiled.
   double top_k_profile_threshold_;
diff --git a/compiler/jni/quick/jni_compiler.cc b/compiler/jni/quick/jni_compiler.cc
index b65b93f..e7e4647 100644
--- a/compiler/jni/quick/jni_compiler.cc
+++ b/compiler/jni/quick/jni_compiler.cc
@@ -219,7 +219,9 @@
   // Assembler that holds generated instructions
   std::unique_ptr<JNIMacroAssembler<kPointerSize>> jni_asm =
       GetMacroAssembler<kPointerSize>(&arena, instruction_set, instruction_set_features);
-  jni_asm->cfi().SetEnabled(driver->GetCompilerOptions().GenerateAnyDebugInfo());
+  const CompilerOptions& compiler_options = driver->GetCompilerOptions();
+  jni_asm->cfi().SetEnabled(compiler_options.GenerateAnyDebugInfo());
+  jni_asm->SetEmitRunTimeChecksInDebugMode(compiler_options.EmitRunTimeChecksInDebugMode());
 
   // Offsets into data structures
   // TODO: if cross compiling these offsets are for the host not the target
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index 2927e1f..0d9d3d4 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -31,12 +31,6 @@
 
 namespace art {
 
-void HGraphBuilder::MaybeRecordStat(MethodCompilationStat compilation_stat) {
-  if (compilation_stats_ != nullptr) {
-    compilation_stats_->RecordStat(compilation_stat);
-  }
-}
-
 bool HGraphBuilder::SkipCompilation(size_t number_of_branches) {
   if (compiler_driver_ == nullptr) {
     // Note that the compiler driver is null when unit testing.
@@ -53,7 +47,8 @@
     VLOG(compiler) << "Skip compilation of huge method "
                    << dex_file_->PrettyMethod(dex_compilation_unit_->GetDexMethodIndex())
                    << ": " << code_item_.insns_size_in_code_units_ << " code units";
-    MaybeRecordStat(MethodCompilationStat::kNotCompiledHugeMethod);
+    MaybeRecordStat(compilation_stats_,
+                    MethodCompilationStat::kNotCompiledHugeMethod);
     return true;
   }
 
@@ -63,7 +58,8 @@
     VLOG(compiler) << "Skip compilation of large method with no branch "
                    << dex_file_->PrettyMethod(dex_compilation_unit_->GetDexMethodIndex())
                    << ": " << code_item_.insns_size_in_code_units_ << " code units";
-    MaybeRecordStat(MethodCompilationStat::kNotCompiledLargeMethodNoBranches);
+    MaybeRecordStat(compilation_stats_,
+                    MethodCompilationStat::kNotCompiledLargeMethodNoBranches);
     return true;
   }
 
diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h
index 43429cf..2c9a9ef 100644
--- a/compiler/optimizing/builder.h
+++ b/compiler/optimizing/builder.h
@@ -109,7 +109,6 @@
   static constexpr const char* kBuilderPassName = "builder";
 
  private:
-  void MaybeRecordStat(MethodCompilationStat compilation_stat);
   bool SkipCompilation(size_t number_of_branches);
 
   HGraph* const graph_;
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index d7d0fff..1e5f1ec 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -610,12 +610,6 @@
   }
 }
 
-void CodeGenerator::MaybeRecordStat(MethodCompilationStat compilation_stat, size_t count) const {
-  if (stats_ != nullptr) {
-    stats_->RecordStat(compilation_stat, count);
-  }
-}
-
 std::unique_ptr<CodeGenerator> CodeGenerator::Create(HGraph* graph,
                                                      InstructionSet instruction_set,
                                                      const InstructionSetFeatures& isa_features,
@@ -1212,10 +1206,10 @@
 
 void CodeGenerator::GenerateNullCheck(HNullCheck* instruction) {
   if (compiler_options_.GetImplicitNullChecks()) {
-    MaybeRecordStat(kImplicitNullCheckGenerated);
+    MaybeRecordStat(stats_, kImplicitNullCheckGenerated);
     GenerateImplicitNullCheck(instruction);
   } else {
-    MaybeRecordStat(kExplicitNullCheckGenerated);
+    MaybeRecordStat(stats_, kExplicitNullCheckGenerated);
     GenerateExplicitNullCheck(instruction);
   }
 }
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 51a0bae..30c2b52 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -254,8 +254,6 @@
 
   const CompilerOptions& GetCompilerOptions() const { return compiler_options_; }
 
-  void MaybeRecordStat(MethodCompilationStat compilation_stat, size_t count = 1) const;
-
   // Saves the register in the stack. Returns the size taken on stack.
   virtual size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) = 0;
   // Restores the register from the stack. Returns the size taken on stack.
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 4999950..3be774a 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -1595,6 +1595,8 @@
       __ Str(wzr, MemOperand(sp, GetStackOffsetOfShouldDeoptimizeFlag()));
     }
   }
+
+  MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
 }
 
 void CodeGeneratorARM64::GenerateFrameExit() {
@@ -3587,6 +3589,7 @@
   }
   if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) {
     GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr);
+    codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
   }
   if (!codegen_->GoesToNextBlock(block, successor)) {
     __ B(codegen_->GetLabelOf(successor));
@@ -4391,6 +4394,7 @@
 
 void InstructionCodeGeneratorARM64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
   codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke);
+  codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
 }
 
 void LocationsBuilderARM64::HandleInvoke(HInvoke* invoke) {
@@ -4459,6 +4463,8 @@
     DCHECK(!codegen_->IsLeafMethod());
     codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
   }
+
+  codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
 }
 
 void LocationsBuilderARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
@@ -4626,6 +4632,7 @@
 
 void InstructionCodeGeneratorARM64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
   codegen_->GenerateInvokePolymorphicCall(invoke);
+  codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
 }
 
 vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativeMethodPatch(
@@ -4801,27 +4808,37 @@
   DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
 
   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
+    codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
     return;
   }
 
-  // Ensure that between the BLR (emitted by GenerateStaticOrDirectCall) and RecordPcInfo there
-  // are no pools emitted.
-  EmissionCheckScope guard(GetVIXLAssembler(), kInvokeCodeMarginSizeInBytes);
-  LocationSummary* locations = invoke->GetLocations();
-  codegen_->GenerateStaticOrDirectCall(
-      invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
+  {
+    // Ensure that between the BLR (emitted by GenerateStaticOrDirectCall) and RecordPcInfo there
+    // are no pools emitted.
+    EmissionCheckScope guard(GetVIXLAssembler(), kInvokeCodeMarginSizeInBytes);
+    LocationSummary* locations = invoke->GetLocations();
+    codegen_->GenerateStaticOrDirectCall(
+        invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
+  }
+
+  codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
 }
 
 void InstructionCodeGeneratorARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
+    codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
     return;
   }
 
-  // Ensure that between the BLR (emitted by GenerateVirtualCall) and RecordPcInfo there
-  // are no pools emitted.
-  EmissionCheckScope guard(GetVIXLAssembler(), kInvokeCodeMarginSizeInBytes);
-  codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
-  DCHECK(!codegen_->IsLeafMethod());
+  {
+    // Ensure that between the BLR (emitted by GenerateVirtualCall) and RecordPcInfo there
+    // are no pools emitted.
+    EmissionCheckScope guard(GetVIXLAssembler(), kInvokeCodeMarginSizeInBytes);
+    codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
+    DCHECK(!codegen_->IsLeafMethod());
+  }
+
+  codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
 }
 
 HLoadClass::LoadKind CodeGeneratorARM64::GetSupportedLoadClassKind(
@@ -4895,6 +4912,7 @@
   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
   if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
     codegen_->GenerateLoadClassRuntimeCall(cls);
+    codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
     return;
   }
   DCHECK(!cls->NeedsAccessCheck());
@@ -4995,6 +5013,7 @@
     } else {
       __ Bind(slow_path->GetExitLabel());
     }
+    codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
   }
 }
 
@@ -5113,6 +5132,7 @@
       codegen_->AddSlowPath(slow_path);
       __ Cbz(out.X(), slow_path->GetEntryLabel());
       __ Bind(slow_path->GetExitLabel());
+      codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
       return;
     }
     case HLoadString::LoadKind::kJitTableAddress: {
@@ -5137,6 +5157,7 @@
   __ Mov(calling_convention.GetRegisterAt(0).W(), load->GetStringIndex().index_);
   codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc());
   CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
+  codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
 }
 
 void LocationsBuilderARM64::VisitLongConstant(HLongConstant* constant) {
@@ -5164,6 +5185,7 @@
   } else {
     CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
   }
+  codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
 }
 
 void LocationsBuilderARM64::VisitMul(HMul* mul) {
@@ -5260,6 +5282,7 @@
       CodeGenerator::GetArrayAllocationEntrypoint(instruction->GetLoadClass()->GetClass());
   codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc());
   CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
+  codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
 }
 
 void LocationsBuilderARM64::VisitNewInstance(HNewInstance* instruction) {
@@ -5296,6 +5319,7 @@
     codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
     CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
   }
+  codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
 }
 
 void LocationsBuilderARM64::VisitNot(HNot* instruction) {
@@ -5644,6 +5668,7 @@
     return;
   }
   GenerateSuspendCheck(instruction, nullptr);
+  codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
 }
 
 void LocationsBuilderARM64::VisitThrow(HThrow* instruction) {
@@ -6021,6 +6046,7 @@
     // Note that GC roots are not affected by heap poisoning, thus we
     // do not have to unpoison `root_reg` here.
   }
+  codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
 }
 
 void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
@@ -6074,22 +6100,25 @@
         obj.GetCode());
     vixl::aarch64::Label* cbnz_label = NewBakerReadBarrierPatch(custom_data);
 
-    EmissionCheckScope guard(GetVIXLAssembler(),
-                             (kPoisonHeapReferences ? 4u : 3u) * vixl::aarch64::kInstructionSize);
-    vixl::aarch64::Label return_address;
-    __ adr(lr, &return_address);
-    __ Bind(cbnz_label);
-    __ cbnz(mr, static_cast<int64_t>(0));  // Placeholder, patched at link-time.
-    static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
-                  "Field LDR must be 1 instruction (4B) before the return address label; "
-                  " 2 instructions (8B) for heap poisoning.");
-    Register ref_reg = RegisterFrom(ref, Primitive::kPrimNot);
-    __ ldr(ref_reg, MemOperand(base.X(), offset));
-    if (needs_null_check) {
-      MaybeRecordImplicitNullCheck(instruction);
+    {
+      EmissionCheckScope guard(GetVIXLAssembler(),
+                               (kPoisonHeapReferences ? 4u : 3u) * vixl::aarch64::kInstructionSize);
+      vixl::aarch64::Label return_address;
+      __ adr(lr, &return_address);
+      __ Bind(cbnz_label);
+      __ cbnz(mr, static_cast<int64_t>(0));  // Placeholder, patched at link-time.
+      static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
+                    "Field LDR must be 1 instruction (4B) before the return address label; "
+                    "2 instructions (8B) for heap poisoning.");
+      Register ref_reg = RegisterFrom(ref, Primitive::kPrimNot);
+      __ ldr(ref_reg, MemOperand(base.X(), offset));
+      if (needs_null_check) {
+        MaybeRecordImplicitNullCheck(instruction);
+      }
+      GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
+      __ Bind(&return_address);
     }
-    GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
-    __ Bind(&return_address);
+    MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__, /* temp_loc */ LocationFrom(ip1));
     return;
   }
 
@@ -6158,19 +6187,22 @@
     vixl::aarch64::Label* cbnz_label = NewBakerReadBarrierPatch(custom_data);
 
     __ Add(temp.X(), obj.X(), Operand(data_offset));
-    EmissionCheckScope guard(GetVIXLAssembler(),
-                             (kPoisonHeapReferences ? 4u : 3u) * vixl::aarch64::kInstructionSize);
-    vixl::aarch64::Label return_address;
-    __ adr(lr, &return_address);
-    __ Bind(cbnz_label);
-    __ cbnz(mr, static_cast<int64_t>(0));  // Placeholder, patched at link-time.
-    static_assert(BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
-                  "Array LDR must be 1 instruction (4B) before the return address label; "
-                  " 2 instructions (8B) for heap poisoning.");
-    __ ldr(ref_reg, MemOperand(temp.X(), index_reg.X(), LSL, scale_factor));
-    DCHECK(!needs_null_check);  // The thunk cannot handle the null check.
-    GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
-    __ Bind(&return_address);
+    {
+      EmissionCheckScope guard(GetVIXLAssembler(),
+                               (kPoisonHeapReferences ? 4u : 3u) * vixl::aarch64::kInstructionSize);
+      vixl::aarch64::Label return_address;
+      __ adr(lr, &return_address);
+      __ Bind(cbnz_label);
+      __ cbnz(mr, static_cast<int64_t>(0));  // Placeholder, patched at link-time.
+      static_assert(BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
+                    "Array LDR must be 1 instruction (4B) before the return address label; "
+                    "2 instructions (8B) for heap poisoning.");
+      __ ldr(ref_reg, MemOperand(temp.X(), index_reg.X(), LSL, scale_factor));
+      DCHECK(!needs_null_check);  // The thunk cannot handle the null check.
+      GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
+      __ Bind(&return_address);
+    }
+    MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__, /* temp_loc */ LocationFrom(ip1));
     return;
   }
 
@@ -6247,6 +6279,7 @@
   GenerateRawReferenceLoad(
       instruction, ref, obj, offset, index, scale_factor, needs_null_check, use_load_acquire);
   __ Bind(slow_path->GetExitLabel());
+  MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
 }
 
 void CodeGeneratorARM64::UpdateReferenceFieldWithBakerReadBarrier(HInstruction* instruction,
@@ -6303,6 +6336,7 @@
   // Fast path: the GC is not marking: nothing to do (the field is
   // up-to-date, and we don't need to load the reference).
   __ Bind(slow_path->GetExitLabel());
+  MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
 }
 
 void CodeGeneratorARM64::GenerateRawReferenceLoad(HInstruction* instruction,
@@ -6381,6 +6415,19 @@
   GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
 }
 
+void CodeGeneratorARM64::MaybeGenerateMarkingRegisterCheck(int code, Location temp_loc) {
+  // The following condition is a compile-time one, so it does not have a run-time cost.
+  if (kEmitCompilerReadBarrier && kUseBakerReadBarrier && kIsDebugBuild) {
+    // The following condition is a run-time one; it is executed after the
+    // previous compile-time test, to avoid penalizing non-debug builds.
+    if (GetCompilerOptions().EmitRunTimeChecksInDebugMode()) {
+      UseScratchRegisterScope temps(GetVIXLAssembler());
+      Register temp = temp_loc.IsValid() ? WRegisterFrom(temp_loc) : temps.AcquireW();
+      GetAssembler()->GenerateMarkingRegisterCheck(temp, code);
+    }
+  }
+}
+
 void CodeGeneratorARM64::GenerateReadBarrierSlow(HInstruction* instruction,
                                                  Location out,
                                                  Location ref,
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 584eead..c339209 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -687,6 +687,22 @@
                                 bool needs_null_check,
                                 bool use_load_acquire);
 
+  // Emit code checking the status of the Marking Register, and
+  // aborting the program if MR does not match the value stored in the
+  // art::Thread object. Code is only emitted in debug mode and if
+  // CompilerOptions::EmitRunTimeChecksInDebugMode returns true.
+  //
+  // Argument `code` is used to identify the different occurrences of
+  // MaybeGenerateMarkingRegisterCheck in the code generator, and is
+  // passed to the BRK instruction.
+  //
+  // If `temp_loc` is a valid location, it is expected to be a
+  // register and will be used as a temporary to generate code;
+  // otherwise, a temporary will be fetched from the core register
+  // scratch pool.
+  virtual void MaybeGenerateMarkingRegisterCheck(int code,
+                                                 Location temp_loc = Location::NoLocation());
+
   // Generate a read barrier for a heap reference within `instruction`
   // using a slow path.
   //
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index 430cdde..3d45dd3 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -94,6 +94,9 @@
 // The reserved entrypoint register for link-time generated thunks.
 const vixl32::Register kBakerCcEntrypointRegister = r4;
 
+// Using a base code helps identify when we hit Marking Register check breakpoints.
+constexpr int kMarkingRegisterCheckBreakCodeBaseCode = 0x10;
+
 #ifdef __
 #error "ARM Codegen VIXL macro-assembler macro already defined."
 #endif
@@ -2690,6 +2693,8 @@
     __ Mov(temp, 0);
     GetAssembler()->StoreToOffset(kStoreWord, temp, sp, GetStackOffsetOfShouldDeoptimizeFlag());
   }
+
+  MaybeGenerateMarkingRegisterCheck(/* code */ 1);
 }
 
 void CodeGeneratorARMVIXL::GenerateFrameExit() {
@@ -2938,6 +2943,7 @@
   }
   if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) {
     GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr);
+    codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 2);
   }
   if (!codegen_->GoesToNextBlock(block, successor)) {
     __ B(codegen_->GetLabelOf(successor));
@@ -3655,6 +3661,7 @@
 
 void InstructionCodeGeneratorARMVIXL::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
   codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke);
+  codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 3);
 }
 
 void LocationsBuilderARMVIXL::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
@@ -3685,12 +3692,15 @@
   DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
 
   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
+    codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 4);
     return;
   }
 
   LocationSummary* locations = invoke->GetLocations();
   codegen_->GenerateStaticOrDirectCall(
       invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
+
+  codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 5);
 }
 
 void LocationsBuilderARMVIXL::HandleInvoke(HInvoke* invoke) {
@@ -3709,11 +3719,14 @@
 
 void InstructionCodeGeneratorARMVIXL::VisitInvokeVirtual(HInvokeVirtual* invoke) {
   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
+    codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 6);
     return;
   }
 
   codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
   DCHECK(!codegen_->IsLeafMethod());
+
+  codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 7);
 }
 
 void LocationsBuilderARMVIXL::VisitInvokeInterface(HInvokeInterface* invoke) {
@@ -3790,6 +3803,8 @@
     codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
     DCHECK(!codegen_->IsLeafMethod());
   }
+
+  codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 8);
 }
 
 void LocationsBuilderARMVIXL::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
@@ -3798,6 +3813,7 @@
 
 void InstructionCodeGeneratorARMVIXL::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
   codegen_->GenerateInvokePolymorphicCall(invoke);
+  codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 9);
 }
 
 void LocationsBuilderARMVIXL::VisitNeg(HNeg* neg) {
@@ -5329,6 +5345,7 @@
     codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
     CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
   }
+  codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 10);
 }
 
 void LocationsBuilderARMVIXL::VisitNewArray(HNewArray* instruction) {
@@ -5348,6 +5365,7 @@
   codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc());
   CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
   DCHECK(!codegen_->IsLeafMethod());
+  codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 11);
 }
 
 void LocationsBuilderARMVIXL::VisitParameterValue(HParameterValue* instruction) {
@@ -6965,6 +6983,7 @@
     return;
   }
   GenerateSuspendCheck(instruction, nullptr);
+  codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 12);
 }
 
 void InstructionCodeGeneratorARMVIXL::GenerateSuspendCheck(HSuspendCheck* instruction,
@@ -7326,6 +7345,7 @@
   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
   if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
     codegen_->GenerateLoadClassRuntimeCall(cls);
+    codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 13);
     return;
   }
   DCHECK(!cls->NeedsAccessCheck());
@@ -7405,6 +7425,7 @@
     } else {
       __ Bind(slow_path->GetExitLabel());
     }
+    codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 14);
   }
 }
 
@@ -7528,6 +7549,7 @@
       codegen_->AddSlowPath(slow_path);
       __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel());
       __ Bind(slow_path->GetExitLabel());
+      codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 15);
       return;
     }
     case HLoadString::LoadKind::kJitTableAddress: {
@@ -7548,6 +7570,7 @@
   __ Mov(calling_convention.GetRegisterAt(0), load->GetStringIndex().index_);
   codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc());
   CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
+  codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 16);
 }
 
 static int32_t GetExceptionTlsOffset() {
@@ -8146,6 +8169,7 @@
   } else {
     CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
   }
+  codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 17);
 }
 
 void LocationsBuilderARMVIXL::VisitAnd(HAnd* instruction) {
@@ -8647,6 +8671,7 @@
     // Note that GC roots are not affected by heap poisoning, thus we
     // do not have to unpoison `root_reg` here.
   }
+  codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 18);
 }
 
 void CodeGeneratorARMVIXL::MaybeAddBakerCcEntrypointTempForFields(LocationSummary* locations) {
@@ -8711,31 +8736,34 @@
         base.GetCode(), obj.GetCode(), narrow);
     vixl32::Label* bne_label = NewBakerReadBarrierPatch(custom_data);
 
-    vixl::EmissionCheckScope guard(
-        GetVIXLAssembler(),
-        (kPoisonHeapReferences ? 5u : 4u) * vixl32::kMaxInstructionSizeInBytes);
-    vixl32::Label return_address;
-    EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
-    __ cmp(mr, Operand(0));
-    EmitPlaceholderBne(this, bne_label);
-    ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
-    __ ldr(EncodingSize(narrow ? Narrow : Wide), ref_reg, MemOperand(base, offset));
-    if (needs_null_check) {
-      MaybeRecordImplicitNullCheck(instruction);
-    }
-    // Note: We need a specific width for the unpoisoning NEG.
-    if (kPoisonHeapReferences) {
-      if (narrow) {
-        // The only 16-bit encoding is T1 which sets flags outside IT block (i.e. RSBS, not RSB).
-        __ rsbs(EncodingSize(Narrow), ref_reg, ref_reg, Operand(0));
-      } else {
-        __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0));
+    {
+      vixl::EmissionCheckScope guard(
+          GetVIXLAssembler(),
+          (kPoisonHeapReferences ? 5u : 4u) * vixl32::kMaxInstructionSizeInBytes);
+      vixl32::Label return_address;
+      EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
+      __ cmp(mr, Operand(0));
+      EmitPlaceholderBne(this, bne_label);
+      ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
+      __ ldr(EncodingSize(narrow ? Narrow : Wide), ref_reg, MemOperand(base, offset));
+      if (needs_null_check) {
+        MaybeRecordImplicitNullCheck(instruction);
       }
+      // Note: We need a specific width for the unpoisoning NEG.
+      if (kPoisonHeapReferences) {
+        if (narrow) {
+          // The only 16-bit encoding is T1 which sets flags outside IT block (i.e. RSBS, not RSB).
+          __ rsbs(EncodingSize(Narrow), ref_reg, ref_reg, Operand(0));
+        } else {
+          __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0));
+        }
+      }
+      __ Bind(&return_address);
+      DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
+                narrow ? BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET
+                       : BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET);
     }
-    __ Bind(&return_address);
-    DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
-              narrow ? BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET
-                     : BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET);
+    MaybeGenerateMarkingRegisterCheck(/* code */ 19, /* temp_loc */ LocationFrom(ip));
     return;
   }
 
@@ -8796,23 +8824,26 @@
     vixl32::Label* bne_label = NewBakerReadBarrierPatch(custom_data);
 
     __ Add(data_reg, obj, Operand(data_offset));
-    vixl::EmissionCheckScope guard(
-        GetVIXLAssembler(),
-        (kPoisonHeapReferences ? 5u : 4u) * vixl32::kMaxInstructionSizeInBytes);
-    vixl32::Label return_address;
-    EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
-    __ cmp(mr, Operand(0));
-    EmitPlaceholderBne(this, bne_label);
-    ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
-    __ ldr(ref_reg, MemOperand(data_reg, index_reg, vixl32::LSL, scale_factor));
-    DCHECK(!needs_null_check);  // The thunk cannot handle the null check.
-    // Note: We need a Wide NEG for the unpoisoning.
-    if (kPoisonHeapReferences) {
-      __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0));
+    {
+      vixl::EmissionCheckScope guard(
+          GetVIXLAssembler(),
+          (kPoisonHeapReferences ? 5u : 4u) * vixl32::kMaxInstructionSizeInBytes);
+      vixl32::Label return_address;
+      EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
+      __ cmp(mr, Operand(0));
+      EmitPlaceholderBne(this, bne_label);
+      ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
+      __ ldr(ref_reg, MemOperand(data_reg, index_reg, vixl32::LSL, scale_factor));
+      DCHECK(!needs_null_check);  // The thunk cannot handle the null check.
+      // Note: We need a Wide NEG for the unpoisoning.
+      if (kPoisonHeapReferences) {
+        __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0));
+      }
+      __ Bind(&return_address);
+      DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
+                BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET);
     }
-    __ Bind(&return_address);
-    DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
-              BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET);
+    MaybeGenerateMarkingRegisterCheck(/* code */ 20, /* temp_loc */ LocationFrom(ip));
     return;
   }
 
@@ -8866,6 +8897,7 @@
   // Fast path: the GC is not marking: just load the reference.
   GenerateRawReferenceLoad(instruction, ref, obj, offset, index, scale_factor, needs_null_check);
   __ Bind(slow_path->GetExitLabel());
+  MaybeGenerateMarkingRegisterCheck(/* code */ 21);
 }
 
 void CodeGeneratorARMVIXL::UpdateReferenceFieldWithBakerReadBarrier(HInstruction* instruction,
@@ -8920,6 +8952,7 @@
   // Fast path: the GC is not marking: nothing to do (the field is
   // up-to-date, and we don't need to load the reference).
   __ Bind(slow_path->GetExitLabel());
+  MaybeGenerateMarkingRegisterCheck(/* code */ 22);
 }
 
 void CodeGeneratorARMVIXL::GenerateRawReferenceLoad(HInstruction* instruction,
@@ -8981,6 +9014,20 @@
   GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
 }
 
+void CodeGeneratorARMVIXL::MaybeGenerateMarkingRegisterCheck(int code, Location temp_loc) {
+  // The following condition is a compile-time one, so it does not have a run-time cost.
+  if (kEmitCompilerReadBarrier && kUseBakerReadBarrier && kIsDebugBuild) {
+    // The following condition is a run-time one; it is executed after the
+    // previous compile-time test, to avoid penalizing non-debug builds.
+    if (GetCompilerOptions().EmitRunTimeChecksInDebugMode()) {
+      UseScratchRegisterScope temps(GetVIXLAssembler());
+      vixl32::Register temp = temp_loc.IsValid() ? RegisterFrom(temp_loc) : temps.Acquire();
+      GetAssembler()->GenerateMarkingRegisterCheck(temp,
+                                                   kMarkingRegisterCheckBreakCodeBaseCode + code);
+    }
+  }
+}
+
 void CodeGeneratorARMVIXL::GenerateReadBarrierSlow(HInstruction* instruction,
                                                    Location out,
                                                    Location ref,
diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h
index 7ab2993..5feb33b 100644
--- a/compiler/optimizing/code_generator_arm_vixl.h
+++ b/compiler/optimizing/code_generator_arm_vixl.h
@@ -661,6 +661,28 @@
                                 ScaleFactor scale_factor,
                                 bool needs_null_check);
 
+  // Emit code checking the status of the Marking Register, and
+  // aborting the program if MR does not match the value stored in the
+  // art::Thread object. Code is only emitted in debug mode and if
+  // CompilerOptions::EmitRunTimeChecksInDebugMode returns true.
+  //
+  // Argument `code` is used to identify the different occurrences of
+  // MaybeGenerateMarkingRegisterCheck in the code generator, and is
+  // used together with kMarkingRegisterCheckBreakCodeBaseCode to
+  // create the value passed to the BKPT instruction. Note that unlike
+  // in the ARM64 code generator, where `__LINE__` is passed as the
+  // `code` argument to
+  // CodeGeneratorARM64::MaybeGenerateMarkingRegisterCheck, we cannot
+  // realistically do that here, as Encoding T1 for the BKPT
+  // instruction only accepts 8-bit immediate values.
+  //
+  // If `temp_loc` is a valid location, it is expected to be a
+  // register and will be used as a temporary to generate code;
+  // otherwise, a temporary will be fetched from the core register
+  // scratch pool.
+  virtual void MaybeGenerateMarkingRegisterCheck(int code,
+                                                 Location temp_loc = Location::NoLocation());
+
   // Generate a read barrier for a heap reference within `instruction`
   // using a slow path.
   //
diff --git a/compiler/optimizing/code_sinking.cc b/compiler/optimizing/code_sinking.cc
index e598e19..6c3a9fd 100644
--- a/compiler/optimizing/code_sinking.cc
+++ b/compiler/optimizing/code_sinking.cc
@@ -414,7 +414,7 @@
     if (!post_dominated.IsBitSet(position->GetBlock()->GetBlockId())) {
       continue;
     }
-    MaybeRecordStat(MethodCompilationStat::kInstructionSunk);
+    MaybeRecordStat(stats_, MethodCompilationStat::kInstructionSunk);
     instruction->MoveBefore(position, /* ensure_safety */ false);
   }
 }
diff --git a/compiler/optimizing/codegen_test_utils.h b/compiler/optimizing/codegen_test_utils.h
index cada2e6..aa4f5da 100644
--- a/compiler/optimizing/codegen_test_utils.h
+++ b/compiler/optimizing/codegen_test_utils.h
@@ -79,6 +79,21 @@
 };
 
 #ifdef ART_ENABLE_CODEGEN_arm
+// Special ARM code generator for codegen testing in a limited code
+// generation environment (i.e. with no runtime support).
+//
+// Note: If we want to exercise certain HIR constructs
+// (e.g. reference field load in Baker read barrier configuration) in
+// codegen tests in the future, we should also:
+// - save the Thread Register (R9) and possibly the Marking Register
+//   (R8) before entering the generated function (both registers are
+//   callee-save in AAPCS);
+// - set these registers to meaningful values before or upon entering
+//   the generated function (so that generated code using them is
+//   correct);
+// - restore their original values before leaving the generated
+//   function.
+
 // Provide our own codegen, that ensures the C calling conventions
 // are preserved. Currently, ART and C do not match as R4 is caller-save
 // in ART, and callee-save in C. Alternatively, we could use or write
@@ -100,6 +115,50 @@
     blocked_core_registers_[arm::R6] = false;
     blocked_core_registers_[arm::R7] = false;
   }
+
+  void MaybeGenerateMarkingRegisterCheck(int code ATTRIBUTE_UNUSED,
+                                         Location temp_loc ATTRIBUTE_UNUSED) OVERRIDE {
+    // When turned on, the marking register checks in
+    // CodeGeneratorARMVIXL::MaybeGenerateMarkingRegisterCheck expect the
+    // Thread Register and the Marking Register to be set to
+    // meaningful values. This is not the case in codegen testing, so
+    // just disable them entirely here (by doing nothing in this
+    // method).
+  }
+};
+#endif
+
+#ifdef ART_ENABLE_CODEGEN_arm64
+// Special ARM64 code generator for codegen testing in a limited code
+// generation environment (i.e. with no runtime support).
+//
+// Note: If we want to exercise certain HIR constructs
+// (e.g. reference field load in Baker read barrier configuration) in
+// codegen tests in the future, we should also:
+// - save the Thread Register (X19) and possibly the Marking Register
+//   (X20) before entering the generated function (both registers are
+//   callee-save in AAPCS64);
+// - set these registers to meaningful values before or upon entering
+//   the generated function (so that generated code using them is
+//   correct);
+// - restore their original values before leaving the generated
+//   function.
+class TestCodeGeneratorARM64 : public arm64::CodeGeneratorARM64 {
+ public:
+  TestCodeGeneratorARM64(HGraph* graph,
+                         const Arm64InstructionSetFeatures& isa_features,
+                         const CompilerOptions& compiler_options)
+      : arm64::CodeGeneratorARM64(graph, isa_features, compiler_options) {}
+
+  void MaybeGenerateMarkingRegisterCheck(int code ATTRIBUTE_UNUSED,
+                                         Location temp_loc ATTRIBUTE_UNUSED) OVERRIDE {
+    // When turned on, the marking register checks in
+    // CodeGeneratorARM64::MaybeGenerateMarkingRegisterCheck expect the
+    // Thread Register and the Marking Register to be set to
+    // meaningful values. This is not the case in codegen testing, so
+    // just disable them entirely here (by doing nothing in this
+    // method).
+  }
 };
 #endif
 
@@ -263,7 +322,8 @@
                     bool has_result,
                     Expected expected) {
   CompilerOptions compiler_options;
-  std::unique_ptr<CodeGenerator> codegen(target_config.CreateCodeGenerator(graph, compiler_options));
+  std::unique_ptr<CodeGenerator> codegen(target_config.CreateCodeGenerator(graph,
+                                                                           compiler_options));
   RunCode(codegen.get(), graph, hook_before_codegen, has_result, expected);
 }
 
@@ -280,9 +340,8 @@
 CodeGenerator* create_codegen_arm64(HGraph* graph, const CompilerOptions& compiler_options) {
   std::unique_ptr<const Arm64InstructionSetFeatures> features_arm64(
       Arm64InstructionSetFeatures::FromCppDefines());
-  return new (graph->GetArena()) arm64::CodeGeneratorARM64(graph,
-                                                           *features_arm64.get(),
-                                                           compiler_options);
+  return new (graph->GetArena())
+      TestCodeGeneratorARM64(graph, *features_arm64.get(), compiler_options);
 }
 #endif
 
diff --git a/compiler/optimizing/dead_code_elimination.cc b/compiler/optimizing/dead_code_elimination.cc
index c31c66a..787296d 100644
--- a/compiler/optimizing/dead_code_elimination.cc
+++ b/compiler/optimizing/dead_code_elimination.cc
@@ -359,7 +359,7 @@
       DCHECK(!inst->IsControlFlow());
       if (inst->IsDeadAndRemovable()) {
         block->RemoveInstruction(inst);
-        MaybeRecordStat(MethodCompilationStat::kRemovedDeadInstruction);
+        MaybeRecordStat(stats_, MethodCompilationStat::kRemovedDeadInstruction);
       }
     }
   }
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index 0141c26..6567a3a 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -75,7 +75,7 @@
 #define LOG_TRY() LOG_INTERNAL("Try inlinining call: ")
 #define LOG_NOTE() LOG_INTERNAL("Note: ")
 #define LOG_SUCCESS() LOG_INTERNAL("Success: ")
-#define LOG_FAIL(stat) MaybeRecordStat(stat); LOG_INTERNAL("Fail: ")
+#define LOG_FAIL(stats_ptr, stat) MaybeRecordStat(stats_ptr, stat); LOG_INTERNAL("Fail: ")
 #define LOG_FAIL_NO_STAT() LOG_INTERNAL("Fail: ")
 
 std::string HInliner::DepthString(int line) const {
@@ -440,9 +440,9 @@
         // Add dependency due to devirtulization. We've assumed resolved_method
         // has single implementation.
         outermost_graph_->AddCHASingleImplementationDependency(resolved_method);
-        MaybeRecordStat(kCHAInline);
+        MaybeRecordStat(stats_, kCHAInline);
       } else {
-        MaybeRecordStat(kInlinedInvokeVirtualOrInterface);
+        MaybeRecordStat(stats_, kInlinedInvokeVirtualOrInterface);
       }
     }
     return result;
@@ -532,7 +532,7 @@
     }
 
     case kInlineCacheMonomorphic: {
-      MaybeRecordStat(kMonomorphicCall);
+      MaybeRecordStat(stats_, kMonomorphicCall);
       if (UseOnlyPolymorphicInliningWithNoDeopt()) {
         return TryInlinePolymorphicCall(invoke_instruction, resolved_method, inline_cache);
       } else {
@@ -541,7 +541,7 @@
     }
 
     case kInlineCachePolymorphic: {
-      MaybeRecordStat(kPolymorphicCall);
+      MaybeRecordStat(stats_, kPolymorphicCall);
       return TryInlinePolymorphicCall(invoke_instruction, resolved_method, inline_cache);
     }
 
@@ -550,7 +550,7 @@
           << "Interface or virtual call to "
           << caller_dex_file.PrettyMethod(invoke_instruction->GetDexMethodIndex())
           << " is megamorphic and not inlined";
-      MaybeRecordStat(kMegamorphicCall);
+      MaybeRecordStat(stats_, kMegamorphicCall);
       return false;
     }
 
@@ -754,7 +754,7 @@
   dex::TypeIndex class_index = FindClassIndexIn(
       GetMonomorphicType(classes), caller_compilation_unit_);
   if (!class_index.IsValid()) {
-    LOG_FAIL(kNotInlinedDexCache)
+    LOG_FAIL(stats_, kNotInlinedDexCache)
         << "Call to " << ArtMethod::PrettyMethod(resolved_method)
         << " from inline cache is not inlined because its class is not"
         << " accessible to the caller";
@@ -803,7 +803,7 @@
                                      /* is_first_run */ false);
   rtp_fixup.Run();
 
-  MaybeRecordStat(kInlinedMonomorphicCall);
+  MaybeRecordStat(stats_, kInlinedMonomorphicCall);
   return true;
 }
 
@@ -993,7 +993,7 @@
     return false;
   }
 
-  MaybeRecordStat(kInlinedPolymorphicCall);
+  MaybeRecordStat(stats_, kInlinedPolymorphicCall);
 
   // Run type propagation to get the guards typed.
   ReferenceTypePropagation rtp_fixup(graph_,
@@ -1199,7 +1199,7 @@
                                      /* is_first_run */ false);
   rtp_fixup.Run();
 
-  MaybeRecordStat(kInlinedPolymorphicCall);
+  MaybeRecordStat(stats_, kInlinedPolymorphicCall);
 
   LOG_SUCCESS() << "Inlined same polymorphic target " << actual_method->PrettyMethod();
   return true;
@@ -1300,14 +1300,14 @@
                                  ReferenceTypeInfo receiver_type,
                                  HInstruction** return_replacement) {
   if (method->IsProxyMethod()) {
-    LOG_FAIL(kNotInlinedProxy)
+    LOG_FAIL(stats_, kNotInlinedProxy)
         << "Method " << method->PrettyMethod()
         << " is not inlined because of unimplemented inline support for proxy methods.";
     return false;
   }
 
   if (CountRecursiveCallsOf(method) > kMaximumNumberOfRecursiveCalls) {
-    LOG_FAIL(kNotInlinedRecursiveBudget)
+    LOG_FAIL(stats_, kNotInlinedRecursiveBudget)
         << "Method "
         << method->PrettyMethod()
         << " is not inlined because it has reached its recursive call budget.";
@@ -1321,10 +1321,10 @@
     if (TryPatternSubstitution(invoke_instruction, method, return_replacement)) {
       LOG_SUCCESS() << "Successfully replaced pattern of invoke "
                     << method->PrettyMethod();
-      MaybeRecordStat(kReplacedInvokeWithSimplePattern);
+      MaybeRecordStat(stats_, kReplacedInvokeWithSimplePattern);
       return true;
     }
-    LOG_FAIL(kNotInlinedWont)
+    LOG_FAIL(stats_, kNotInlinedWont)
         << "Won't inline " << method->PrettyMethod() << " in "
         << outer_compilation_unit_.GetDexFile()->GetLocation() << " ("
         << caller_compilation_unit_.GetDexFile()->GetLocation() << ") from "
@@ -1344,7 +1344,7 @@
 
   size_t inline_max_code_units = compiler_driver_->GetCompilerOptions().GetInlineMaxCodeUnits();
   if (code_item->insns_size_in_code_units_ > inline_max_code_units) {
-    LOG_FAIL(kNotInlinedCodeItem)
+    LOG_FAIL(stats_, kNotInlinedCodeItem)
         << "Method " << method->PrettyMethod()
         << " is not inlined because its code item is too big: "
         << code_item->insns_size_in_code_units_
@@ -1354,13 +1354,13 @@
   }
 
   if (code_item->tries_size_ != 0) {
-    LOG_FAIL(kNotInlinedTryCatch)
+    LOG_FAIL(stats_, kNotInlinedTryCatch)
         << "Method " << method->PrettyMethod() << " is not inlined because of try block";
     return false;
   }
 
   if (!method->IsCompilable()) {
-    LOG_FAIL(kNotInlinedNotVerified)
+    LOG_FAIL(stats_, kNotInlinedNotVerified)
         << "Method " << method->PrettyMethod()
         << " has soft failures un-handled by the compiler, so it cannot be inlined";
   }
@@ -1370,7 +1370,7 @@
     if (Runtime::Current()->UseJitCompilation() ||
         !compiler_driver_->IsMethodVerifiedWithoutFailures(
             method->GetDexMethodIndex(), class_def_idx, *method->GetDexFile())) {
-      LOG_FAIL(kNotInlinedNotVerified)
+      LOG_FAIL(stats_, kNotInlinedNotVerified)
           << "Method " << method->PrettyMethod()
           << " couldn't be verified, so it cannot be inlined";
       return false;
@@ -1381,7 +1381,7 @@
       invoke_instruction->AsInvokeStaticOrDirect()->IsStaticWithImplicitClinitCheck()) {
     // Case of a static method that cannot be inlined because it implicitly
     // requires an initialization check of its declaring class.
-    LOG_FAIL(kNotInlinedDexCache) << "Method " << method->PrettyMethod()
+    LOG_FAIL(stats_, kNotInlinedDexCache) << "Method " << method->PrettyMethod()
              << " is not inlined because it is static and requires a clinit"
              << " check that cannot be emitted due to Dex cache limitations";
     return false;
@@ -1393,7 +1393,7 @@
   }
 
   LOG_SUCCESS() << method->PrettyMethod();
-  MaybeRecordStat(kInlinedInvoke);
+  MaybeRecordStat(stats_, kInlinedInvoke);
   return true;
 }
 
@@ -1677,7 +1677,7 @@
                         handles_);
 
   if (builder.BuildGraph() != kAnalysisSuccess) {
-    LOG_FAIL(kNotInlinedCannotBuild)
+    LOG_FAIL(stats_, kNotInlinedCannotBuild)
         << "Method " << callee_dex_file.PrettyMethod(method_index)
         << " could not be built, so cannot be inlined";
     return false;
@@ -1685,7 +1685,7 @@
 
   if (!RegisterAllocator::CanAllocateRegistersFor(*callee_graph,
                                                   compiler_driver_->GetInstructionSet())) {
-    LOG_FAIL(kNotInlinedRegisterAllocator)
+    LOG_FAIL(stats_, kNotInlinedRegisterAllocator)
         << "Method " << callee_dex_file.PrettyMethod(method_index)
         << " cannot be inlined because of the register allocator";
     return false;
@@ -1738,7 +1738,7 @@
 
   HBasicBlock* exit_block = callee_graph->GetExitBlock();
   if (exit_block == nullptr) {
-    LOG_FAIL(kNotInlinedInfiniteLoop)
+    LOG_FAIL(stats_, kNotInlinedInfiniteLoop)
         << "Method " << callee_dex_file.PrettyMethod(method_index)
         << " could not be inlined because it has an infinite loop";
     return false;
@@ -1749,14 +1749,14 @@
     if (predecessor->GetLastInstruction()->IsThrow()) {
       if (invoke_instruction->GetBlock()->IsTryBlock()) {
         // TODO(ngeoffray): Support adding HTryBoundary in Hgraph::InlineInto.
-        LOG_FAIL(kNotInlinedTryCatch)
+        LOG_FAIL(stats_, kNotInlinedTryCatch)
             << "Method " << callee_dex_file.PrettyMethod(method_index)
             << " could not be inlined because one branch always throws and"
             << " caller is in a try/catch block";
         return false;
       } else if (graph_->GetExitBlock() == nullptr) {
         // TODO(ngeoffray): Support adding HExit in the caller graph.
-        LOG_FAIL(kNotInlinedInfiniteLoop)
+        LOG_FAIL(stats_, kNotInlinedInfiniteLoop)
             << "Method " << callee_dex_file.PrettyMethod(method_index)
             << " could not be inlined because one branch always throws and"
             << " caller does not have an exit block";
@@ -1775,7 +1775,7 @@
   }
 
   if (!has_one_return) {
-    LOG_FAIL(kNotInlinedAlwaysThrows)
+    LOG_FAIL(stats_, kNotInlinedAlwaysThrows)
         << "Method " << callee_dex_file.PrettyMethod(method_index)
         << " could not be inlined because it always throws";
     return false;
@@ -1788,7 +1788,7 @@
       if (block->GetLoopInformation()->IsIrreducible()) {
         // Don't inline methods with irreducible loops, they could prevent some
         // optimizations to run.
-        LOG_FAIL(kNotInlinedIrreducibleLoop)
+        LOG_FAIL(stats_, kNotInlinedIrreducibleLoop)
             << "Method " << callee_dex_file.PrettyMethod(method_index)
             << " could not be inlined because it contains an irreducible loop";
         return false;
@@ -1797,7 +1797,7 @@
         // Don't inline methods with loops without exit, since they cause the
         // loop information to be computed incorrectly when updating after
         // inlining.
-        LOG_FAIL(kNotInlinedLoopWithoutExit)
+        LOG_FAIL(stats_, kNotInlinedLoopWithoutExit)
             << "Method " << callee_dex_file.PrettyMethod(method_index)
             << " could not be inlined because it contains a loop with no exit";
         return false;
@@ -1808,7 +1808,7 @@
          !instr_it.Done();
          instr_it.Advance()) {
       if (++number_of_instructions >= inlining_budget_) {
-        LOG_FAIL(kNotInlinedInstructionBudget)
+        LOG_FAIL(stats_, kNotInlinedInstructionBudget)
             << "Method " << callee_dex_file.PrettyMethod(method_index)
             << " is not inlined because the outer method has reached"
             << " its instruction budget limit.";
@@ -1817,7 +1817,7 @@
       HInstruction* current = instr_it.Current();
       if (current->NeedsEnvironment() &&
           (total_number_of_dex_registers_ >= kMaximumNumberOfCumulatedDexRegisters)) {
-        LOG_FAIL(kNotInlinedEnvironmentBudget)
+        LOG_FAIL(stats_, kNotInlinedEnvironmentBudget)
             << "Method " << callee_dex_file.PrettyMethod(method_index)
             << " is not inlined because its caller has reached"
             << " its environment budget limit.";
@@ -1827,7 +1827,7 @@
       if (current->NeedsEnvironment() &&
           !CanEncodeInlinedMethodInStackMap(*caller_compilation_unit_.GetDexFile(),
                                             resolved_method)) {
-        LOG_FAIL(kNotInlinedStackMaps)
+        LOG_FAIL(stats_, kNotInlinedStackMaps)
             << "Method " << callee_dex_file.PrettyMethod(method_index)
             << " could not be inlined because " << current->DebugName()
             << " needs an environment, is in a different dex file"
@@ -1836,7 +1836,7 @@
       }
 
       if (!same_dex_file && current->NeedsDexCacheOfDeclaringClass()) {
-        LOG_FAIL(kNotInlinedDexCache)
+        LOG_FAIL(stats_, kNotInlinedDexCache)
             << "Method " << callee_dex_file.PrettyMethod(method_index)
             << " could not be inlined because " << current->DebugName()
             << " it is in a different dex file and requires access to the dex cache";
@@ -1848,7 +1848,7 @@
           current->IsUnresolvedStaticFieldSet() ||
           current->IsUnresolvedInstanceFieldSet()) {
         // Entrypoint for unresolved fields does not handle inlined frames.
-        LOG_FAIL(kNotInlinedUnresolvedEntrypoint)
+        LOG_FAIL(stats_, kNotInlinedUnresolvedEntrypoint)
             << "Method " << callee_dex_file.PrettyMethod(method_index)
             << " could not be inlined because it is using an unresolved"
             << " entrypoint";
diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc
index 143c77f..ca3b191 100644
--- a/compiler/optimizing/instruction_builder.cc
+++ b/compiler/optimizing/instruction_builder.cc
@@ -28,12 +28,6 @@
 
 namespace art {
 
-void HInstructionBuilder::MaybeRecordStat(MethodCompilationStat compilation_stat) {
-  if (compilation_stats_ != nullptr) {
-    compilation_stats_->RecordStat(compilation_stat);
-  }
-}
-
 HBasicBlock* HInstructionBuilder::FindBlockStartingAt(uint32_t dex_pc) const {
   return block_builder_->GetBlockAt(dex_pc);
 }
@@ -670,6 +664,9 @@
       DCHECK(fence_target != nullptr);
 
       AppendInstruction(new (arena_) HConstructorFence(fence_target, dex_pc, arena_));
+      MaybeRecordStat(
+          compilation_stats_,
+          MethodCompilationStat::kConstructorFenceGeneratedFinal);
     }
     AppendInstruction(new (arena_) HReturnVoid(dex_pc));
   } else {
@@ -816,7 +813,8 @@
   ArtMethod* resolved_method = ResolveMethod(method_idx, invoke_type);
 
   if (UNLIKELY(resolved_method == nullptr)) {
-    MaybeRecordStat(MethodCompilationStat::kUnresolvedMethod);
+    MaybeRecordStat(compilation_stats_,
+                    MethodCompilationStat::kUnresolvedMethod);
     HInvoke* invoke = new (arena_) HInvokeUnresolved(arena_,
                                                      number_of_arguments,
                                                      return_type,
@@ -1039,6 +1037,9 @@
   HConstructorFence* ctor_fence =
       new (arena_) HConstructorFence(allocation, allocation->GetDexPc(), arena_);
   AppendInstruction(ctor_fence);
+  MaybeRecordStat(
+      compilation_stats_,
+      MethodCompilationStat::kConstructorFenceGeneratedNew);
 }
 
 static bool IsSubClass(mirror::Class* to_test, mirror::Class* super_class)
@@ -1122,7 +1123,8 @@
       VLOG(compiler) << "Did not compile "
                      << dex_file_->PrettyMethod(dex_compilation_unit_->GetDexMethodIndex())
                      << " because of non-sequential dex register pair in wide argument";
-      MaybeRecordStat(MethodCompilationStat::kNotCompiledMalformedOpcode);
+      MaybeRecordStat(compilation_stats_,
+                      MethodCompilationStat::kNotCompiledMalformedOpcode);
       return false;
     }
     HInstruction* arg = LoadLocal(is_range ? register_index + i : args[i], type);
@@ -1136,7 +1138,8 @@
     VLOG(compiler) << "Did not compile "
                    << dex_file_->PrettyMethod(dex_compilation_unit_->GetDexMethodIndex())
                    << " because of wrong number of arguments in invoke instruction";
-    MaybeRecordStat(MethodCompilationStat::kNotCompiledMalformedOpcode);
+    MaybeRecordStat(compilation_stats_,
+                    MethodCompilationStat::kNotCompiledMalformedOpcode);
     return false;
   }
 
@@ -1286,7 +1289,8 @@
     HInstruction* value = LoadLocal(source_or_dest_reg, field_type);
     HInstruction* field_set = nullptr;
     if (resolved_field == nullptr) {
-      MaybeRecordStat(MethodCompilationStat::kUnresolvedField);
+      MaybeRecordStat(compilation_stats_,
+                      MethodCompilationStat::kUnresolvedField);
       field_set = new (arena_) HUnresolvedInstanceFieldSet(object,
                                                            value,
                                                            field_type,
@@ -1309,7 +1313,8 @@
   } else {
     HInstruction* field_get = nullptr;
     if (resolved_field == nullptr) {
-      MaybeRecordStat(MethodCompilationStat::kUnresolvedField);
+      MaybeRecordStat(compilation_stats_,
+                      MethodCompilationStat::kUnresolvedField);
       field_get = new (arena_) HUnresolvedInstanceFieldGet(object,
                                                            field_type,
                                                            field_index,
@@ -1444,7 +1449,8 @@
   ArtField* resolved_field = ResolveField(field_index, /* is_static */ true, is_put);
 
   if (resolved_field == nullptr) {
-    MaybeRecordStat(MethodCompilationStat::kUnresolvedField);
+    MaybeRecordStat(compilation_stats_,
+                    MethodCompilationStat::kUnresolvedField);
     Primitive::Type field_type = GetFieldAccessType(*dex_file_, field_index);
     BuildUnresolvedStaticFieldAccess(instruction, dex_pc, is_put, field_type);
     return true;
@@ -1462,7 +1468,8 @@
   if (constant == nullptr) {
     // The class cannot be referenced from this compiled code. Generate
     // an unresolved access.
-    MaybeRecordStat(MethodCompilationStat::kUnresolvedFieldNotAFastAccess);
+    MaybeRecordStat(compilation_stats_,
+                    MethodCompilationStat::kUnresolvedFieldNotAFastAccess);
     BuildUnresolvedStaticFieldAccess(instruction, dex_pc, is_put, field_type);
     return true;
   }
@@ -2823,7 +2830,8 @@
                      << dex_file_->PrettyMethod(dex_compilation_unit_->GetDexMethodIndex())
                      << " because of unhandled instruction "
                      << instruction.Name();
-      MaybeRecordStat(MethodCompilationStat::kNotCompiledUnhandledInstruction);
+      MaybeRecordStat(compilation_stats_,
+                      MethodCompilationStat::kNotCompiledUnhandledInstruction);
       return false;
   }
   return true;
diff --git a/compiler/optimizing/instruction_builder.h b/compiler/optimizing/instruction_builder.h
index 2a9b9f5..b7fa394 100644
--- a/compiler/optimizing/instruction_builder.h
+++ b/compiler/optimizing/instruction_builder.h
@@ -78,8 +78,6 @@
   bool Build();
 
  private:
-  void MaybeRecordStat(MethodCompilationStat compilation_stat);
-
   void InitializeBlockLocals();
   void PropagateLocalsToCatchBlocks();
   void SetLoopHeaderPhiInputs();
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index 02cfbbc..5c79511 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -43,13 +43,7 @@
   void RecordSimplification() {
     simplification_occurred_ = true;
     simplifications_at_current_position_++;
-    MaybeRecordStat(kInstructionSimplifications);
-  }
-
-  void MaybeRecordStat(MethodCompilationStat stat) {
-    if (stats_ != nullptr) {
-      stats_->RecordStat(stat);
-    }
+    MaybeRecordStat(stats_, kInstructionSimplifications);
   }
 
   bool ReplaceRotateWithRor(HBinaryOperation* op, HUShr* ushr, HShl* shl);
@@ -517,7 +511,7 @@
 
   if (object->IsNullConstant()) {
     check_cast->GetBlock()->RemoveInstruction(check_cast);
-    MaybeRecordStat(MethodCompilationStat::kRemovedCheckedCast);
+    MaybeRecordStat(stats_, MethodCompilationStat::kRemovedCheckedCast);
     return;
   }
 
@@ -527,7 +521,7 @@
   if (TypeCheckHasKnownOutcome(load_class, object, &outcome)) {
     if (outcome) {
       check_cast->GetBlock()->RemoveInstruction(check_cast);
-      MaybeRecordStat(MethodCompilationStat::kRemovedCheckedCast);
+      MaybeRecordStat(stats_, MethodCompilationStat::kRemovedCheckedCast);
       if (!load_class->HasUses()) {
         // We cannot rely on DCE to remove the class because the `HLoadClass` thinks it can throw.
         // However, here we know that it cannot because the checkcast was successful, hence
@@ -557,7 +551,7 @@
 
   HGraph* graph = GetGraph();
   if (object->IsNullConstant()) {
-    MaybeRecordStat(kRemovedInstanceOf);
+    MaybeRecordStat(stats_, kRemovedInstanceOf);
     instruction->ReplaceWith(graph->GetIntConstant(0));
     instruction->GetBlock()->RemoveInstruction(instruction);
     RecordSimplification();
@@ -568,7 +562,7 @@
   // the return value check with the `outcome` check, b/27651442 .
   bool outcome = false;
   if (TypeCheckHasKnownOutcome(load_class, object, &outcome)) {
-    MaybeRecordStat(kRemovedInstanceOf);
+    MaybeRecordStat(stats_, kRemovedInstanceOf);
     if (outcome && can_be_null) {
       // Type test will succeed, we just need a null test.
       HNotEqual* test = new (graph->GetArena()) HNotEqual(graph->GetNullConstant(), object);
diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc
index 7bdeef5..11725f4 100644
--- a/compiler/optimizing/intrinsics.cc
+++ b/compiler/optimizing/intrinsics.cc
@@ -154,7 +154,8 @@
                                  NeedsEnvironmentOrCache(intrinsic),
                                  GetSideEffects(intrinsic),
                                  GetExceptions(intrinsic));
-            MaybeRecordStat(MethodCompilationStat::kIntrinsicRecognized);
+            MaybeRecordStat(stats_,
+                            MethodCompilationStat::kIntrinsicRecognized);
           }
         }
       }
diff --git a/compiler/optimizing/licm.cc b/compiler/optimizing/licm.cc
index f0086fb..10524b0 100644
--- a/compiler/optimizing/licm.cc
+++ b/compiler/optimizing/licm.cc
@@ -141,7 +141,7 @@
             DCHECK(!instruction->HasEnvironment());
           }
           instruction->MoveBefore(pre_header->GetLastInstruction());
-          MaybeRecordStat(MethodCompilationStat::kLoopInvariantMoved);
+          MaybeRecordStat(stats_, MethodCompilationStat::kLoopInvariantMoved);
         } else if (instruction->CanThrow() || instruction->DoesAnyWrite()) {
           // If `instruction` can do something visible (throw or write),
           // we cannot move further instructions that can throw.
diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc
index fddda3d..98b8592 100644
--- a/compiler/optimizing/load_store_elimination.cc
+++ b/compiler/optimizing/load_store_elimination.cc
@@ -40,8 +40,9 @@
  public:
   LSEVisitor(HGraph* graph,
              const HeapLocationCollector& heap_locations_collector,
-             const SideEffectsAnalysis& side_effects)
-      : HGraphVisitor(graph),
+             const SideEffectsAnalysis& side_effects,
+             OptimizingCompilerStats* stats)
+      : HGraphVisitor(graph, stats),
         heap_location_collector_(heap_locations_collector),
         side_effects_(side_effects),
         heap_values_for_(graph->GetBlocks().size(),
@@ -100,7 +101,10 @@
     //   * - Constructor fences (they never escape this thread).
     //   * - Allocations (if they are unused).
     for (HInstruction* new_instance : singleton_new_instances_) {
-      HConstructorFence::RemoveConstructorFences(new_instance);
+      size_t removed = HConstructorFence::RemoveConstructorFences(new_instance);
+      MaybeRecordStat(stats_,
+                      MethodCompilationStat::kConstructorFenceRemovedLSE,
+                      removed);
 
       if (!new_instance->HasNonEnvironmentUses()) {
         new_instance->RemoveEnvironmentUsers();
@@ -108,7 +112,10 @@
       }
     }
     for (HInstruction* new_array : singleton_new_arrays_) {
-      HConstructorFence::RemoveConstructorFences(new_array);
+      size_t removed = HConstructorFence::RemoveConstructorFences(new_array);
+      MaybeRecordStat(stats_,
+                      MethodCompilationStat::kConstructorFenceRemovedLSE,
+                      removed);
 
       if (!new_array->HasNonEnvironmentUses()) {
         new_array->RemoveEnvironmentUsers();
@@ -663,7 +670,7 @@
     return;
   }
 
-  LSEVisitor lse_visitor(graph_, heap_location_collector, side_effects_);
+  LSEVisitor lse_visitor(graph_, heap_location_collector, side_effects_, stats_);
   for (HBasicBlock* block : graph_->GetReversePostOrder()) {
     lse_visitor.VisitBasicBlock(block);
   }
diff --git a/compiler/optimizing/load_store_elimination.h b/compiler/optimizing/load_store_elimination.h
index efe71c7..20a8a76 100644
--- a/compiler/optimizing/load_store_elimination.h
+++ b/compiler/optimizing/load_store_elimination.h
@@ -28,8 +28,9 @@
  public:
   LoadStoreElimination(HGraph* graph,
                        const SideEffectsAnalysis& side_effects,
-                       const LoadStoreAnalysis& lsa)
-      : HOptimization(graph, kLoadStoreEliminationPassName),
+                       const LoadStoreAnalysis& lsa,
+                       OptimizingCompilerStats* stats)
+      : HOptimization(graph, kLoadStoreEliminationPassName, stats),
         side_effects_(side_effects),
         lsa_(lsa) {}
 
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index 4143462..027ba77 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -490,13 +490,18 @@
   for (HInstructionIterator it(header->GetPhis()); !it.Done(); it.Advance()) {
     HPhi* phi = it.Current()->AsPhi();
     if (TrySetPhiInduction(phi, /*restrict_uses*/ true) &&
-        CanRemoveCycle() &&
         TryAssignLastValue(node->loop_info, phi, preheader, /*collect_loop_uses*/ false)) {
-      simplified_ = true;
-      for (HInstruction* i : *iset_) {
-        RemoveFromCycle(i);
+      // Note that it is OK to have replaced uses after the loop with the last value even
+      // when the cycle cannot be removed: environment uses (which are the reason the cycle
+      // may not be removable) within the loop still hold the right value. We must, however,
+      // have tried to replace the outside uses first.
+      if (CanRemoveCycle()) {
+        simplified_ = true;
+        for (HInstruction* i : *iset_) {
+          RemoveFromCycle(i);
+        }
+        DCHECK(CheckInductionSetFullyRemoved(iset_));
       }
-      DCHECK(CheckInductionSetFullyRemoved(iset_));
     }
   }
 }
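
Note on the hunk above: replacing the uses of an induction variable after the
loop with its last value is now attempted even when the induction cycle itself
cannot be deleted. A minimal sketch of the situation, using a hypothetical
Java-level example written here as C++ (not code from this patch):

    // The phi cycle for `i` can be kept alive by environment (deopt) uses
    // inside the loop, yet the use of `i` after the loop may still be
    // replaced with the closed-form last value (here `n`, assuming n >= 0).
    int CountUp(int n) {
      int i = 0;
      for (int k = 0; k < n; ++k) {
        ++i;  // induction cycle: phi(i) -> i + 1 -> phi(i)
      }
      return i;  // outside use: rewritable to `n` even if the cycle stays
    }
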
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index de4dc06..1510eaf 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -1216,11 +1216,14 @@
   DCHECK_EQ(0u, InputCount());
 }
 
-void HConstructorFence::RemoveConstructorFences(HInstruction* instruction) {
+size_t HConstructorFence::RemoveConstructorFences(HInstruction* instruction) {
   DCHECK(instruction->GetBlock() != nullptr);
   // Removing constructor fences only makes sense for instructions with an object return type.
   DCHECK_EQ(Primitive::kPrimNot, instruction->GetType());
 
+  // Return how many instructions were removed, for statistics purposes.
+  size_t remove_count = 0;
+
   // Efficient implementation that simultaneously (in one pass):
   // * Scans the uses list for all constructor fences.
   // * Deletes that constructor fence from the uses list of `instruction`.
@@ -1268,6 +1271,7 @@
       // is removed.
       if (ctor_fence->InputCount() == 0u) {
         ctor_fence->GetBlock()->RemoveInstruction(ctor_fence);
+        ++remove_count;
       }
     }
   }
@@ -1281,6 +1285,8 @@
     }
     CHECK(instruction->GetBlock() != nullptr);
   }
+
+  return remove_count;
 }
 
 HInstruction* HConstructorFence::GetAssociatedAllocation() {
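
The void-to-size_t change lets one pass both unlink the fences and report how
many were dropped. A standalone sketch of that shape, with toy types rather
than the real ART graph:

    #include <cstddef>
    #include <vector>

    // Remove all matching nodes and report how many, so the caller can feed
    // the count into an optional statistics counter.
    size_t RemoveMatching(std::vector<int>& nodes, int value) {
      size_t removed = 0;
      for (auto it = nodes.begin(); it != nodes.end();) {
        if (*it == value) {
          it = nodes.erase(it);
          ++removed;  // mirrors ++remove_count above
        } else {
          ++it;
        }
      }
      return removed;
    }
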
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 0b47574..f60d532 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -6632,7 +6632,9 @@
   // This must *not* be called during/after prepare_for_register_allocation,
   // because that removes all the inputs to the fences but the fence is actually
   // still considered live.
-  static void RemoveConstructorFences(HInstruction* instruction);
+  //
+  // Returns how many HConstructorFence instructions were removed from the graph.
+  static size_t RemoveConstructorFences(HInstruction* instruction);
 
   // Check if this constructor fence is protecting
   // an HNewInstance or HNewArray that is also the immediate
@@ -6880,9 +6882,13 @@
 
 namespace art {
 
+class OptimizingCompilerStats;
+
 class HGraphVisitor : public ValueObject {
  public:
-  explicit HGraphVisitor(HGraph* graph) : graph_(graph) {}
+  explicit HGraphVisitor(HGraph* graph, OptimizingCompilerStats* stats = nullptr)
+      : stats_(stats),
+        graph_(graph) {}
   virtual ~HGraphVisitor() {}
 
   virtual void VisitInstruction(HInstruction* instruction ATTRIBUTE_UNUSED) {}
@@ -6904,6 +6910,9 @@
 
 #undef DECLARE_VISIT_INSTRUCTION
 
+ protected:
+  OptimizingCompilerStats* stats_;
+
  private:
   HGraph* const graph_;
 
@@ -6912,7 +6921,8 @@
 
 class HGraphDelegateVisitor : public HGraphVisitor {
  public:
-  explicit HGraphDelegateVisitor(HGraph* graph) : HGraphVisitor(graph) {}
+  explicit HGraphDelegateVisitor(HGraph* graph, OptimizingCompilerStats* stats = nullptr)
+      : HGraphVisitor(graph, stats) {}
   virtual ~HGraphDelegateVisitor() {}
 
   // Visit functions that delegate to the super class.
diff --git a/compiler/optimizing/optimization.cc b/compiler/optimizing/optimization.cc
index 3d76949..1e68ca2 100644
--- a/compiler/optimizing/optimization.cc
+++ b/compiler/optimizing/optimization.cc
@@ -17,11 +17,4 @@
 #include "optimization.h"
 
 namespace art {
-
-void HOptimization::MaybeRecordStat(MethodCompilationStat compilation_stat, size_t count) const {
-  if (stats_ != nullptr) {
-    stats_->RecordStat(compilation_stat, count);
-  }
-}
-
 }  // namespace art
diff --git a/compiler/optimizing/optimization.h b/compiler/optimizing/optimization.h
index 0819fb0..ce41a2e 100644
--- a/compiler/optimizing/optimization.h
+++ b/compiler/optimizing/optimization.h
@@ -47,8 +47,6 @@
   virtual void Run() = 0;
 
  protected:
-  void MaybeRecordStat(MethodCompilationStat compilation_stat, size_t count = 1) const;
-
   HGraph* const graph_;
   // Used to record stats about the optimization.
   OptimizingCompilerStats* const stats_;
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index af9ab67..e98c97c 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -22,8 +22,6 @@
 
 #include <stdint.h>
 
-#include "android-base/strings.h"
-
 #ifdef ART_ENABLE_CODEGEN_arm64
 #include "instruction_simplifier_arm64.h"
 #endif
@@ -329,12 +327,6 @@
 
   void UnInit() const OVERRIDE;
 
-  void MaybeRecordStat(MethodCompilationStat compilation_stat) const {
-    if (compilation_stats_.get() != nullptr) {
-      compilation_stats_->RecordStat(compilation_stat);
-    }
-  }
-
   bool JitCompile(Thread* self,
                   jit::JitCodeCache* code_cache,
                   ArtMethod* method,
@@ -512,7 +504,8 @@
   } else if (opt_name == LoadStoreElimination::kLoadStoreEliminationPassName) {
     CHECK(most_recent_side_effects != nullptr);
     CHECK(most_recent_lsa != nullptr);
-    return new (arena) LoadStoreElimination(graph, *most_recent_side_effects, *most_recent_lsa);
+    return
+        new (arena) LoadStoreElimination(graph, *most_recent_side_effects, *most_recent_lsa, stats);
   } else if (opt_name == SideEffectsAnalysis::kSideEffectsAnalysisPassName) {
     return new (arena) SideEffectsAnalysis(graph);
   } else if (opt_name == HLoopOptimization::kLoopOptimizationPassName) {
@@ -716,11 +709,12 @@
 static void AllocateRegisters(HGraph* graph,
                               CodeGenerator* codegen,
                               PassObserver* pass_observer,
-                              RegisterAllocator::Strategy strategy) {
+                              RegisterAllocator::Strategy strategy,
+                              OptimizingCompilerStats* stats) {
   {
     PassScope scope(PrepareForRegisterAllocation::kPrepareForRegisterAllocationPassName,
                     pass_observer);
-    PrepareForRegisterAllocation(graph).Run();
+    PrepareForRegisterAllocation(graph, stats).Run();
   }
   SsaLivenessAnalysis liveness(graph, codegen);
   {
@@ -778,7 +772,7 @@
   BoundsCheckElimination* bce = new (arena) BoundsCheckElimination(graph, *side_effects1, induction);
   HLoopOptimization* loop = new (arena) HLoopOptimization(graph, driver, induction);
   LoadStoreAnalysis* lsa = new (arena) LoadStoreAnalysis(graph);
-  LoadStoreElimination* lse = new (arena) LoadStoreElimination(graph, *side_effects2, *lsa);
+  LoadStoreElimination* lse = new (arena) LoadStoreElimination(graph, *side_effects2, *lsa, stats);
   HSharpening* sharpening = new (arena) HSharpening(
       graph, codegen, dex_compilation_unit, driver, handles);
   InstructionSimplifier* simplify2 = new (arena) InstructionSimplifier(
@@ -894,7 +888,8 @@
                                               ArtMethod* method,
                                               bool osr,
                                               VariableSizedHandleScope* handles) const {
-  MaybeRecordStat(MethodCompilationStat::kAttemptCompilation);
+  MaybeRecordStat(compilation_stats_.get(),
+                  MethodCompilationStat::kAttemptCompilation);
   CompilerDriver* compiler_driver = GetCompilerDriver();
   InstructionSet instruction_set = compiler_driver->GetInstructionSet();
 
@@ -904,12 +899,14 @@
 
   // Do not attempt to compile on architectures we do not support.
   if (!IsInstructionSetSupported(instruction_set)) {
-    MaybeRecordStat(MethodCompilationStat::kNotCompiledUnsupportedIsa);
+    MaybeRecordStat(compilation_stats_.get(),
+                    MethodCompilationStat::kNotCompiledUnsupportedIsa);
     return nullptr;
   }
 
   if (Compiler::IsPathologicalCase(*code_item, method_idx, dex_file)) {
-    MaybeRecordStat(MethodCompilationStat::kNotCompiledPathological);
+    MaybeRecordStat(compilation_stats_.get(),
+                    MethodCompilationStat::kNotCompiledPathological);
     return nullptr;
   }
 
@@ -919,7 +916,8 @@
   const CompilerOptions& compiler_options = compiler_driver->GetCompilerOptions();
   if ((compiler_options.GetCompilerFilter() == CompilerFilter::kSpace)
       && (code_item->insns_size_in_code_units_ > kSpaceFilterOptimizingThreshold)) {
-    MaybeRecordStat(MethodCompilationStat::kNotCompiledSpaceFilter);
+    MaybeRecordStat(compilation_stats_.get(),
+                    MethodCompilationStat::kNotCompiledSpaceFilter);
     return nullptr;
   }
 
@@ -966,7 +964,8 @@
                             compiler_driver->GetCompilerOptions(),
                             compilation_stats_.get()));
   if (codegen.get() == nullptr) {
-    MaybeRecordStat(MethodCompilationStat::kNotCompiledNoCodegen);
+    MaybeRecordStat(compilation_stats_.get(),
+                    MethodCompilationStat::kNotCompiledNoCodegen);
     return nullptr;
   }
   codegen->GetAssembler()->cfi().SetEnabled(
@@ -995,17 +994,25 @@
     GraphAnalysisResult result = builder.BuildGraph();
     if (result != kAnalysisSuccess) {
       switch (result) {
-        case kAnalysisSkipped:
-          MaybeRecordStat(MethodCompilationStat::kNotCompiledSkipped);
+        case kAnalysisSkipped: {
+          MaybeRecordStat(compilation_stats_.get(),
+                          MethodCompilationStat::kNotCompiledSkipped);
+        }
           break;
-        case kAnalysisInvalidBytecode:
-          MaybeRecordStat(MethodCompilationStat::kNotCompiledInvalidBytecode);
+        case kAnalysisInvalidBytecode: {
+          MaybeRecordStat(compilation_stats_.get(),
+                          MethodCompilationStat::kNotCompiledInvalidBytecode);
+        }
           break;
-        case kAnalysisFailThrowCatchLoop:
-          MaybeRecordStat(MethodCompilationStat::kNotCompiledThrowCatchLoop);
+        case kAnalysisFailThrowCatchLoop: {
+          MaybeRecordStat(compilation_stats_.get(),
+                          MethodCompilationStat::kNotCompiledThrowCatchLoop);
+        }
           break;
-        case kAnalysisFailAmbiguousArrayOp:
-          MaybeRecordStat(MethodCompilationStat::kNotCompiledAmbiguousArrayOp);
+        case kAnalysisFailAmbiguousArrayOp: {
+          MaybeRecordStat(compilation_stats_.get(),
+                          MethodCompilationStat::kNotCompiledAmbiguousArrayOp);
+        }
           break;
         case kAnalysisSuccess:
           UNREACHABLE();
@@ -1024,7 +1031,11 @@
 
   RegisterAllocator::Strategy regalloc_strategy =
     compiler_options.GetRegisterAllocationStrategy();
-  AllocateRegisters(graph, codegen.get(), &pass_observer, regalloc_strategy);
+  AllocateRegisters(graph,
+                    codegen.get(),
+                    &pass_observer,
+                    regalloc_strategy,
+                    compilation_stats_.get());
 
   codegen->Compile(code_allocator);
   pass_observer.DumpDisassembly();
@@ -1072,7 +1083,8 @@
                      &handles));
     }
     if (codegen.get() != nullptr) {
-      MaybeRecordStat(MethodCompilationStat::kCompiled);
+      MaybeRecordStat(compilation_stats_.get(),
+                      MethodCompilationStat::kCompiled);
       method = Emit(&arena, &code_allocator, codegen.get(), compiler_driver, code_item);
 
       if (kArenaAllocatorCountAllocations) {
@@ -1083,11 +1095,13 @@
       }
     }
   } else {
+    MethodCompilationStat method_stat;
     if (compiler_driver->GetCompilerOptions().VerifyAtRuntime()) {
-      MaybeRecordStat(MethodCompilationStat::kNotCompiledVerifyAtRuntime);
+      method_stat = MethodCompilationStat::kNotCompiledVerifyAtRuntime;
     } else {
-      MaybeRecordStat(MethodCompilationStat::kNotCompiledVerificationError);
+      method_stat = MethodCompilationStat::kNotCompiledVerificationError;
     }
+    MaybeRecordStat(compilation_stats_.get(), method_stat);
   }
 
   if (kIsDebugBuild &&
@@ -1111,12 +1125,7 @@
 
 bool IsCompilingWithCoreImage() {
   const std::string& image = Runtime::Current()->GetImageLocation();
-  // TODO: This is under-approximating...
-  if (android::base::EndsWith(image, "core.art") ||
-      android::base::EndsWith(image, "core-optimizing.art")) {
-    return true;
-  }
-  return false;
+  return CompilerDriver::IsCoreImageFilename(image);
 }
 
 bool EncodeArtMethodInInlineInfo(ArtMethod* method ATTRIBUTE_UNUSED) {
@@ -1210,18 +1219,18 @@
   uint8_t* stack_map_data = nullptr;
   uint8_t* method_info_data = nullptr;
   uint8_t* roots_data = nullptr;
-  code_cache->ReserveData(self,
-                          stack_map_size,
-                          method_info_size,
-                          number_of_roots,
-                          method,
-                          &stack_map_data,
-                          &method_info_data,
-                          &roots_data);
+  uint32_t data_size = code_cache->ReserveData(self,
+                                               stack_map_size,
+                                               method_info_size,
+                                               number_of_roots,
+                                               method,
+                                               &stack_map_data,
+                                               &method_info_data,
+                                               &roots_data);
   if (stack_map_data == nullptr || roots_data == nullptr) {
     return false;
   }
-  MaybeRecordStat(MethodCompilationStat::kCompiled);
+  MaybeRecordStat(compilation_stats_.get(), MethodCompilationStat::kCompiled);
   codegen->BuildStackMaps(MemoryRegion(stack_map_data, stack_map_size),
                           MemoryRegion(method_info_data, method_info_size),
                           *code_item);
@@ -1238,6 +1247,7 @@
       codegen->GetFpuSpillMask(),
       code_allocator.GetMemory().data(),
       code_allocator.GetSize(),
+      data_size,
       osr,
       roots,
       codegen->GetGraph()->HasShouldDeoptimizeFlag(),
diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h
index a211c54..d6da73c 100644
--- a/compiler/optimizing/optimizing_compiler_stats.h
+++ b/compiler/optimizing/optimizing_compiler_stats.h
@@ -23,6 +23,7 @@
 #include <type_traits>
 
 #include "atomic.h"
+#include "globals.h"
 
 namespace art {
 
@@ -86,6 +87,10 @@
   kNotInlinedWont,
   kNotInlinedRecursiveBudget,
   kNotInlinedProxy,
+  kConstructorFenceGeneratedNew,
+  kConstructorFenceGeneratedFinal,
+  kConstructorFenceRemovedLSE,
+  kConstructorFenceRemovedPFRA,
   kLastStat
 };
 
@@ -202,6 +207,10 @@
       case kNotInlinedWont: name = "NotInlinedWont"; break;
       case kNotInlinedRecursiveBudget: name = "NotInlinedRecursiveBudget"; break;
       case kNotInlinedProxy: name = "NotInlinedProxy"; break;
+      case kConstructorFenceGeneratedNew: name = "ConstructorFenceGeneratedNew"; break;
+      case kConstructorFenceGeneratedFinal: name = "ConstructorFenceGeneratedFinal"; break;
+      case kConstructorFenceRemovedLSE: name = "ConstructorFenceRemovedLSE"; break;
+      case kConstructorFenceRemovedPFRA: name = "ConstructorFenceRemovedPFRA"; break;
 
       case kLastStat:
         LOG(FATAL) << "invalid stat "
@@ -216,6 +225,14 @@
   DISALLOW_COPY_AND_ASSIGN(OptimizingCompilerStats);
 };
 
+inline void MaybeRecordStat(OptimizingCompilerStats* compiler_stats,
+                            MethodCompilationStat stat,
+                            uint32_t count = 1) {
+  if (compiler_stats != nullptr) {
+    compiler_stats->RecordStat(stat, count);
+  }
+}
+
 }  // namespace art
 
 #endif  // ART_COMPILER_OPTIMIZING_OPTIMIZING_COMPILER_STATS_H_
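
The new free function is the single null-tolerant replacement for the
per-class MaybeRecordStat helpers deleted throughout this change. A
self-contained sketch of the idiom, with a simplified stats type and a
hypothetical stat enum:

    #include <cstdint>

    enum class Stat { kCompiled, kLast };

    struct Stats {
      uint32_t counts[static_cast<int>(Stat::kLast)] = {};
      void RecordStat(Stat s, uint32_t count) {
        counts[static_cast<int>(s)] += count;
      }
    };

    inline void MaybeRecord(Stats* stats, Stat s, uint32_t count = 1) {
      if (stats != nullptr) {  // stats collection may be disabled entirely
        stats->RecordStat(s, count);
      }
    }

    int main() {
      Stats stats;
      MaybeRecord(&stats, Stat::kCompiled);   // recorded
      MaybeRecord(nullptr, Stat::kCompiled);  // safely ignored
      return 0;
    }
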
diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc
index 7c6b69f..5de707a 100644
--- a/compiler/optimizing/prepare_for_register_allocation.cc
+++ b/compiler/optimizing/prepare_for_register_allocation.cc
@@ -17,6 +17,7 @@
 #include "prepare_for_register_allocation.h"
 
 #include "jni_internal.h"
+#include "optimizing_compiler_stats.h"
 #include "well_known_classes.h"
 
 namespace art {
@@ -190,8 +191,9 @@
       // TODO: GetAssociatedAllocation should not care about multiple inputs
       // if we are in prepare_for_register_allocation pass only.
       constructor_fence->GetBlock()->RemoveInstruction(constructor_fence);
+      MaybeRecordStat(stats_,
+                      MethodCompilationStat::kConstructorFenceRemovedPFRA);
       return;
-      // TODO: actually remove the dmb from the .S entrypoints (initialized variants only).
     }
 
     // HNewArray does not need this check because the art_quick_alloc_array does not itself
diff --git a/compiler/optimizing/prepare_for_register_allocation.h b/compiler/optimizing/prepare_for_register_allocation.h
index 395d4ba..2c64f01 100644
--- a/compiler/optimizing/prepare_for_register_allocation.h
+++ b/compiler/optimizing/prepare_for_register_allocation.h
@@ -21,6 +21,8 @@
 
 namespace art {
 
+class OptimizingCompilerStats;
+
 /**
  * A simplification pass over the graph before doing register allocation.
  * For example it changes uses of null checks and bounds checks to the original
@@ -28,7 +30,9 @@
  */
 class PrepareForRegisterAllocation : public HGraphDelegateVisitor {
  public:
-  explicit PrepareForRegisterAllocation(HGraph* graph) : HGraphDelegateVisitor(graph) {}
+  explicit PrepareForRegisterAllocation(HGraph* graph,
+                                        OptimizingCompilerStats* stats = nullptr)
+      : HGraphDelegateVisitor(graph, stats) {}
 
   void Run();
 
diff --git a/compiler/optimizing/scheduler.cc b/compiler/optimizing/scheduler.cc
index 3e373d1..38cd51b 100644
--- a/compiler/optimizing/scheduler.cc
+++ b/compiler/optimizing/scheduler.cc
@@ -724,8 +724,8 @@
       instruction->IsClassTableGet() ||
       instruction->IsCurrentMethod() ||
       instruction->IsDivZeroCheck() ||
-      instruction->IsInstanceFieldGet() ||
-      instruction->IsInstanceFieldSet() ||
+      (instruction->IsInstanceFieldGet() && !instruction->AsInstanceFieldGet()->IsVolatile()) ||
+      (instruction->IsInstanceFieldSet() && !instruction->AsInstanceFieldSet()->IsVolatile()) ||
       instruction->IsInstanceOf() ||
       instruction->IsInvokeInterface() ||
       instruction->IsInvokeStaticOrDirect() ||
@@ -741,14 +741,10 @@
       instruction->IsReturn() ||
       instruction->IsReturnVoid() ||
       instruction->IsSelect() ||
-      instruction->IsStaticFieldGet() ||
-      instruction->IsStaticFieldSet() ||
+      (instruction->IsStaticFieldGet() && !instruction->AsStaticFieldGet()->IsVolatile()) ||
+      (instruction->IsStaticFieldSet() && !instruction->AsStaticFieldSet()->IsVolatile()) ||
       instruction->IsSuspendCheck() ||
-      instruction->IsTypeConversion() ||
-      instruction->IsUnresolvedInstanceFieldGet() ||
-      instruction->IsUnresolvedInstanceFieldSet() ||
-      instruction->IsUnresolvedStaticFieldGet() ||
-      instruction->IsUnresolvedStaticFieldSet();
+      instruction->IsTypeConversion();
 }
 
 bool HScheduler::IsSchedulable(const HBasicBlock* block) const {
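
The scheduler hunks above stop treating volatile field accesses (and all
unresolved accesses) as freely reorderable. An illustrative C++ analogue of
why (the Java `volatile` contract is what ART actually preserves):

    #include <atomic>

    int data = 0;
    std::atomic<bool> ready{false};

    // Reordering the two stores would let another thread observe
    // `ready == true` before `data` has been written.
    void Publish() {
      data = 42;                                     // plain store: movable
      ready.store(true, std::memory_order_release);  // ordered store: must stay last
    }
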
diff --git a/compiler/optimizing/select_generator.cc b/compiler/optimizing/select_generator.cc
index d911d73..e220d32 100644
--- a/compiler/optimizing/select_generator.cc
+++ b/compiler/optimizing/select_generator.cc
@@ -170,7 +170,7 @@
       block->MergeWith(merge_block);
     }
 
-    MaybeRecordStat(MethodCompilationStat::kSelectGenerated);
+    MaybeRecordStat(stats_, MethodCompilationStat::kSelectGenerated);
 
     // No need to update dominance information, as we are simplifying
     // a simple diamond shape, where the join block is merged with the
diff --git a/compiler/utils/arm/assembler_arm_vixl.cc b/compiler/utils/arm/assembler_arm_vixl.cc
index af3b447..9df1b74 100644
--- a/compiler/utils/arm/assembler_arm_vixl.cc
+++ b/compiler/utils/arm/assembler_arm_vixl.cc
@@ -82,6 +82,22 @@
   }
 }
 
+void ArmVIXLAssembler::GenerateMarkingRegisterCheck(vixl32::Register temp, int code) {
+  // The Marking Register is only used in the Baker read barrier configuration.
+  DCHECK(kEmitCompilerReadBarrier);
+  DCHECK(kUseBakerReadBarrier);
+
+  vixl32::Label mr_is_ok;
+
+  // temp = self.tls32_.is_gc_marking
+  ___ Ldr(temp, MemOperand(tr, Thread::IsGcMarkingOffset<kArmPointerSize>().Int32Value()));
+  // Check that mr == self.tls32_.is_gc_marking.
+  ___ Cmp(mr, temp);
+  ___ B(eq, &mr_is_ok, /* far_target */ false);
+  ___ Bkpt(code);
+  ___ Bind(&mr_is_ok);
+}
+
 void ArmVIXLAssembler::LoadImmediate(vixl32::Register rd, int32_t value) {
   // TODO(VIXL): Implement this optimization in VIXL.
   if (!ShifterOperandCanAlwaysHold(value) && ShifterOperandCanAlwaysHold(~value)) {
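
Logically, the emitted sequence re-reads the thread-local flag, compares it
against the dedicated Marking Register, and traps on mismatch. A plain C++
restatement of the check (toy types, not the generated Thumb-2 code):

    #include <cstdlib>

    struct ToyThread { bool is_gc_marking; };

    // Trap if the cached copy (the Marking Register) disagrees with the
    // thread-local flag it is supposed to mirror.
    void CheckMarkingRegister(const ToyThread* self, bool mr) {
      if (mr != self->is_gc_marking) {
        std::abort();  // corresponds to BKPT/BRK with the `code` payload
      }
    }
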
diff --git a/compiler/utils/arm/assembler_arm_vixl.h b/compiler/utils/arm/assembler_arm_vixl.h
index 66b22ea..9c11fd3 100644
--- a/compiler/utils/arm/assembler_arm_vixl.h
+++ b/compiler/utils/arm/assembler_arm_vixl.h
@@ -178,6 +178,7 @@
   //
   // Heap poisoning.
   //
+
   // Poison a heap reference contained in `reg`.
   void PoisonHeapReference(vixl32::Register reg);
   // Unpoison a heap reference contained in `reg`.
@@ -187,6 +188,15 @@
   // Unpoison a heap reference contained in `reg` if heap poisoning is enabled.
   void MaybeUnpoisonHeapReference(vixl32::Register reg);
 
+  // Emit code checking the status of the Marking Register, and aborting
+  // the program if MR does not match the value stored in the art::Thread
+  // object.
+  //
+  // Argument `temp` is used as a temporary register to generate code.
+  // Argument `code` is used to identify the different occurrences of
+  // MaybeGenerateMarkingRegisterCheck and is passed to the BKPT instruction.
+  void GenerateMarkingRegisterCheck(vixl32::Register temp, int code = 0);
+
   void StoreToOffset(StoreOperandType type,
                      vixl32::Register reg,
                      vixl32::Register base,
diff --git a/compiler/utils/arm64/assembler_arm64.cc b/compiler/utils/arm64/assembler_arm64.cc
index 6ed0e9b..d8a48a5 100644
--- a/compiler/utils/arm64/assembler_arm64.cc
+++ b/compiler/utils/arm64/assembler_arm64.cc
@@ -158,6 +158,24 @@
   }
 }
 
+void Arm64Assembler::GenerateMarkingRegisterCheck(Register temp, int code) {
+  // The Marking Register is only used in the Baker read barrier configuration.
+  DCHECK(kEmitCompilerReadBarrier);
+  DCHECK(kUseBakerReadBarrier);
+
+  vixl::aarch64::Register mr = reg_x(MR);  // Marking Register.
+  vixl::aarch64::Register tr = reg_x(TR);  // Thread Register.
+  vixl::aarch64::Label mr_is_ok;
+
+  // temp = self.tls32_.is_gc_marking
+  ___ Ldr(temp, MemOperand(tr, Thread::IsGcMarkingOffset<kArm64PointerSize>().Int32Value()));
+  // Check that mr == self.tls32_.is_gc_marking.
+  ___ Cmp(mr.W(), temp);
+  ___ B(eq, &mr_is_ok);
+  ___ Brk(code);
+  ___ Bind(&mr_is_ok);
+}
+
 #undef ___
 
 }  // namespace arm64
diff --git a/compiler/utils/arm64/assembler_arm64.h b/compiler/utils/arm64/assembler_arm64.h
index 5b8a34e..6b28363 100644
--- a/compiler/utils/arm64/assembler_arm64.h
+++ b/compiler/utils/arm64/assembler_arm64.h
@@ -98,6 +98,15 @@
   // Unpoison a heap reference contained in `reg` if heap poisoning is enabled.
   void MaybeUnpoisonHeapReference(vixl::aarch64::Register reg);
 
+  // Emit code checking the status of the Marking Register, and aborting
+  // the program if MR does not match the value stored in the art::Thread
+  // object.
+  //
+  // Argument `temp` is used as a temporary register to generate code.
+  // Argument `code` is used to identify the different occurrences of
+  // MaybeGenerateMarkingRegisterCheck and is passed to the BRK instruction.
+  void GenerateMarkingRegisterCheck(vixl::aarch64::Register temp, int code = 0);
+
   void Bind(Label* label ATTRIBUTE_UNUSED) OVERRIDE {
     UNIMPLEMENTED(FATAL) << "Do not use Bind for ARM64";
   }
diff --git a/compiler/utils/arm64/jni_macro_assembler_arm64.cc b/compiler/utils/arm64/jni_macro_assembler_arm64.cc
index bab84be..9732b76 100644
--- a/compiler/utils/arm64/jni_macro_assembler_arm64.cc
+++ b/compiler/utils/arm64/jni_macro_assembler_arm64.cc
@@ -662,7 +662,7 @@
   ___ Bind(Arm64JNIMacroLabel::Cast(label)->AsArm64());
 }
 
-void Arm64JNIMacroAssembler::EmitExceptionPoll(Arm64Exception *exception) {
+void Arm64JNIMacroAssembler::EmitExceptionPoll(Arm64Exception* exception) {
   UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
   temps.Exclude(reg_x(exception->scratch_.AsXRegister()));
   Register temp = temps.AcquireX();
diff --git a/compiler/utils/jni_macro_assembler.h b/compiler/utils/jni_macro_assembler.h
index 59a1a48..a8ca111 100644
--- a/compiler/utils/jni_macro_assembler.h
+++ b/compiler/utils/jni_macro_assembler.h
@@ -216,8 +216,15 @@
    */
   virtual DebugFrameOpCodeWriterForAssembler& cfi() = 0;
 
+  void SetEmitRunTimeChecksInDebugMode(bool value) {
+    emit_run_time_checks_in_debug_mode_ = value;
+  }
+
  protected:
-  explicit JNIMacroAssembler() {}
+  JNIMacroAssembler() {}
+
+  // Should run-time checks be emitted in debug mode?
+  bool emit_run_time_checks_in_debug_mode_ = false;
 };
 
 // A "Label" class used with the JNIMacroAssembler
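
The setter lets the driver push the EmitRunTimeChecksInDebugMode() decision
into each JNI stub compilation. A hypothetical wiring sketch (call sites
invented for illustration; only SetEmitRunTimeChecksInDebugMode comes from
the patch):

    struct Options { bool emit_checks; };

    struct Assembler {
      void SetEmitRunTimeChecksInDebugMode(bool value) { emit_checks_ = value; }
      void EmitStub() {
        if (emit_checks_) {
          // ... emit marking-register check ...
        }
        // ... emit the rest of the stub ...
      }
      bool emit_checks_ = false;
    };

    void CompileStub(const Options& opts, Assembler* assembler) {
      assembler->SetEmitRunTimeChecksInDebugMode(opts.emit_checks);
      assembler->EmitStub();
    }
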
diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc
index ec14e7a..9f2c44d 100644
--- a/compiler/utils/x86_64/assembler_x86_64_test.cc
+++ b/compiler/utils/x86_64/assembler_x86_64_test.cc
@@ -2012,7 +2012,7 @@
   x86_64::X86_64ManagedRegister method_reg = ManagedFromCpu(x86_64::RDI);
 
   size_t frame_size = 10 * kStackAlignment;
-  assembler->BuildFrame(10 * kStackAlignment, method_reg, spill_regs, entry_spills);
+  assembler->BuildFrame(frame_size, method_reg, spill_regs, entry_spills);
 
   // Construct assembly text counterpart.
   std::ostringstream str;
@@ -2048,7 +2048,7 @@
   ArrayRef<const ManagedRegister> spill_regs(raw_spill_regs);
 
   size_t frame_size = 10 * kStackAlignment;
-  assembler->RemoveFrame(10 * kStackAlignment, spill_regs);
+  assembler->RemoveFrame(frame_size, spill_regs);
 
   // Construct assembly text counterpart.
   std::ostringstream str;
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index a6036da..e9ec5fa 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -773,6 +773,11 @@
     compiler_options_->boot_image_ = !image_filenames_.empty();
     compiler_options_->app_image_ = app_image_fd_ != -1 || !app_image_file_name_.empty();
 
+    if (IsBootImage() && image_filenames_.size() == 1) {
+      const std::string& boot_image_filename = image_filenames_[0];
+      compiler_options_->core_image_ = CompilerDriver::IsCoreImageFilename(boot_image_filename);
+    }
+
     if (IsAppImage() && IsBootImage()) {
       Usage("Can't have both --image and (--app-image-fd or --app-image-file)");
     }
diff --git a/runtime/dex_file_verifier.cc b/runtime/dex_file_verifier.cc
index c5c4eda..8fdd470 100644
--- a/runtime/dex_file_verifier.cc
+++ b/runtime/dex_file_verifier.cc
@@ -363,7 +363,7 @@
   // Check file size from the header.
   uint32_t expected_size = header_->file_size_;
   if (size_ != expected_size) {
-    ErrorStringPrintf("Bad file size (%zd, expected %ud)", size_, expected_size);
+    ErrorStringPrintf("Bad file size (%zd, expected %u)", size_, expected_size);
     return false;
   }
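
For the record, "%ud" is parsed by printf as the conversion "%u" followed by
a literal 'd', so the old message ended in a spurious character. A minimal
repro:

    #include <cstdio>

    int main() {
      unsigned int expected_size = 1024;
      std::printf("expected %ud\n", expected_size);  // prints "expected 1024d"
      std::printf("expected %u\n", expected_size);   // prints "expected 1024"
      return 0;
    }
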
 
diff --git a/runtime/gc/space/malloc_space.cc b/runtime/gc/space/malloc_space.cc
index c2a8de3..c994127 100644
--- a/runtime/gc/space/malloc_space.cc
+++ b/runtime/gc/space/malloc_space.cc
@@ -191,14 +191,10 @@
   VLOG(heap) << "Size " << GetMemMap()->Size();
   VLOG(heap) << "GrowthLimit " << PrettySize(growth_limit);
   VLOG(heap) << "Capacity " << PrettySize(capacity);
-  // Remap the tail. Pass MAP_PRIVATE since we don't want to share the same ashmem as the zygote
-  // space.
+  // Remap the tail.
   std::string error_msg;
-  std::unique_ptr<MemMap> mem_map(GetMemMap()->RemapAtEnd(End(),
-                                                          alloc_space_name,
-                                                          PROT_READ | PROT_WRITE,
-                                                          MAP_PRIVATE,
-                                                          &error_msg));
+  std::unique_ptr<MemMap> mem_map(GetMemMap()->RemapAtEnd(End(), alloc_space_name,
+                                                          PROT_READ | PROT_WRITE, &error_msg));
   CHECK(mem_map.get() != nullptr) << error_msg;
   void* allocator = CreateAllocator(End(), starting_size_, initial_size_, capacity,
                                     low_memory_mode);
diff --git a/runtime/jit/jit_code_cache.cc b/runtime/jit/jit_code_cache.cc
index 31319f1..47ace7f 100644
--- a/runtime/jit/jit_code_cache.cc
+++ b/runtime/jit/jit_code_cache.cc
@@ -47,13 +47,9 @@
 static constexpr int kProtAll = PROT_READ | PROT_WRITE | PROT_EXEC;
 static constexpr int kProtData = PROT_READ | PROT_WRITE;
 static constexpr int kProtCode = PROT_READ | PROT_EXEC;
-static constexpr int kProtReadOnly = PROT_READ;
-static constexpr int kProtNone = PROT_NONE;
 
 static constexpr size_t kCodeSizeLogThreshold = 50 * KB;
 static constexpr size_t kStackMapSizeLogThreshold = 50 * KB;
-static constexpr size_t kMinMapSpacingPages = 1;
-static constexpr size_t kMaxMapSpacingPages = 128;
 
 #define CHECKED_MPROTECT(memory, size, prot)                \
   do {                                                      \
@@ -64,39 +60,12 @@
     }                                                       \
   } while (false)                                           \
 
-static MemMap* SplitMemMap(MemMap* existing_map,
-                           const char* name,
-                           size_t split_offset,
-                           int split_prot,
-                           std::string* error_msg,
-                           bool use_ashmem,
-                           unique_fd* shmem_fd = nullptr) {
-  std::string error_str;
-  uint8_t* divider = existing_map->Begin() + split_offset;
-  MemMap* new_map = existing_map->RemapAtEnd(divider,
-                                             name,
-                                             split_prot,
-                                             MAP_SHARED,
-                                             &error_str,
-                                             use_ashmem,
-                                             shmem_fd);
-  if (new_map == nullptr) {
-    std::ostringstream oss;
-    oss << "Failed to create spacing for " << name << ": "
-        << error_str << " offset=" << split_offset;
-    *error_msg = oss.str();
-    return nullptr;
-  }
-  return new_map;
-}
-
 JitCodeCache* JitCodeCache::Create(size_t initial_capacity,
                                    size_t max_capacity,
                                    bool generate_debug_info,
                                    std::string* error_msg) {
   ScopedTrace trace(__PRETTY_FUNCTION__);
-  CHECK_GT(max_capacity, initial_capacity);
-  CHECK_GE(max_capacity - kMaxMapSpacingPages * kPageSize, initial_capacity);
+  CHECK_GE(max_capacity, initial_capacity);
 
   // Generating debug information is for using the Linux perf tool on
   // host which does not work with ashmem.
@@ -106,10 +75,6 @@
   // With 'perf', we want a 1-1 mapping between an address and a method.
   bool garbage_collect_code = !generate_debug_info;
 
-  // We only use two mappings (separating rw from rx) if we are able to use ashmem.
-  // See the above comment for debug information and not using ashmem.
-  bool use_two_mappings = use_ashmem;
-
   // We need to have 32 bit offsets from method headers in code cache which point to things
   // in the data cache. If the maps are more than 4G apart, having multiple maps wouldn't work.
   // Ensure we're below 1 GB to be safe.
@@ -121,10 +86,6 @@
     return nullptr;
   }
 
-  // Align both capacities to page size, as that's the unit mspaces use.
-  initial_capacity = RoundDown(initial_capacity, 2 * kPageSize);
-  max_capacity = RoundDown(max_capacity, 2 * kPageSize);
-
   std::string error_str;
   // Map name specific for android_os_Debug.cpp accounting.
   // Map in low 4gb to simplify accessing root tables for x86_64.
@@ -146,138 +107,35 @@
     return nullptr;
   }
 
-  // Create a region for JIT data and executable code. This will be
-  // laid out as:
-  //
-  //          +----------------+ --------------------
-  //          | code_sync_map_ | ^ code_sync_size   ^
-  //          |                | v                  |
-  //          +----------------+ --                 |
-  //          :                : ^                  |
-  //          :  post_code_map : | post_code_size   |
-  //          :   [padding]    : v                  |
-  //          +----------------+ -                  |
-  //          |                | ^                  |
-  //          |   code_map     | | code_size        | total_mapping_size
-  //          |   [JIT Code]   | v                  |
-  //          +----------------+ -                  |
-  //          :                : ^                  |
-  //          :  pre_code_map  : | pre_code_size    |
-  //          :   [padding]    : v                  |
-  //          +----------------+ -                  |
-  //          |                | ^                  |
-  //          |    data_map    | | data_size        |
-  //          |   [Jit Data]   | v                  v
-  //          +----------------+ --------------------
-  //
-  // The code_sync_map_ contains a page that we use flush CPU instruction
-  // pipelines (see FlushInstructionPipelines()).
-  //
-  // The padding regions - pre_code_map and post_code_map - exist to
-  // put some random distance between the writable JIT code mapping
-  // and the executable mapping. The padding is discarded at the end
-  // of this function.
-  //
-  size_t data_size = (max_capacity - kMaxMapSpacingPages * kPageSize) / 2;
-  size_t pre_code_size =
-      GetRandomNumber(kMinMapSpacingPages, kMaxMapSpacingPages - 1) * kPageSize;
-  size_t code_size = max_capacity - data_size - kMaxMapSpacingPages * kPageSize;
-  size_t code_sync_size = kPageSize;
-  size_t post_code_size = kMaxMapSpacingPages * kPageSize - pre_code_size - code_sync_size;
-  DCHECK_EQ(data_size, code_size);
-  DCHECK_EQ(pre_code_size + post_code_size + code_sync_size, kMaxMapSpacingPages * kPageSize);
-  DCHECK_EQ(data_size + pre_code_size + code_size + post_code_size + code_sync_size, max_capacity);
+  // Align both capacities to page size, as that's the unit mspaces use.
+  initial_capacity = RoundDown(initial_capacity, 2 * kPageSize);
+  max_capacity = RoundDown(max_capacity, 2 * kPageSize);
 
-  // Create pre-code padding region after data region, discarded after
-  // code and data regions are set-up.
-  std::unique_ptr<MemMap> pre_code_map(SplitMemMap(data_map.get(),
-                                                   "jit-code-cache-padding",
-                                                   data_size,
-                                                   kProtNone,
-                                                   error_msg,
-                                                   use_ashmem));
-  if (pre_code_map == nullptr) {
-    return nullptr;
-  }
-  DCHECK_EQ(data_map->Size(), data_size);
-  DCHECK_EQ(pre_code_map->Size(), pre_code_size + code_size + post_code_size + code_sync_size);
+  // Data cache is 1 / 2 of the map.
+  // TODO: Make this variable?
+  size_t data_size = max_capacity / 2;
+  size_t code_size = max_capacity - data_size;
+  DCHECK_EQ(code_size + data_size, max_capacity);
+  uint8_t* divider = data_map->Begin() + data_size;
 
-  // Create code region.
-  unique_fd writable_code_fd;
-  std::unique_ptr<MemMap> code_map(SplitMemMap(pre_code_map.get(),
-                                               "jit-code-cache",
-                                               pre_code_size,
-                                               use_two_mappings ? kProtCode : kProtAll,
-                                               error_msg,
-                                               use_ashmem,
-                                               &writable_code_fd));
+  MemMap* code_map =
+      data_map->RemapAtEnd(divider, "jit-code-cache", kProtAll, &error_str, use_ashmem);
   if (code_map == nullptr) {
+    std::ostringstream oss;
+    oss << "Failed to create read write execute cache: " << error_str << " size=" << max_capacity;
+    *error_msg = oss.str();
     return nullptr;
   }
-  DCHECK_EQ(pre_code_map->Size(), pre_code_size);
-  DCHECK_EQ(code_map->Size(), code_size + post_code_size + code_sync_size);
-
-  // Padding after code region, discarded after code and data regions
-  // are set-up.
-  std::unique_ptr<MemMap> post_code_map(SplitMemMap(code_map.get(),
-                                                    "jit-code-cache-padding",
-                                                    code_size,
-                                                    kProtNone,
-                                                    error_msg,
-                                                    use_ashmem));
-  if (post_code_map == nullptr) {
-    return nullptr;
-  }
-  DCHECK_EQ(code_map->Size(), code_size);
-  DCHECK_EQ(post_code_map->Size(), post_code_size + code_sync_size);
-
-  std::unique_ptr<MemMap> code_sync_map(SplitMemMap(post_code_map.get(),
-                                                    "jit-code-sync",
-                                                    post_code_size,
-                                                    kProtCode,
-                                                    error_msg,
-                                                    use_ashmem));
-  if (code_sync_map == nullptr) {
-    return nullptr;
-  }
-  DCHECK_EQ(post_code_map->Size(), post_code_size);
-  DCHECK_EQ(code_sync_map->Size(), code_sync_size);
-
-  std::unique_ptr<MemMap> writable_code_map;
-  if (use_two_mappings) {
-    // Allocate the R/W view.
-    writable_code_map.reset(MemMap::MapFile(code_size,
-                                            kProtData,
-                                            MAP_SHARED,
-                                            writable_code_fd.get(),
-                                            /* start */ 0,
-                                            /* low_4gb */ true,
-                                            "jit-writable-code",
-                                            &error_str));
-    if (writable_code_map == nullptr) {
-      std::ostringstream oss;
-      oss << "Failed to create writable code cache: " << error_str << " size=" << code_size;
-      *error_msg = oss.str();
-      return nullptr;
-    }
-  }
+  DCHECK_EQ(code_map->Begin(), divider);
   data_size = initial_capacity / 2;
   code_size = initial_capacity - data_size;
   DCHECK_EQ(code_size + data_size, initial_capacity);
-  return new JitCodeCache(writable_code_map.release(),
-                          code_map.release(),
-                          data_map.release(),
-                          code_sync_map.release(),
-                          code_size,
-                          data_size,
-                          max_capacity,
-                          garbage_collect_code);
+  return new JitCodeCache(
+      code_map, data_map.release(), code_size, data_size, max_capacity, garbage_collect_code);
 }
 
-JitCodeCache::JitCodeCache(MemMap* writable_code_map,
-                           MemMap* executable_code_map,
+JitCodeCache::JitCodeCache(MemMap* code_map,
                            MemMap* data_map,
-                           MemMap* code_sync_map,
                            size_t initial_code_capacity,
                            size_t initial_data_capacity,
                            size_t max_capacity,
@@ -285,10 +143,8 @@
     : lock_("Jit code cache", kJitCodeCacheLock),
       lock_cond_("Jit code cache condition variable", lock_),
       collection_in_progress_(false),
+      code_map_(code_map),
       data_map_(data_map),
-      executable_code_map_(executable_code_map),
-      writable_code_map_(writable_code_map),
-      code_sync_map_(code_sync_map),
       max_capacity_(max_capacity),
       current_capacity_(initial_code_capacity + initial_data_capacity),
       code_end_(initial_code_capacity),
@@ -308,8 +164,7 @@
       inline_cache_cond_("Jit inline cache condition variable", lock_) {
 
   DCHECK_GE(max_capacity, initial_code_capacity + initial_data_capacity);
-  MemMap* writable_map = GetWritableMemMap();
-  code_mspace_ = create_mspace_with_base(writable_map->Begin(), code_end_, false /*locked*/);
+  code_mspace_ = create_mspace_with_base(code_map_->Begin(), code_end_, false /*locked*/);
   data_mspace_ = create_mspace_with_base(data_map_->Begin(), data_end_, false /*locked*/);
 
   if (code_mspace_ == nullptr || data_mspace_ == nullptr) {
@@ -318,10 +173,7 @@
 
   SetFootprintLimit(current_capacity_);
 
-  if (writable_code_map_ != nullptr) {
-    CHECKED_MPROTECT(writable_code_map_->Begin(), writable_code_map_->Size(), kProtReadOnly);
-  }
-  CHECKED_MPROTECT(executable_code_map_->Begin(), executable_code_map_->Size(), kProtCode);
+  CHECKED_MPROTECT(code_map_->Begin(), code_map_->Size(), kProtCode);
   CHECKED_MPROTECT(data_map_->Begin(), data_map_->Size(), kProtData);
 
   VLOG(jit) << "Created jit code cache: initial data size="
@@ -331,7 +183,7 @@
 }
 
 bool JitCodeCache::ContainsPc(const void* ptr) const {
-  return executable_code_map_->Begin() <= ptr && ptr < executable_code_map_->End();
+  return code_map_->Begin() <= ptr && ptr < code_map_->End();
 }
 
 bool JitCodeCache::ContainsMethod(ArtMethod* method) {
@@ -344,96 +196,27 @@
   return false;
 }
 
-/* This method is only for CHECK/DCHECK that pointers are within to a region. */
-static bool IsAddressInMap(const void* addr,
-                           const MemMap* mem_map,
-                           const char* check_name) {
-  if (addr == nullptr || mem_map->HasAddress(addr)) {
-    return true;
-  }
-  LOG(ERROR) << "Is" << check_name << "Address " << addr
-             << " not in [" << reinterpret_cast<void*>(mem_map->Begin())
-             << ", " << reinterpret_cast<void*>(mem_map->Begin() + mem_map->Size()) << ")";
-  return false;
-}
-
-bool JitCodeCache::IsDataAddress(const void* raw_addr) const {
-  return IsAddressInMap(raw_addr, data_map_.get(), "Data");
-}
-
-bool JitCodeCache::IsExecutableAddress(const void* raw_addr) const {
-  return IsAddressInMap(raw_addr, executable_code_map_.get(), "Executable");
-}
-
-bool JitCodeCache::IsWritableAddress(const void* raw_addr) const {
-  return IsAddressInMap(raw_addr, GetWritableMemMap(), "Writable");
-}
-
-// Convert one address within the source map to the same offset within the destination map.
-static void* ConvertAddress(const void* source_address,
-                            const MemMap* source_map,
-                            const MemMap* destination_map) {
-  DCHECK(source_map->HasAddress(source_address)) << source_address;
-  ptrdiff_t offset = reinterpret_cast<const uint8_t*>(source_address) - source_map->Begin();
-  uintptr_t address = reinterpret_cast<uintptr_t>(destination_map->Begin()) + offset;
-  return reinterpret_cast<void*>(address);
-}
-
-template <typename T>
-T* JitCodeCache::ToExecutableAddress(T* writable_address) const {
-  CHECK(IsWritableAddress(writable_address));
-  if (writable_address == nullptr) {
-    return nullptr;
-  }
-  void* executable_address = ConvertAddress(writable_address,
-                                            GetWritableMemMap(),
-                                            executable_code_map_.get());
-  CHECK(IsExecutableAddress(executable_address));
-  return reinterpret_cast<T*>(executable_address);
-}
-
-void* JitCodeCache::ToWritableAddress(const void* executable_address) const {
-  CHECK(IsExecutableAddress(executable_address));
-  if (executable_address == nullptr) {
-    return nullptr;
-  }
-  void* writable_address = ConvertAddress(executable_address,
-                                          executable_code_map_.get(),
-                                          GetWritableMemMap());
-  CHECK(IsWritableAddress(writable_address));
-  return writable_address;
-}
-
 class ScopedCodeCacheWrite : ScopedTrace {
  public:
-  explicit ScopedCodeCacheWrite(JitCodeCache* code_cache)
-      : ScopedTrace("ScopedCodeCacheWrite") {
+  explicit ScopedCodeCacheWrite(MemMap* code_map, bool only_for_tlb_shootdown = false)
+      : ScopedTrace("ScopedCodeCacheWrite"),
+        code_map_(code_map),
+        only_for_tlb_shootdown_(only_for_tlb_shootdown) {
     ScopedTrace trace("mprotect all");
-    int prot_to_start_writing = kProtAll;
-    if (code_cache->writable_code_map_ == nullptr) {
-      // If there is only one mapping, use the executable mapping and toggle between rwx and rx.
-      prot_to_start_writing = kProtAll;
-      prot_to_stop_writing_ = kProtCode;
-    } else {
-      // If there are two mappings, use the writable mapping and toggle between rw and r.
-      prot_to_start_writing = kProtData;
-      prot_to_stop_writing_ = kProtReadOnly;
-    }
-    writable_map_ = code_cache->GetWritableMemMap();
-    // If we're using ScopedCacheWrite only for TLB shootdown, we limit the scope of mprotect to
-    // one page.
-    size_ = writable_map_->Size();
-    CHECKED_MPROTECT(writable_map_->Begin(), size_, prot_to_start_writing);
+    CHECKED_MPROTECT(
+        code_map_->Begin(), only_for_tlb_shootdown_ ? kPageSize : code_map_->Size(), kProtAll);
   }
   ~ScopedCodeCacheWrite() {
     ScopedTrace trace("mprotect code");
-    CHECKED_MPROTECT(writable_map_->Begin(), size_, prot_to_stop_writing_);
+    CHECKED_MPROTECT(
+        code_map_->Begin(), only_for_tlb_shootdown_ ? kPageSize : code_map_->Size(), kProtCode);
   }
-
  private:
-  int prot_to_stop_writing_;
-  MemMap* writable_map_;
-  size_t size_;
+  MemMap* const code_map_;
+
+  // If we're using ScopedCacheWrite only for TLB shootdown, we limit the scope of mprotect to
+  // one page.
+  const bool only_for_tlb_shootdown_;
 
   DISALLOW_COPY_AND_ASSIGN(ScopedCodeCacheWrite);
 };
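
With the dual-mapping scheme reverted, writability is again toggled on the
single code mapping for the duration of a scope. A minimal RAII sketch of
that write window (simplified: the real class above also supports the
one-page TLB-shootdown mode and CHECKs the mprotect results):

    #include <sys/mman.h>
    #include <cstddef>

    class ScopedWritableCode {
     public:
      // `begin` must be page-aligned, as required by mprotect().
      ScopedWritableCode(void* begin, size_t size) : begin_(begin), size_(size) {
        mprotect(begin_, size_, PROT_READ | PROT_WRITE | PROT_EXEC);  // open: rwx
      }
      ~ScopedWritableCode() {
        mprotect(begin_, size_, PROT_READ | PROT_EXEC);  // close: back to rx
      }
     private:
      void* const begin_;
      const size_t size_;
    };
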
@@ -448,6 +231,7 @@
                                   size_t fp_spill_mask,
                                   const uint8_t* code,
                                   size_t code_size,
+                                  size_t data_size,
                                   bool osr,
                                   Handle<mirror::ObjectArray<mirror::Object>> roots,
                                   bool has_should_deoptimize_flag,
@@ -462,6 +246,7 @@
                                        fp_spill_mask,
                                        code,
                                        code_size,
+                                       data_size,
                                        osr,
                                        roots,
                                        has_should_deoptimize_flag,
@@ -479,6 +264,7 @@
                                 fp_spill_mask,
                                 code,
                                 code_size,
+                                data_size,
                                 osr,
                                 roots,
                                 has_should_deoptimize_flag,
@@ -540,10 +326,8 @@
   }
 }
 
-uint8_t* JitCodeCache::GetRootTable(const void* code_ptr, uint32_t* number_of_roots) {
-  CHECK(IsExecutableAddress(code_ptr));
+static uint8_t* GetRootTable(const void* code_ptr, uint32_t* number_of_roots = nullptr) {
   OatQuickMethodHeader* method_header = OatQuickMethodHeader::FromCodePointer(code_ptr);
-  // GetOptimizedCodeInfoPtr uses offsets relative to the EXECUTABLE address.
   uint8_t* data = method_header->GetOptimizedCodeInfoPtr();
   uint32_t roots = GetNumberOfRoots(data);
   if (number_of_roots != nullptr) {
@@ -588,8 +372,6 @@
 void JitCodeCache::SweepRootTables(IsMarkedVisitor* visitor) {
   MutexLock mu(Thread::Current(), lock_);
   for (const auto& entry : method_code_map_) {
-    // GetRootTable takes an EXECUTABLE address.
-    CHECK(IsExecutableAddress(entry.first));
     uint32_t number_of_roots = 0;
     uint8_t* roots_data = GetRootTable(entry.first, &number_of_roots);
     GcRoot<mirror::Object>* roots = reinterpret_cast<GcRoot<mirror::Object>*>(roots_data);
@@ -627,19 +409,17 @@
   }
 }
 
-void JitCodeCache::FreeCodeAndData(const void* code_ptr) {
-  CHECK(IsExecutableAddress(code_ptr));
+void JitCodeCache::FreeCode(const void* code_ptr) {
+  uintptr_t allocation = FromCodeToAllocation(code_ptr);
   // Notify native debugger that we are about to remove the code.
   // It does nothing if we are not using native debugger.
   DeleteJITCodeEntryForAddress(reinterpret_cast<uintptr_t>(code_ptr));
-  // GetRootTable takes an EXECUTABLE address.
   FreeData(GetRootTable(code_ptr));
-  FreeRawCode(reinterpret_cast<uint8_t*>(FromCodeToAllocation(code_ptr)));
+  FreeCode(reinterpret_cast<uint8_t*>(allocation));
 }
 
 void JitCodeCache::FreeAllMethodHeaders(
     const std::unordered_set<OatQuickMethodHeader*>& method_headers) {
-  // method_headers are expected to be in the executable region.
   {
     MutexLock mu(Thread::Current(), *Locks::cha_lock_);
     Runtime::Current()->GetClassLinker()->GetClassHierarchyAnalysis()
@@ -651,9 +431,9 @@
   // so it's possible for the same method_header to start representing
   // different compile code.
   MutexLock mu(Thread::Current(), lock_);
-  ScopedCodeCacheWrite scc(this);
+  ScopedCodeCacheWrite scc(code_map_.get());
   for (const OatQuickMethodHeader* method_header : method_headers) {
-    FreeCodeAndData(method_header->GetCode());
+    FreeCode(method_header->GetCode());
   }
 }
 
@@ -670,10 +450,9 @@
     // with the classlinker_classes_lock_ held, and suspending ourselves could
     // lead to a deadlock.
     {
-      ScopedCodeCacheWrite scc(this);
+      ScopedCodeCacheWrite scc(code_map_.get());
       for (auto it = method_code_map_.begin(); it != method_code_map_.end();) {
         if (alloc.ContainsUnsafe(it->second)) {
-          CHECK(IsExecutableAddress(OatQuickMethodHeader::FromCodePointer(it->first)));
           method_headers.insert(OatQuickMethodHeader::FromCodePointer(it->first));
           it = method_code_map_.erase(it);
         } else {
@@ -765,129 +544,6 @@
   method->SetCounter(std::min(jit_warmup_threshold - 1, 1));
 }
 
-static void FlushInstructionPiplines(uint8_t* sync_page) {
-  // After updating the JIT code cache we need to force all CPUs to
-  // flush their instruction pipelines. In the absence of system call
-  // to do this explicitly, we can achieve this indirectly by toggling
-  // permissions on an executable page. This should send an IPI to
-  // each core to update the TLB entry with the interrupt raised on
-  // each core causing the instruction pipeline to be flushed.
-  CHECKED_MPROTECT(sync_page, kPageSize, kProtAll);
-  // Ensure the sync_page is present otherwise a TLB update may not be
-  // necessary.
-  sync_page[0] = 0;
-  CHECKED_MPROTECT(sync_page, kPageSize, kProtCode);
-}
-
-#ifdef __aarch64__
-
-static void FlushJitCodeCacheRange(uint8_t* code_ptr,
-                                   uint8_t* writable_ptr,
-                                   size_t code_size) {
-  // Cache maintenance instructions can cause permission faults when a
-  // page is not present (e.g. swapped out or not backed). These
-  // faults should be handled by the kernel, but a bug in some Linux
-  // kernels may surface these permission faults to user-land which
-  // does not currently deal with them (b/63885946). To work around
-  // this, we read a value from each page to fault it in before
-  // attempting to perform cache maintenance operations.
-  //
-  // For reference, this behavior is caused by this commit:
-  // https://android.googlesource.com/kernel/msm/+/3fbe6bc28a6b9939d0650f2f17eb5216c719950c
-
-  // The cache-line size could be probed for from the CPU, but
-  // assuming a safe lower bound is safe for CPUs that have different
-  // cache-line sizes for big and little cores.
-  static const uintptr_t kSafeCacheLineSize = 32;
-
-  // Ensure stores are present in L1 data cache.
-  __asm __volatile("dsb ish" ::: "memory");
-
-  volatile uint8_t mutant;
-
-  // Push dirty cache-lines out to the point of unification (PoU). The
-  // point of unification is the first point in the cache/memory
-  // hierarchy where the instruction cache and data cache have the
-  // same view of memory. The PoU is where an instruction fetch will
-  // fetch the new code generated by the JIT.
-  //
-  // See: http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.den0024a/ch11s04.html
-  uintptr_t writable_addr = RoundDown(reinterpret_cast<uintptr_t>(writable_ptr),
-                                      kSafeCacheLineSize);
-  uintptr_t writable_end  = RoundUp(reinterpret_cast<uintptr_t>(writable_ptr) + code_size,
-                                    kSafeCacheLineSize);
-  while (writable_addr < writable_end) {
-    // Read from the cache-line to minimize the chance that a cache
-    // maintenance instruction causes a fault (see kernel bug comment
-    // above).
-    mutant = *reinterpret_cast<const uint8_t*>(writable_addr);
-
-    // Flush cache-line
-    __asm volatile("dc cvau, %0" :: "r"(writable_addr) : "memory");
-    writable_addr += kSafeCacheLineSize;
-  }
-
-  __asm __volatile("dsb ish" ::: "memory");
-
-  uintptr_t code_addr = RoundDown(reinterpret_cast<uintptr_t>(code_ptr), kSafeCacheLineSize);
-  const uintptr_t code_end = RoundUp(reinterpret_cast<uintptr_t>(code_ptr) + code_size,
-                                     kSafeCacheLineSize);
-  while (code_addr < code_end) {
-    // Read from the cache-line to minimize the chance that a cache
-    // maintenance instruction causes a fault (see kernel bug comment
-    // above).
-    mutant = *reinterpret_cast<const uint8_t*>(code_addr);
-
-    // Invalidating the data cache line is only strictly necessary
-    // when the JIT code cache has two mappings (the default). We know
-    // this cache line is clean so this is just invalidating it (using
-    // "dc ivac" would be preferable, but counts as a write and this
-    // memory may not be mapped write permission).
-    __asm volatile("dc cvau, %0" :: "r"(code_addr) : "memory");
-
-    // Invalidate the instruction cache line to force instructions in
-    // range to be re-fetched following update.
-    __asm volatile("ic ivau, %0" :: "r"(code_addr) : "memory");
-
-    code_addr += kSafeCacheLineSize;
-  }
-
-  // Wait for code cache invalidations to complete.
-  __asm __volatile("dsb ish" ::: "memory");
-
-  // Reset fetched instruction stream.
-  __asm __volatile("isb");
-}
-
-#else  // __aarch64
-
-static void FlushJitCodeCacheRange(uint8_t* code_ptr,
-                                   uint8_t* writable_ptr,
-                                   size_t code_size) {
-  if (writable_ptr != code_ptr) {
-    // When there are two mappings of the JIT code cache, RX and
-    // RW, flush the RW version first as we've just dirtied the
-    // cache lines with new code. Flushing the RX version first
-    // can cause a permission fault as the those addresses are not
-    // writable, but can appear dirty in the cache. There is a lot
-    // of potential subtlety here depending on how the cache is
-    // indexed and tagged.
-    //
-    // Flushing the RX version after the RW version is just
-    // invalidating cachelines in the instruction cache. This is
-    // necessary as the instruction cache will often have a
-    // different set of cache lines present and because the JIT
-    // code cache can start a new function at any boundary within
-    // a cache-line.
-    FlushDataCache(reinterpret_cast<char*>(writable_ptr),
-                   reinterpret_cast<char*>(writable_ptr + code_size));
-  }
-  FlushInstructionCache(reinterpret_cast<char*>(code_ptr),
-                        reinterpret_cast<char*>(code_ptr + code_size));
-}
-
-#endif  // __aarch64
-
 uint8_t* JitCodeCache::CommitCodeInternal(Thread* self,
                                           ArtMethod* method,
                                           uint8_t* stack_map,
@@ -898,6 +554,7 @@
                                           size_t fp_spill_mask,
                                           const uint8_t* code,
                                           size_t code_size,
+                                          size_t data_size,
                                           bool osr,
                                           Handle<mirror::ObjectArray<mirror::Object>> roots,
                                           bool has_should_deoptimize_flag,
@@ -917,37 +574,35 @@
     MutexLock mu(self, lock_);
     WaitForPotentialCollectionToComplete(self);
     {
-      ScopedCodeCacheWrite scc(this);
+      ScopedCodeCacheWrite scc(code_map_.get());
       memory = AllocateCode(total_size);
       if (memory == nullptr) {
         return nullptr;
       }
-      uint8_t* writable_ptr = memory + header_size;
-      code_ptr = ToExecutableAddress(writable_ptr);
+      code_ptr = memory + header_size;
 
-      std::copy(code, code + code_size, writable_ptr);
-      OatQuickMethodHeader* writable_method_header =
-          OatQuickMethodHeader::FromCodePointer(writable_ptr);
-      // We need to be able to write the OatQuickMethodHeader, so we use writable_method_header.
-      // Otherwise, the offsets encoded in OatQuickMethodHeader are used relative to an executable
-      // address, so we use code_ptr.
-      new (writable_method_header) OatQuickMethodHeader(
+      std::copy(code, code + code_size, code_ptr);
+      method_header = OatQuickMethodHeader::FromCodePointer(code_ptr);
+      new (method_header) OatQuickMethodHeader(
           code_ptr - stack_map,
           code_ptr - method_info,
           frame_size_in_bytes,
           core_spill_mask,
           fp_spill_mask,
           code_size);
-
-      FlushJitCodeCacheRange(code_ptr, writable_ptr, code_size);
-      FlushInstructionPiplines(code_sync_map_->Begin());
-
+      // Flush caches before we remove write permission because some ARMv8 Qualcomm kernels may
+      // trigger a segfault if a page fault occurs when requesting a cache maintenance operation.
+      // This is a kernel bug that we need to work around until affected devices (e.g. Nexus 5X and
+      // 6P) stop being supported or their kernels are fixed.
+      //
+      // For reference, this behavior is caused by this commit:
+      // https://android.googlesource.com/kernel/msm/+/3fbe6bc28a6b9939d0650f2f17eb5216c719950c
+      FlushInstructionCache(reinterpret_cast<char*>(code_ptr),
+                            reinterpret_cast<char*>(code_ptr + code_size));
       DCHECK(!Runtime::Current()->IsAotCompiler());
       if (has_should_deoptimize_flag) {
-        writable_method_header->SetHasShouldDeoptimizeFlag();
+        method_header->SetHasShouldDeoptimizeFlag();
       }
-      // All the pointers exported from the cache are executable addresses.
-      method_header = ToExecutableAddress(writable_method_header);
     }
 
     number_of_compilations_++;
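
The ordering in the hunk above is the point: cache maintenance has to touch
the pages while they are still writable, or the affected kernels may fault.
For reference, a hedged sketch of the kind of primitive FlushInstructionCache
is assumed to wrap; __builtin___clear_cache is a real GCC/Clang builtin, but
the wrapper name is illustrative:

    #include <cstddef>
    #include <cstdint>

    // Flush freshly written code out of the data cache and invalidate the
    // instruction cache over the same range, so later instruction fetches
    // observe the new bytes.
    inline void FlushNewCode(uint8_t* code, size_t size) {
      __builtin___clear_cache(reinterpret_cast<char*>(code),
                              reinterpret_cast<char*>(code + size));
    }
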
@@ -986,14 +641,16 @@
     // but below we still make the compiled code valid for the method.
     MutexLock mu(self, lock_);
     // Fill the root table before updating the entry point.
-    CHECK(IsDataAddress(roots_data));
     DCHECK_EQ(FromStackMapToRoots(stack_map), roots_data);
     DCHECK_LE(roots_data, stack_map);
     FillRootTable(roots_data, roots);
-
-    // Ensure the updates to the root table are visible with a store fence.
-    QuasiAtomic::ThreadFenceSequentiallyConsistent();
-
+    {
+      // Flush data cache, as compiled code references literals in it.
+      // We also need a TLB shootdown to act as a memory barrier across cores.
+      ScopedCodeCacheWrite ccw(code_map_.get(), /* only_for_tlb_shootdown */ true);
+      FlushDataCache(reinterpret_cast<char*>(roots_data),
+                     reinterpret_cast<char*>(roots_data + data_size));
+    }
     method_code_map_.Put(code_ptr, method);
     if (osr) {
       number_of_osr_compilations_++;
@@ -1041,11 +698,11 @@
 
   bool in_cache = false;
   {
-    ScopedCodeCacheWrite ccw(this);
+    ScopedCodeCacheWrite ccw(code_map_.get());
     for (auto code_iter = method_code_map_.begin(); code_iter != method_code_map_.end();) {
       if (code_iter->second == method) {
         if (release_memory) {
-          FreeCodeAndData(code_iter->first);
+          FreeCode(code_iter->first);
         }
         code_iter = method_code_map_.erase(code_iter);
         in_cache = true;
@@ -1099,10 +756,10 @@
     profiling_infos_.erase(profile);
   }
   method->SetProfilingInfo(nullptr);
-  ScopedCodeCacheWrite ccw(this);
+  ScopedCodeCacheWrite ccw(code_map_.get());
   for (auto code_iter = method_code_map_.begin(); code_iter != method_code_map_.end();) {
     if (code_iter->second == method) {
-      FreeCodeAndData(code_iter->first);
+      FreeCode(code_iter->first);
       code_iter = method_code_map_.erase(code_iter);
       continue;
     }
@@ -1168,7 +825,6 @@
                              uint8_t* stack_map_data,
                              uint8_t* roots_data) {
   DCHECK_EQ(FromStackMapToRoots(stack_map_data), roots_data);
-  CHECK(IsDataAddress(roots_data));
   MutexLock mu(self, lock_);
   FreeData(reinterpret_cast<uint8_t*>(roots_data));
 }
@@ -1290,11 +946,11 @@
 
 void JitCodeCache::SetFootprintLimit(size_t new_footprint) {
   size_t per_space_footprint = new_footprint / 2;
-  CHECK(IsAlignedParam(per_space_footprint, kPageSize));
+  DCHECK(IsAlignedParam(per_space_footprint, kPageSize));
   DCHECK_EQ(per_space_footprint * 2, new_footprint);
   mspace_set_footprint_limit(data_mspace_, per_space_footprint);
   {
-    ScopedCodeCacheWrite scc(this);
+    ScopedCodeCacheWrite scc(code_map_.get());
     mspace_set_footprint_limit(code_mspace_, per_space_footprint);
   }
 }
@@ -1370,8 +1026,8 @@
       number_of_collections_++;
       live_bitmap_.reset(CodeCacheBitmap::Create(
           "code-cache-bitmap",
-          reinterpret_cast<uintptr_t>(executable_code_map_->Begin()),
-          reinterpret_cast<uintptr_t>(executable_code_map_->Begin() + current_capacity_ / 2)));
+          reinterpret_cast<uintptr_t>(code_map_->Begin()),
+          reinterpret_cast<uintptr_t>(code_map_->Begin() + current_capacity_ / 2)));
       collection_in_progress_ = true;
     }
   }
@@ -1443,16 +1099,14 @@
   std::unordered_set<OatQuickMethodHeader*> method_headers;
   {
     MutexLock mu(self, lock_);
-    ScopedCodeCacheWrite scc(this);
+    ScopedCodeCacheWrite scc(code_map_.get());
     // Iterate over all compiled code and remove entries that are not marked.
     for (auto it = method_code_map_.begin(); it != method_code_map_.end();) {
       const void* code_ptr = it->first;
-      CHECK(IsExecutableAddress(code_ptr));
       uintptr_t allocation = FromCodeToAllocation(code_ptr);
       if (GetLiveBitmap()->Test(allocation)) {
         ++it;
       } else {
-        CHECK(IsExecutableAddress(it->first));
         method_headers.insert(OatQuickMethodHeader::FromCodePointer(it->first));
         it = method_code_map_.erase(it);
       }
@@ -1495,7 +1149,6 @@
     for (const auto& it : method_code_map_) {
       ArtMethod* method = it.second;
       const void* code_ptr = it.first;
-      CHECK(IsExecutableAddress(code_ptr));
       const OatQuickMethodHeader* method_header = OatQuickMethodHeader::FromCodePointer(code_ptr);
       if (method_header->GetEntryPoint() == method->GetEntryPointFromQuickCompiledCode()) {
         GetLiveBitmap()->AtomicTestAndSet(FromCodeToAllocation(code_ptr));
@@ -1521,7 +1174,6 @@
     // Free all profiling infos of methods not compiled nor being compiled.
     auto profiling_kept_end = std::remove_if(profiling_infos_.begin(), profiling_infos_.end(),
       [this] (ProfilingInfo* info) NO_THREAD_SAFETY_ANALYSIS {
-        CHECK(IsDataAddress(info));
         const void* ptr = info->GetMethod()->GetEntryPointFromQuickCompiledCode();
         // We have previously cleared the ProfilingInfo pointer in the ArtMethod in the hope
         // that the compiled code would not get revived. As mutator threads run concurrently,
@@ -1582,7 +1234,6 @@
   --it;
 
   const void* code_ptr = it->first;
-  CHECK(IsExecutableAddress(code_ptr));
   OatQuickMethodHeader* method_header = OatQuickMethodHeader::FromCodePointer(code_ptr);
   if (!method_header->Contains(pc)) {
     return nullptr;
@@ -1665,7 +1316,6 @@
   // store in the ArtMethod's ProfilingInfo pointer.
   QuasiAtomic::ThreadFenceRelease();
 
-  CHECK(IsDataAddress(info));
   method->SetProfilingInfo(info);
   profiling_infos_.push_back(info);
   histogram_profiling_info_memory_use_.AddValue(profile_info_size);
@@ -1678,8 +1328,7 @@
   if (code_mspace_ == mspace) {
     size_t result = code_end_;
     code_end_ += increment;
-    MemMap* writable_map = GetWritableMemMap();
-    return reinterpret_cast<void*>(result + writable_map->Begin());
+    return reinterpret_cast<void*>(result + code_map_->Begin());
   } else {
     DCHECK_EQ(data_mspace_, mspace);
     size_t result = data_end_;
@@ -1831,7 +1480,6 @@
 
 size_t JitCodeCache::GetMemorySizeOfCodePointer(const void* ptr) {
   MutexLock mu(Thread::Current(), lock_);
-  CHECK(IsExecutableAddress(ptr));
   return mspace_usable_size(reinterpret_cast<const void*>(FromCodeToAllocation(ptr)));
 }
 
@@ -1867,27 +1515,22 @@
   size_t header_size = RoundUp(sizeof(OatQuickMethodHeader), alignment);
   // Ensure the header ends up at expected instruction alignment.
   DCHECK_ALIGNED_PARAM(reinterpret_cast<uintptr_t>(result + header_size), alignment);
-  CHECK(IsWritableAddress(result));
   used_memory_for_code_ += mspace_usable_size(result);
   return result;
 }
 
-void JitCodeCache::FreeRawCode(void* code) {
-  CHECK(IsExecutableAddress(code));
-  void* writable_code = ToWritableAddress(code);
-  used_memory_for_code_ -= mspace_usable_size(writable_code);
-  mspace_free(code_mspace_, writable_code);
+void JitCodeCache::FreeCode(uint8_t* code) {
+  used_memory_for_code_ -= mspace_usable_size(code);
+  mspace_free(code_mspace_, code);
 }
 
 uint8_t* JitCodeCache::AllocateData(size_t data_size) {
   void* result = mspace_malloc(data_mspace_, data_size);
-  CHECK(IsDataAddress(reinterpret_cast<uint8_t*>(result)));
   used_memory_for_data_ += mspace_usable_size(result);
   return reinterpret_cast<uint8_t*>(result);
 }
 
 void JitCodeCache::FreeData(uint8_t* data) {
-  CHECK(IsDataAddress(data));
   used_memory_for_data_ -= mspace_usable_size(data);
   mspace_free(data_mspace_, data);
 }
diff --git a/runtime/jit/jit_code_cache.h b/runtime/jit/jit_code_cache.h
index 175501f..daa1d61 100644
--- a/runtime/jit/jit_code_cache.h
+++ b/runtime/jit/jit_code_cache.h
@@ -113,6 +113,7 @@
                       size_t fp_spill_mask,
                       const uint8_t* code,
                       size_t code_size,
+                      size_t data_size,
                       bool osr,
                       Handle<mirror::ObjectArray<mirror::Object>> roots,
                       bool has_should_deoptimize_flag,
@@ -228,8 +229,6 @@
       REQUIRES(!lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  uint8_t* GetRootTable(const void* code_ptr, uint32_t* number_of_roots = nullptr);
-
   // The GC needs to disallow the reading of inline caches when it processes them,
   // to avoid having a class being used while it is being deleted.
   void AllowInlineCacheAccess() REQUIRES(!lock_);
@@ -248,13 +247,9 @@
   }
 
  private:
-  friend class ScopedCodeCacheWrite;
-
   // Take ownership of maps.
   JitCodeCache(MemMap* code_map,
                MemMap* data_map,
-               MemMap* writable_code_map,
-               MemMap* code_sync_map,
                size_t initial_code_capacity,
                size_t initial_data_capacity,
                size_t max_capacity,
@@ -272,6 +267,7 @@
                               size_t fp_spill_mask,
                               const uint8_t* code,
                               size_t code_size,
+                              size_t data_size,
                               bool osr,
                               Handle<mirror::ObjectArray<mirror::Object>> roots,
                               bool has_should_deoptimize_flag,
@@ -296,7 +292,7 @@
       REQUIRES(!Locks::cha_lock_);
 
   // Free in the mspace allocations for `code_ptr`.
-  void FreeCodeAndData(const void* code_ptr) REQUIRES(lock_);
+  void FreeCode(const void* code_ptr) REQUIRES(lock_);
 
   // Number of bytes allocated in the code cache.
   size_t CodeCacheSizeLocked() REQUIRES(lock_);
@@ -329,7 +325,7 @@
   bool CheckLiveCompiledCodeHasProfilingInfo()
       REQUIRES(lock_);
 
-  void FreeRawCode(void* code) REQUIRES(lock_);
+  void FreeCode(uint8_t* code) REQUIRES(lock_);
   uint8_t* AllocateCode(size_t code_size) REQUIRES(lock_);
   void FreeData(uint8_t* data) REQUIRES(lock_);
   uint8_t* AllocateData(size_t data_size) REQUIRES(lock_);
@@ -339,61 +335,25 @@
       REQUIRES(!lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  MemMap* GetWritableMemMap() const {
-    if (writable_code_map_ == nullptr) {
-      // The system required us to map the JIT Code Cache RWX (see
-      // JitCodeCache::Create()).
-      return executable_code_map_.get();
-    } else {
-      // Executable code is mapped RX, and writable code is mapped RW
-      // to the underlying same memory, but at a different address.
-      return writable_code_map_.get();
-    }
-  }
-
-  bool IsDataAddress(const void* raw_addr) const;
-
-  bool IsExecutableAddress(const void* raw_addr) const;
-
-  bool IsWritableAddress(const void* raw_addr) const;
-
-  template <typename T>
-  T* ToExecutableAddress(T* writable_address) const;
-
-  void* ToWritableAddress(const void* executable_address) const;
-
   // Lock for guarding allocations, collections, and the method_code_map_.
   Mutex lock_;
   // Condition to wait on during collection.
   ConditionVariable lock_cond_ GUARDED_BY(lock_);
   // Whether there is a code cache collection in progress.
   bool collection_in_progress_ GUARDED_BY(lock_);
-  // JITting methods obviously requires both write and execute permissions on a region of memory.
-  // In tye typical (non-debugging) case, we separate the memory mapped view that can write the code
-  // from a view that the runtime uses to execute the code. Having these two views eliminates any
-  // single address region having rwx permissions.  An attacker could still write the writable
-  // address and then execute the executable address. We allocate the mappings with a random
-  // address relationship to each other which makes the attacker need two addresses rather than
-  // just one.  In the debugging case there is no file descriptor to back the
-  // shared memory, and hence we have to use a single mapping.
+  // Mem map which holds code.
+  std::unique_ptr<MemMap> code_map_;
   // Mem map which holds data (stack maps and profiling info).
   std::unique_ptr<MemMap> data_map_;
-  // Mem map which holds a non-writable view of code for JIT.
-  std::unique_ptr<MemMap> executable_code_map_;
-  // Mem map which holds a non-executable view of code for JIT.
-  std::unique_ptr<MemMap> writable_code_map_;
-  // Mem map which holds one executable page that we use for flushing instruction
-  // fetch buffers. The code on this page is never executed.
-  std::unique_ptr<MemMap> code_sync_map_;
   // The opaque mspace for allocating code.
   void* code_mspace_ GUARDED_BY(lock_);
   // The opaque mspace for allocating data.
   void* data_mspace_ GUARDED_BY(lock_);
   // Bitmap for collecting code and data.
   std::unique_ptr<CodeCacheBitmap> live_bitmap_;
-  // Holds non-writable compiled code associated to the ArtMethod.
+  // Holds compiled code associated to the ArtMethod.
   SafeMap<const void*, ArtMethod*> method_code_map_ GUARDED_BY(lock_);
-  // Holds non-writable osr compiled code associated to the ArtMethod.
+  // Holds osr compiled code associated to the ArtMethod.
   SafeMap<ArtMethod*, const void*> osr_code_map_ GUARDED_BY(lock_);
   // ProfilingInfo objects we have allocated.
   std::vector<ProfilingInfo*> profiling_infos_ GUARDED_BY(lock_);
diff --git a/runtime/mem_map.cc b/runtime/mem_map.cc
index 4e82480..743604c 100644
--- a/runtime/mem_map.cc
+++ b/runtime/mem_map.cc
@@ -496,7 +496,7 @@
     MEMORY_TOOL_MAKE_UNDEFINED(base_begin_, base_size_);
     int result = munmap(base_begin_, base_size_);
     if (result == -1) {
-      PLOG(FATAL) << "munmap failed: " << BaseBegin() << "..." << BaseEnd();
+      PLOG(FATAL) << "munmap failed";
     }
   }
 
@@ -535,13 +535,8 @@
   }
 }
 
-MemMap* MemMap::RemapAtEnd(uint8_t* new_end,
-                           const char* tail_name,
-                           int tail_prot,
-                           int sharing_flags,
-                           std::string* error_msg,
-                           bool use_ashmem,
-                           unique_fd* shmem_fd) {
+MemMap* MemMap::RemapAtEnd(uint8_t* new_end, const char* tail_name, int tail_prot,
+                           std::string* error_msg, bool use_ashmem) {
   use_ashmem = use_ashmem && !kIsTargetLinux;
   DCHECK_GE(new_end, Begin());
   DCHECK_LE(new_end, End());
@@ -560,12 +555,6 @@
   size_ = new_end - reinterpret_cast<uint8_t*>(begin_);
   base_size_ = new_base_end - reinterpret_cast<uint8_t*>(base_begin_);
   DCHECK_LE(begin_ + size_, reinterpret_cast<uint8_t*>(base_begin_) + base_size_);
-  if (base_size_ == 0u) {
-    // All pages in this MemMap have been handed out. Invalidate base
-    // pointer to prevent the destructor calling munmap() on
-    // zero-length region (which can't succeed).
-    base_begin_ = nullptr;
-  }
   size_t tail_size = old_end - new_end;
   uint8_t* tail_base_begin = new_base_end;
   size_t tail_base_size = old_base_end - new_base_end;
@@ -573,14 +562,14 @@
   DCHECK_ALIGNED(tail_base_size, kPageSize);
 
   unique_fd fd;
-  int flags = MAP_ANONYMOUS | sharing_flags;
+  int flags = MAP_PRIVATE | MAP_ANONYMOUS;
   if (use_ashmem) {
     // android_os_Debug.cpp read_mapinfo assumes all ashmem regions associated with the VM are
     // prefixed "dalvik-".
     std::string debug_friendly_name("dalvik-");
     debug_friendly_name += tail_name;
     fd.reset(ashmem_create_region(debug_friendly_name.c_str(), tail_base_size));
-    flags = MAP_FIXED | sharing_flags;
+    flags = MAP_PRIVATE | MAP_FIXED;
     if (fd.get() == -1) {
       *error_msg = StringPrintf("ashmem_create_region failed for '%s': %s",
                                 tail_name, strerror(errno));
@@ -614,9 +603,6 @@
                               fd.get());
     return nullptr;
   }
-  if (shmem_fd != nullptr) {
-    shmem_fd->reset(fd.release());
-  }
   return new MemMap(tail_name, actual, tail_size, actual, tail_base_size, tail_prot, false);
 }
 
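
With the shared-memory plumbing removed, RemapAtEnd is back to its simpler
job: shrink the original mapping and stitch a fresh anonymous mapping over
the tail. A hedged sketch of the core system-call sequence (standalone names,
ashmem handling omitted):

    #include <sys/mman.h>
    #include <cstddef>
    #include <cstdint>

    // Carve the tail [begin + new_size, begin + old_size) of an anonymous
    // mapping into an independent mapping with its own protection flags.
    // MAP_FIXED replaces the old pages in place, so the address is preserved.
    void* RemapTail(uint8_t* begin, size_t old_size, size_t new_size, int tail_prot) {
      return mmap(begin + new_size, old_size - new_size, tail_prot,
                  MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
    }
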
diff --git a/runtime/mem_map.h b/runtime/mem_map.h
index d8908ad..5603963 100644
--- a/runtime/mem_map.h
+++ b/runtime/mem_map.h
@@ -25,7 +25,6 @@
 #include <string>
 
 #include "android-base/thread_annotations.h"
-#include "android-base/unique_fd.h"
 
 namespace art {
 
@@ -38,8 +37,6 @@
 #define USE_ART_LOW_4G_ALLOCATOR 0
 #endif
 
-using android::base::unique_fd;
-
 #ifdef __linux__
 static constexpr bool kMadviseZeroes = true;
 #else
@@ -171,14 +168,11 @@
   }
 
   // Unmap the pages at end and remap them to create another memory map.
-  // sharing_flags should be either MAP_PRIVATE or MAP_SHARED.
   MemMap* RemapAtEnd(uint8_t* new_end,
                      const char* tail_name,
                      int tail_prot,
-                     int sharing_flags,
                      std::string* error_msg,
-                     bool use_ashmem = true,
-                     unique_fd* shmem_fd = nullptr);
+                     bool use_ashmem = true);
 
   static bool CheckNoGaps(MemMap* begin_map, MemMap* end_map)
       REQUIRES(!MemMap::mem_maps_lock_);
diff --git a/runtime/mem_map_test.cc b/runtime/mem_map_test.cc
index 99bf004..a4ebb16 100644
--- a/runtime/mem_map_test.cc
+++ b/runtime/mem_map_test.cc
@@ -74,7 +74,6 @@
     MemMap* m1 = m0->RemapAtEnd(base0 + page_size,
                                 "MemMapTest_RemapAtEndTest_map1",
                                 PROT_READ | PROT_WRITE,
-                                MAP_PRIVATE,
                                 &error_msg);
     // Check the states of the two maps.
     EXPECT_EQ(m0->Begin(), base0) << error_msg;
@@ -457,7 +456,6 @@
   std::unique_ptr<MemMap> m1(m0->RemapAtEnd(base0 + 3 * page_size,
                                             "MemMapTest_AlignByTest_map1",
                                             PROT_READ | PROT_WRITE,
-                                            MAP_PRIVATE,
                                             &error_msg));
   uint8_t* base1 = m1->Begin();
   ASSERT_TRUE(base1 != nullptr) << error_msg;
@@ -467,7 +465,6 @@
   std::unique_ptr<MemMap> m2(m1->RemapAtEnd(base1 + 4 * page_size,
                                             "MemMapTest_AlignByTest_map2",
                                             PROT_READ | PROT_WRITE,
-                                            MAP_PRIVATE,
                                             &error_msg));
   uint8_t* base2 = m2->Begin();
   ASSERT_TRUE(base2 != nullptr) << error_msg;
@@ -477,7 +474,6 @@
   std::unique_ptr<MemMap> m3(m2->RemapAtEnd(base2 + 3 * page_size,
                                             "MemMapTest_AlignByTest_map1",
                                             PROT_READ | PROT_WRITE,
-                                            MAP_PRIVATE,
                                             &error_msg));
   uint8_t* base3 = m3->Begin();
   ASSERT_TRUE(base3 != nullptr) << error_msg;
diff --git a/runtime/vdex_file.cc b/runtime/vdex_file.cc
index e8f947c..b955220 100644
--- a/runtime/vdex_file.cc
+++ b/runtime/vdex_file.cc
@@ -20,6 +20,7 @@
 
 #include <memory>
 
+#include "base/bit_utils.h"
 #include "base/logging.h"
 #include "base/stl_util.h"
 #include "base/unix_file/fd_file.h"
@@ -134,6 +135,9 @@
   } else {
     // Fetch the next dex file. Return null if there is none.
     const uint8_t* data = cursor + reinterpret_cast<const DexFile::Header*>(cursor)->file_size_;
+    // Dex files are required to be 4-byte aligned. The OatWriter makes sure they are; see
+    // OatWriter::SeekToDexFiles.
+    data = AlignUp(data, 4);
     return (data == DexEnd()) ? nullptr : data;
   }
 }
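
AlignUp comes from the newly included base/bit_utils.h. For reference, a
minimal sketch of the rounding it performs on a pointer, assuming a
power-of-two alignment (4 here):

    #include <cstdint>

    // Round a pointer up to the next multiple of a power-of-two alignment.
    inline const uint8_t* AlignUpPtr(const uint8_t* ptr, uintptr_t alignment) {
      uintptr_t value = reinterpret_cast<uintptr_t>(ptr);
      return reinterpret_cast<const uint8_t*>((value + alignment - 1) & ~(alignment - 1));
    }
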
diff --git a/test/623-checker-loop-regressions/src/Main.java b/test/623-checker-loop-regressions/src/Main.java
index fc7bcb2..0e85612 100644
--- a/test/623-checker-loop-regressions/src/Main.java
+++ b/test/623-checker-loop-regressions/src/Main.java
@@ -441,6 +441,29 @@
     }
   }
 
+  /// CHECK-START: int Main.feedsIntoDeopt(int[]) loop_optimization (before)
+  /// CHECK-DAG: Phi loop:<<Loop1:B\d+>> outer_loop:none
+  /// CHECK-DAG: Phi loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: Phi loop:<<Loop2:B\d+>> outer_loop:none
+  //
+  /// CHECK-EVAL: "<<Loop1>>" != "<<Loop2>>"
+  //
+  /// CHECK-START: int Main.feedsIntoDeopt(int[]) loop_optimization (after)
+  /// CHECK-DAG: Phi loop:{{B\d+}} outer_loop:none
+  /// CHECK-NOT: Phi
+  static int feedsIntoDeopt(int[] a) {
+    // Reduction should be removed.
+    int r = 0;
+    for (int i = 0; i < 100; i++) {
+      r += 10;
+    }
+    // Even though its uses feed into the deopts generated by BCE.
+    for (int i = 1; i < 100; i++) {
+      a[i] = a[i - 1];
+    }
+    return r;
+  }
+
   public static void main(String[] args) {
     expectEquals(10, earlyExitFirst(-1));
     for (int i = 0; i <= 10; i++) {
@@ -556,6 +579,13 @@
 
     inductionMax(yy);
 
+    int[] f = new int[100];
+    f[0] = 11;
+    expectEquals(1000, feedsIntoDeopt(f));
+    for (int i = 0; i < 100; i++) {
+      expectEquals(11, f[i]);
+    }
+
     System.out.println("passed");
   }
 
diff --git a/test/663-odd-dex-size/classes.dex b/test/663-odd-dex-size/classes.dex
new file mode 100644
index 0000000..633e3a2
--- /dev/null
+++ b/test/663-odd-dex-size/classes.dex
Binary files differ
diff --git a/test/663-odd-dex-size/expected.txt b/test/663-odd-dex-size/expected.txt
new file mode 100644
index 0000000..3da1ec2
--- /dev/null
+++ b/test/663-odd-dex-size/expected.txt
@@ -0,0 +1 @@
+HelloWorld
diff --git a/test/663-odd-dex-size/info.txt b/test/663-odd-dex-size/info.txt
new file mode 100644
index 0000000..11a50e0
--- /dev/null
+++ b/test/663-odd-dex-size/info.txt
@@ -0,0 +1,14 @@
+Test for a dex file with an odd size in a vdex file.
+
+The code in the file is:
+
+class Main {
+  public static void main(String[] args) {
+    System.out.println("HelloWorld");
+  }
+}
+
+The generated dex file was then manually edited to:
+1) Add 1 to the size value in the dex header.
+2) Add 1 byte to the file.
+3) Change the checksum in the dex header.
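
Step 3 is necessary because the runtime verifies the header checksum. Per the
dex specification, the checksum is an Adler-32 over everything after the magic
and checksum fields, i.e. bytes [12, file size). A hedged sketch of
recomputing it with zlib (the helper name is illustrative; a little-endian
host is assumed):

    #include <zlib.h>
    #include <cstdint>
    #include <cstring>
    #include <vector>

    // Recompute and patch the checksum stored at offset 8 of an edited dex file.
    void FixDexChecksum(std::vector<uint8_t>& dex) {
      uint32_t sum = static_cast<uint32_t>(
          adler32(1u, dex.data() + 12, dex.size() - 12));  // seed 1, per zlib
      std::memcpy(dex.data() + 8, &sum, sizeof(sum));
    }
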
diff --git a/test/663-odd-dex-size2/663-odd-dex-size2.jar b/test/663-odd-dex-size2/663-odd-dex-size2.jar
new file mode 100644
index 0000000..a29224e
--- /dev/null
+++ b/test/663-odd-dex-size2/663-odd-dex-size2.jar
Binary files differ
diff --git a/test/663-odd-dex-size2/build b/test/663-odd-dex-size2/build
new file mode 100644
index 0000000..5636558
--- /dev/null
+++ b/test/663-odd-dex-size2/build
@@ -0,0 +1,17 @@
+#!/bin/bash
+#
+# Copyright (C) 2017 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Nothing to do
diff --git a/test/663-odd-dex-size2/expected.txt b/test/663-odd-dex-size2/expected.txt
new file mode 100644
index 0000000..3da1ec2
--- /dev/null
+++ b/test/663-odd-dex-size2/expected.txt
@@ -0,0 +1 @@
+HelloWorld
diff --git a/test/663-odd-dex-size2/info.txt b/test/663-odd-dex-size2/info.txt
new file mode 100644
index 0000000..900394d
--- /dev/null
+++ b/test/663-odd-dex-size2/info.txt
@@ -0,0 +1,15 @@
+Test for two files with an odd size in a vdex file.
+
+The code in both files is:
+
+class Main {
+  public static void main(String[] args) {
+    System.out.println("HelloWorld");
+  }
+}
+
+The generated dex file was then manually edited to:
+1) Add 1 to the size value in the dex header.
+2) Add 1 byte to the file.
+3) Change the checksum in the dex header.
+
diff --git a/test/663-odd-dex-size3/663-odd-dex-size3.jar b/test/663-odd-dex-size3/663-odd-dex-size3.jar
new file mode 100644
index 0000000..d23ed57
--- /dev/null
+++ b/test/663-odd-dex-size3/663-odd-dex-size3.jar
Binary files differ
diff --git a/test/663-odd-dex-size3/build b/test/663-odd-dex-size3/build
new file mode 100644
index 0000000..5636558
--- /dev/null
+++ b/test/663-odd-dex-size3/build
@@ -0,0 +1,17 @@
+#!/bin/bash
+#
+# Copyright (C) 2017 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Nothing to do
diff --git a/test/663-odd-dex-size3/expected.txt b/test/663-odd-dex-size3/expected.txt
new file mode 100644
index 0000000..3da1ec2
--- /dev/null
+++ b/test/663-odd-dex-size3/expected.txt
@@ -0,0 +1 @@
+HelloWorld
diff --git a/test/663-odd-dex-size3/info.txt b/test/663-odd-dex-size3/info.txt
new file mode 100644
index 0000000..256c77d
--- /dev/null
+++ b/test/663-odd-dex-size3/info.txt
@@ -0,0 +1,19 @@
+Test for a dex file with an odd size followed by an aligned dex file.
+
+The code in classes.dex is:
+
+class Main {
+  public static void main(String[] args) {
+    System.out.println("HelloWorld");
+  }
+}
+
+The generated dex file was then manually edited to:
+1) Add 1 to the size value in the dex header.
+2) Add 1 byte to the file.
+3) Change the checksum in the dex header.
+
+The code in classes2.dex is:
+
+class Foo {
+}
diff --git a/test/663-odd-dex-size4/663-odd-dex-size4.jar b/test/663-odd-dex-size4/663-odd-dex-size4.jar
new file mode 100644
index 0000000..d229663
--- /dev/null
+++ b/test/663-odd-dex-size4/663-odd-dex-size4.jar
Binary files differ
diff --git a/test/663-odd-dex-size4/build b/test/663-odd-dex-size4/build
new file mode 100644
index 0000000..5636558
--- /dev/null
+++ b/test/663-odd-dex-size4/build
@@ -0,0 +1,17 @@
+#!/bin/bash
+#
+# Copyright (C) 2017 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Nothing to do
diff --git a/test/663-odd-dex-size4/expected.txt b/test/663-odd-dex-size4/expected.txt
new file mode 100644
index 0000000..3da1ec2
--- /dev/null
+++ b/test/663-odd-dex-size4/expected.txt
@@ -0,0 +1 @@
+HelloWorld
diff --git a/test/663-odd-dex-size4/info.txt b/test/663-odd-dex-size4/info.txt
new file mode 100644
index 0000000..2c34557
--- /dev/null
+++ b/test/663-odd-dex-size4/info.txt
@@ -0,0 +1,19 @@
+Test for an aligned dex file followed by a dex file with an odd size.
+
+The code in classes.dex is:
+
+class Foo {
+}
+
+The code in classes2.dex is:
+
+class Main {
+  public static void main(String[] args) {
+    System.out.println("HelloWorld");
+  }
+}
+
+The generated dex file was then manually edited to:
+1) Add 1 to the size value in the dex header.
+2) Add 1 byte to the file.
+3) Change the checksum in the dex header.
diff --git a/test/706-checker-scheduler/run b/test/706-checker-scheduler/run
new file mode 100644
index 0000000..5ffc303
--- /dev/null
+++ b/test/706-checker-scheduler/run
@@ -0,0 +1,18 @@
+#!/bin/bash
+#
+# Copyright (C) 2017 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Use secondary switch to add secondary dex file to class path.
+exec ${RUN} "${@}" --secondary
diff --git a/test/706-checker-scheduler/src-dex2oat-unresolved/UnresolvedClass.java b/test/706-checker-scheduler/src-dex2oat-unresolved/UnresolvedClass.java
new file mode 100644
index 0000000..4faa12a
--- /dev/null
+++ b/test/706-checker-scheduler/src-dex2oat-unresolved/UnresolvedClass.java
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class UnresolvedClass {
+  public static int staticInt;
+  public int instanceInt;
+}
+
diff --git a/test/706-checker-scheduler/src/Main.java b/test/706-checker-scheduler/src/Main.java
index a68565b..08a23a7 100644
--- a/test/706-checker-scheduler/src/Main.java
+++ b/test/706-checker-scheduler/src/Main.java
@@ -31,6 +31,7 @@
   public ExampleObj my_obj;
   public static int number1;
   public static int number2;
+  public static volatile int number3;
 
   /// CHECK-START-ARM64: int Main.arrayAccess() scheduler (before)
   /// CHECK:    <<Const1:i\d+>>       IntConstant 1
@@ -340,6 +341,87 @@
     }
   }
 
+  /// CHECK-START-ARM: void Main.accessFieldsVolatile() scheduler (before)
+  /// CHECK-START-ARM64: void Main.accessFieldsVolatile() scheduler (before)
+  /// CHECK:            InstanceFieldGet
+  /// CHECK:            Add
+  /// CHECK:            InstanceFieldSet
+  /// CHECK:            InstanceFieldGet
+  /// CHECK:            Add
+  /// CHECK:            InstanceFieldSet
+  /// CHECK:            StaticFieldGet
+  /// CHECK:            Add
+  /// CHECK:            StaticFieldSet
+  /// CHECK:            StaticFieldGet
+  /// CHECK:            Add
+  /// CHECK:            StaticFieldSet
+
+  /// CHECK-START-ARM: void Main.accessFieldsVolatile() scheduler (after)
+  /// CHECK-START-ARM64: void Main.accessFieldsVolatile() scheduler (after)
+  /// CHECK:            InstanceFieldGet
+  /// CHECK:            Add
+  /// CHECK:            InstanceFieldSet
+  /// CHECK:            InstanceFieldGet
+  /// CHECK:            Add
+  /// CHECK:            InstanceFieldSet
+  /// CHECK:            StaticFieldGet
+  /// CHECK:            Add
+  /// CHECK:            StaticFieldSet
+  /// CHECK:            StaticFieldGet
+  /// CHECK:            Add
+  /// CHECK:            StaticFieldSet
+
+  public void accessFieldsVolatile() {
+    my_obj = new ExampleObj(1, 2);
+    for (int i = 0; i < 10; i++) {
+      my_obj.n1++;
+      my_obj.n2++;
+      number1++;
+      number3++;
+    }
+  }
+
+  /// CHECK-START-ARM: void Main.accessFieldsUnresolved() scheduler (before)
+  /// CHECK-START-ARM64: void Main.accessFieldsUnresolved() scheduler (before)
+  /// CHECK:            InstanceFieldGet
+  /// CHECK:            Add
+  /// CHECK:            InstanceFieldSet
+  /// CHECK:            InstanceFieldGet
+  /// CHECK:            Add
+  /// CHECK:            InstanceFieldSet
+  /// CHECK:            UnresolvedInstanceFieldGet
+  /// CHECK:            Add
+  /// CHECK:            UnresolvedInstanceFieldSet
+  /// CHECK:            UnresolvedStaticFieldGet
+  /// CHECK:            Add
+  /// CHECK:            UnresolvedStaticFieldSet
+
+  /// CHECK-START-ARM: void Main.accessFieldsUnresolved() scheduler (after)
+  /// CHECK-START-ARM64: void Main.accessFieldsUnresolved() scheduler (after)
+  /// CHECK:            InstanceFieldGet
+  /// CHECK:            Add
+  /// CHECK:            InstanceFieldSet
+  /// CHECK:            InstanceFieldGet
+  /// CHECK:            Add
+  /// CHECK:            InstanceFieldSet
+  /// CHECK:            UnresolvedInstanceFieldGet
+  /// CHECK:            Add
+  /// CHECK:            UnresolvedInstanceFieldSet
+  /// CHECK:            UnresolvedStaticFieldGet
+  /// CHECK:            Add
+  /// CHECK:            UnresolvedStaticFieldSet
+
+  public void accessFieldsUnresolved() {
+    my_obj = new ExampleObj(1, 2);
+    UnresolvedClass unresolved_obj = new UnresolvedClass();
+    for (int i = 0; i < 10; i++) {
+      my_obj.n1++;
+      my_obj.n2++;
+      unresolved_obj.instanceInt++;
+      UnresolvedClass.staticInt++;
+    }
+  }
+
   /// CHECK-START-ARM64: int Main.intDiv(int) scheduler (before)
   /// CHECK:               Sub
   /// CHECK:               DivZeroCheck
diff --git a/tools/dexfuzz/README b/tools/dexfuzz/README
index 1f74262..fff5473 100644
--- a/tools/dexfuzz/README
+++ b/tools/dexfuzz/README
@@ -139,6 +139,7 @@
 InstructionSwapper 80
 InvokeChanger 30
 NewArrayLengthChanger 50
+NewInstanceChanger 10
 NewMethodCaller 10
 NonsenseStringPrinter 10
 OppositeBranchChanger 40
diff --git a/tools/dexfuzz/src/dexfuzz/DexFuzz.java b/tools/dexfuzz/src/dexfuzz/DexFuzz.java
index 2b3b8e7..feb5a13 100644
--- a/tools/dexfuzz/src/dexfuzz/DexFuzz.java
+++ b/tools/dexfuzz/src/dexfuzz/DexFuzz.java
@@ -33,9 +33,9 @@
  * Entrypoint class for dexfuzz.
  */
 public class DexFuzz {
-  // Last version update 1.7: changed the likelihood of RegisterClobber.
+  // Last version update 1.9: fixed a bug in InvokeChanger.
   private static int majorVersion = 1;
-  private static int minorVersion = 7;
+  private static int minorVersion = 9;
   private static int seedChangeVersion = 0;
 
   /**
diff --git a/tools/dexfuzz/src/dexfuzz/program/Program.java b/tools/dexfuzz/src/dexfuzz/program/Program.java
index bb2f4c0..c6fa6c4 100644
--- a/tools/dexfuzz/src/dexfuzz/program/Program.java
+++ b/tools/dexfuzz/src/dexfuzz/program/Program.java
@@ -32,6 +32,7 @@
 import dexfuzz.program.mutators.InstructionSwapper;
 import dexfuzz.program.mutators.InvokeChanger;
 import dexfuzz.program.mutators.NewArrayLengthChanger;
+import dexfuzz.program.mutators.NewInstanceChanger;
 import dexfuzz.program.mutators.NewMethodCaller;
 import dexfuzz.program.mutators.NonsenseStringPrinter;
 import dexfuzz.program.mutators.OppositeBranchChanger;
@@ -54,6 +55,7 @@
 import dexfuzz.rawdex.ProtoIdItem;
 import dexfuzz.rawdex.RawDexFile;
 import dexfuzz.rawdex.TypeIdItem;
+import dexfuzz.rawdex.TypeList;
 import dexfuzz.rawdex.formats.ContainsPoolIndex.PoolIndexKind;
 
 import java.io.BufferedReader;
@@ -204,6 +206,7 @@
     registerMutator(new InstructionSwapper(rng, mutationStats, mutations));
     registerMutator(new InvokeChanger(rng, mutationStats, mutations));
     registerMutator(new NewArrayLengthChanger(rng, mutationStats, mutations));
+    registerMutator(new NewInstanceChanger(rng, mutationStats, mutations));
     registerMutator(new NewMethodCaller(rng, mutationStats, mutations));
     registerMutator(new NonsenseStringPrinter(rng, mutationStats, mutations));
     registerMutator(new OppositeBranchChanger(rng, mutationStats, mutations));
@@ -609,4 +612,45 @@
         fieldIdx));
     return null;
   }
-}
+
+  /**
+   * Used to convert the type index into string format.
+   * @param typeIdx
+   * @return string format of type index.
+   */
+  public String getTypeString(int typeIdx) {
+    TypeIdItem typeIdItem = rawDexFile.typeIds.get(typeIdx);
+    return rawDexFile.stringDatas.get(typeIdItem.descriptorIdx).getString();
+  }
+
+  /**
+   * Used to convert the method index into string format.
+   * @param methodIdx
+   * @return string format of method index.
+   */
+  public String getMethodString(int methodIdx) {
+    MethodIdItem methodIdItem = rawDexFile.methodIds.get(methodIdx);
+    return rawDexFile.stringDatas.get(methodIdItem.nameIdx).getString();
+  }
+
+  /**
+   * Used to convert methodID to string format of method proto.
+   * @param methodIdx
+   * @return string format of shorty.
+   */
+  public String getMethodProto(int methodIdx) {
+    MethodIdItem methodIdItem = rawDexFile.methodIds.get(methodIdx);
+    ProtoIdItem protoIdItem = rawDexFile.protoIds.get(methodIdItem.protoIdx);
+
+    if (!protoIdItem.parametersOff.pointsToSomething()) {
+      return "()" + getTypeString(protoIdItem.returnTypeIdx);
+    }
+
+    TypeList typeList = (TypeList) protoIdItem.parametersOff.getPointedToItem();
+    String typeItem = "(";
+    for (int i = 0; i < typeList.size; i++) {
+      typeItem = typeItem + typeList.list[i];
+    }
+    return typeItem + ")" + getTypeString(protoIdItem.returnTypeIdx);
+  }
+}
\ No newline at end of file
diff --git a/tools/dexfuzz/src/dexfuzz/program/mutators/InvokeChanger.java b/tools/dexfuzz/src/dexfuzz/program/mutators/InvokeChanger.java
index 8750fc6..f0ed83a 100644
--- a/tools/dexfuzz/src/dexfuzz/program/mutators/InvokeChanger.java
+++ b/tools/dexfuzz/src/dexfuzz/program/mutators/InvokeChanger.java
@@ -167,7 +167,7 @@
   }
 
   private boolean isRangeInvokeInst(Opcode opcode){
-    return Opcode.isBetween(opcode, Opcode.INVOKE_VIRTUAL, Opcode.INVOKE_INTERFACE);
+    return Opcode.isBetween(opcode, Opcode.INVOKE_VIRTUAL_RANGE, Opcode.INVOKE_INTERFACE_RANGE);
 
   }
 
diff --git a/tools/dexfuzz/src/dexfuzz/program/mutators/NewInstanceChanger.java b/tools/dexfuzz/src/dexfuzz/program/mutators/NewInstanceChanger.java
new file mode 100644
index 0000000..cbf79e3
--- /dev/null
+++ b/tools/dexfuzz/src/dexfuzz/program/mutators/NewInstanceChanger.java
@@ -0,0 +1,218 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package dexfuzz.program.mutators;
+
+import dexfuzz.Log;
+import dexfuzz.MutationStats;
+import dexfuzz.program.MInsn;
+import dexfuzz.program.MutatableCode;
+import dexfuzz.program.Mutation;
+import dexfuzz.rawdex.Opcode;
+import dexfuzz.rawdex.formats.ContainsPoolIndex;
+import dexfuzz.rawdex.formats.ContainsPoolIndex.PoolIndexKind;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Random;
+
+/**
+ * Mutator NewInstanceChanger changes the new instance type in a method to
+ * any random type from the pool.
+ */
+public class NewInstanceChanger extends CodeMutator {
+
+  /**
+   * Every CodeMutator has an AssociatedMutation, representing the
+   * mutation that this CodeMutator can perform, to allow separate
+   * generateMutation() and applyMutation() phases, allowing serialization.
+   */
+  public static class AssociatedMutation extends Mutation {
+    public int newInstanceToChangeIdx;
+    public int newInstanceTypeIdx;
+
+    @Override
+    public String getString() {
+      StringBuilder builder = new StringBuilder();
+      builder.append(newInstanceToChangeIdx).append(" ");
+      builder.append(newInstanceTypeIdx);
+      return builder.toString();
+    }
+
+    @Override
+    public void parseString(String[] elements) {
+      newInstanceToChangeIdx = Integer.parseInt(elements[2]);
+      newInstanceTypeIdx = Integer.parseInt(elements[3]);
+    }
+  }
+
+  // The following two methods are here for the benefit of MutationSerializer,
+  // so it can create a CodeMutator and get the correct associated Mutation, as it
+  // reads in mutations from a dump of mutations.
+  @Override
+  public Mutation getNewMutation() {
+    return new AssociatedMutation();
+  }
+
+  public NewInstanceChanger() {}
+
+  public NewInstanceChanger(Random rng, MutationStats stats, List<Mutation> mutations) {
+    super(rng, stats, mutations);
+    likelihood = 10;
+  }
+
+  // A cache that should only exist between generateMutation() and applyMutation(),
+  // or be created at the start of applyMutation(), if we're reading in mutations from
+  // a file.
+  private List<MInsn> newInstanceCachedInsns = null;
+
+  private void generateCachedNewInstanceInsns(MutatableCode mutatableCode) {
+    if (newInstanceCachedInsns != null) {
+      return;
+    }
+
+    newInstanceCachedInsns = new ArrayList<MInsn>();
+
+    for (MInsn mInsn : mutatableCode.getInstructions()) {
+      if (mInsn.insn.info.opcode == Opcode.NEW_INSTANCE) {
+        newInstanceCachedInsns.add(mInsn);
+      }
+    }
+  }
+
+  @Override
+  protected boolean canMutate(MutatableCode mutatableCode) {
+    // Cannot change the pool index with only one type.
+    if (mutatableCode.program.getTotalPoolIndicesByKind(PoolIndexKind.Type) < 2) {
+      Log.debug("Cannot mutate, only one type, skipping...");
+      return false;
+    }
+
+    for (MInsn mInsn : mutatableCode.getInstructions()) {
+      if (mInsn.insn.info.opcode == Opcode.NEW_INSTANCE) {
+        return true;
+      }
+    }
+    Log.debug("No New Instance in method, skipping...");
+    return false;
+  }
+
+  @Override
+  protected Mutation generateMutation(MutatableCode mutatableCode) {
+    generateCachedNewInstanceInsns(mutatableCode);
+
+    int newInstanceIdxInCache = rng.nextInt(newInstanceCachedInsns.size());
+    MInsn newInstanceInsn = newInstanceCachedInsns.get(newInstanceIdxInCache);
+    int oldTypeIdx = (int) newInstanceInsn.insn.vregB;
+    int newTypeIdx = 0;
+    int totalPoolIndices = mutatableCode.program.getTotalPoolIndicesByKind(PoolIndexKind.Type);
+    if (totalPoolIndices < 2) {
+      Log.errorAndQuit("Less than two types present, quitting...");
+    }
+
+    while (newTypeIdx == oldTypeIdx) {
+      newTypeIdx = rng.nextInt(totalPoolIndices);
+    }
+
+    AssociatedMutation mutation = new AssociatedMutation();
+    mutation.setup(this.getClass(), mutatableCode);
+    mutation.newInstanceToChangeIdx = newInstanceIdxInCache;
+    mutation.newInstanceTypeIdx = newTypeIdx;
+    return mutation;
+  }
+
+  @Override
+  protected void applyMutation(Mutation uncastMutation) {
+    // Cast the Mutation to our AssociatedMutation, so we can access its fields.
+    AssociatedMutation mutation = (AssociatedMutation) uncastMutation;
+    MutatableCode mutatableCode = mutation.mutatableCode;
+
+    generateCachedNewInstanceInsns(mutatableCode);
+
+    MInsn newInstanceInsn = newInstanceCachedInsns.get(mutation.newInstanceToChangeIdx);
+
+    ContainsPoolIndex poolIndex = ((ContainsPoolIndex)newInstanceInsn.insn.info.format);
+
+    poolIndex.setPoolIndex(newInstanceInsn.insn, mutation.newInstanceTypeIdx);
+
+    Log.info("Changed the type of " + newInstanceInsn.toString() +
+        " to " + mutation.newInstanceTypeIdx);
+
+    int foundNewInstanceInsnIdx =
+        foundInsnIdx(mutatableCode, newInstanceCachedInsns.get(mutation.newInstanceToChangeIdx));
+
+    changeInvokeDirect(foundNewInstanceInsnIdx, mutation);
+
+    stats.incrementStat("Changed new instance.");
+
+    // Clear cache.
+    newInstanceCachedInsns = null;
+  }
+
+  /**
+   * Try to find the invoke-direct/invoke-direct-range instruction that follows
+   * the new-instance instruction and change the method ID of the instruction.
+   * @param foundInsnIdx
+   * @param uncastMutation
+   */
+  protected void changeInvokeDirect(int foundInsnIdx, Mutation uncastMutation) {
+    AssociatedMutation mutation = (AssociatedMutation) uncastMutation;
+    MutatableCode mutatableCode = mutation.mutatableCode;
+    if (foundInsnIdx == -1 ||
+        foundInsnIdx + 1 == mutatableCode.getInstructionCount()) {
+      return;
+    }
+
+    MInsn insn = mutatableCode.getInstructionAt(foundInsnIdx + 1);
+    if (isInvokeInst(insn)) {
+      ContainsPoolIndex poolIndex = ((ContainsPoolIndex)insn.insn.info.format);
+      long oldMethodIdx = poolIndex.getPoolIndex(insn.insn);
+      String className = mutatableCode.program.getTypeString(mutation.newInstanceTypeIdx);
+      String methodName = mutatableCode.program.getMethodString((int) oldMethodIdx);
+      String shorty = mutatableCode.program.getMethodProto((int) oldMethodIdx);
+
+      // Matches the type of the invoke with the randomly changed type of the prior new-instance.
+      // This might create a lot of verification failures, but it often still works.
+      // TODO: Work on generating a program which finds a valid type.
+      int methodId = mutatableCode.program.getNewItemCreator().
+          findOrCreateMethodId(className, methodName, shorty);
+
+      poolIndex.setPoolIndex(insn.insn, mutation.newInstanceTypeIdx);
+
+      insn.insn.vregB = methodId;
+
+      Log.info("Changed " + oldMethodIdx + " to " + methodId);
+    }
+  }
+
+  protected boolean isInvokeInst(MInsn mInsn) {
+    return (mInsn.insn.info.opcode == Opcode.INVOKE_DIRECT ||
+        mInsn.insn.info.opcode == Opcode.INVOKE_DIRECT_RANGE);
+  }
+
+  // Check if there is a new-instance instruction, and if found, return its index.
+  // If not, return -1.
+  protected int foundInsnIdx(MutatableCode mutatableCode, MInsn newInstanceInsn) {
+    int i = 0;
+    for (MInsn mInsn : mutatableCode.getInstructions()) {
+      if (mInsn == newInstanceInsn) {
+        return i;
+      }
+      i++;
+    }
+    return -1;
+  }
+}