Merge "Relax CAS in some uses of Object::AtomicSetReadBarrierPointer."
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index 0afec2d..dcde5ab 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -188,6 +188,7 @@
   runtime/gc/accounting/card_table_test.cc \
   runtime/gc/accounting/mod_union_table_test.cc \
   runtime/gc/accounting/space_bitmap_test.cc \
+  runtime/gc/collector/immune_spaces_test.cc \
   runtime/gc/heap_test.cc \
   runtime/gc/reference_queue_test.cc \
   runtime/gc/space/dlmalloc_space_base_test.cc \
diff --git a/cmdline/cmdline_parser_test.cc b/cmdline/cmdline_parser_test.cc
index f34b5ed..529143d 100644
--- a/cmdline/cmdline_parser_test.cc
+++ b/cmdline/cmdline_parser_test.cc
@@ -457,8 +457,10 @@
     EXPECT_SINGLE_PARSE_VALUE(false, "-Xusejit:false", M::UseJIT);
   }
   {
-    EXPECT_SINGLE_PARSE_VALUE(MemoryKiB(16 * KB), "-Xjitcodecachesize:16K", M::JITCodeCacheCapacity);
-    EXPECT_SINGLE_PARSE_VALUE(MemoryKiB(16 * MB), "-Xjitcodecachesize:16M", M::JITCodeCacheCapacity);
+    EXPECT_SINGLE_PARSE_VALUE(
+        MemoryKiB(16 * KB), "-Xjitinitialsize:16K", M::JITCodeCacheInitialCapacity);
+    EXPECT_SINGLE_PARSE_VALUE(
+        MemoryKiB(16 * MB), "-Xjitmaxsize:16M", M::JITCodeCacheMaxCapacity);
   }
   {
     EXPECT_SINGLE_PARSE_VALUE(12345u, "-Xjitthreshold:12345", M::JITCompileThreshold);
diff --git a/compiler/common_compiler_test.h b/compiler/common_compiler_test.h
index a121f8b..7b0e5af 100644
--- a/compiler/common_compiler_test.h
+++ b/compiler/common_compiler_test.h
@@ -128,6 +128,7 @@
 #define TEST_DISABLED_FOR_READ_BARRIER_WITH_OPTIMIZING_FOR_UNSUPPORTED_INSTRUCTION_SETS() \
   if (kUseReadBarrier && GetCompilerKind() == Compiler::kOptimizing) {                    \
     switch (GetInstructionSet()) {                                                        \
+      case kThumb2:                                                                       \
       case kX86:                                                                          \
       case kX86_64:                                                                       \
         /* Instruction set has read barrier support. */                                   \
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index 2b60a51..5da7214 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -1104,7 +1104,11 @@
   // access because the verifier was unable to?
   const DexFile* dex_file = cu_->dex_file;
   CompilerDriver* driver = cu_->compiler_driver;
-  if (driver->CanAccessInstantiableTypeWithoutChecks(cu_->method_idx, *dex_file, type_idx)) {
+  bool finalizable;
+  if (driver->CanAccessInstantiableTypeWithoutChecks(cu_->method_idx,
+                                                     *dex_file,
+                                                     type_idx,
+                                                     &finalizable)) {
     bool is_type_initialized;
     bool use_direct_type_ptr;
     uintptr_t direct_type_ptr;
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index bf3a865..e42a737 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -1208,7 +1208,8 @@
 
 bool CompilerDriver::CanAccessInstantiableTypeWithoutChecks(uint32_t referrer_idx,
                                                             const DexFile& dex_file,
-                                                            uint32_t type_idx) {
+                                                            uint32_t type_idx,
+                                                            bool* finalizable) {
   ScopedObjectAccess soa(Thread::Current());
   mirror::DexCache* dex_cache = Runtime::Current()->GetClassLinker()->FindDexCache(
       soa.Self(), dex_file, false);
@@ -1216,8 +1217,11 @@
   mirror::Class* resolved_class = dex_cache->GetResolvedType(type_idx);
   if (resolved_class == nullptr) {
     stats_->TypeNeedsAccessCheck();
+    // Be conservative.
+    *finalizable = true;
     return false;  // Unknown class needs access checks.
   }
+  *finalizable = resolved_class->IsFinalizable();
   const DexFile::MethodId& method_id = dex_file.GetMethodId(referrer_idx);
   mirror::Class* referrer_class = dex_cache->GetResolvedType(method_id.class_idx_);
   if (referrer_class == nullptr) {
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index 5683b03..dae785b 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -211,8 +211,11 @@
       REQUIRES(!Locks::mutator_lock_);
 
   // Are runtime access and instantiable checks necessary in the code?
-  bool CanAccessInstantiableTypeWithoutChecks(uint32_t referrer_idx, const DexFile& dex_file,
-                                              uint32_t type_idx)
+  // out_is_finalizable is set to whether the type is finalizable.
+  bool CanAccessInstantiableTypeWithoutChecks(uint32_t referrer_idx,
+                                              const DexFile& dex_file,
+                                              uint32_t type_idx,
+                                              bool* out_is_finalizable)
       REQUIRES(!Locks::mutator_lock_);
 
   bool CanEmbedTypeInCode(const DexFile& dex_file, uint32_t type_idx,
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index 167c35d..3257de1 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -1449,7 +1449,8 @@
                                         uint32_t* args,
                                         uint32_t register_index) {
   HInstruction* length = graph_->GetIntConstant(number_of_vreg_arguments, dex_pc);
-  QuickEntrypointEnum entrypoint = NeedsAccessCheck(type_index)
+  bool finalizable;
+  QuickEntrypointEnum entrypoint = NeedsAccessCheck(type_index, &finalizable)
       ? kQuickAllocArrayWithAccessCheck
       : kQuickAllocArray;
   HInstruction* object = new (arena_) HNewArray(length,
@@ -1629,9 +1630,9 @@
   }
 }
 
-bool HGraphBuilder::NeedsAccessCheck(uint32_t type_index) const {
+bool HGraphBuilder::NeedsAccessCheck(uint32_t type_index, bool* finalizable) const {
   return !compiler_driver_->CanAccessInstantiableTypeWithoutChecks(
-      dex_compilation_unit_->GetDexMethodIndex(), *dex_file_, type_index);
+      dex_compilation_unit_->GetDexMethodIndex(), *dex_file_, type_index, finalizable);
 }
 
 void HGraphBuilder::BuildSwitchJumpTable(const SwitchTable& table,
@@ -2508,7 +2509,9 @@
         current_block_->AddInstruction(fake_string);
         UpdateLocal(register_index, fake_string, dex_pc);
       } else {
-        QuickEntrypointEnum entrypoint = NeedsAccessCheck(type_index)
+        bool finalizable;
+        bool can_throw = NeedsAccessCheck(type_index, &finalizable);
+        QuickEntrypointEnum entrypoint = can_throw
             ? kQuickAllocObjectWithAccessCheck
             : kQuickAllocObject;
 
@@ -2517,6 +2520,8 @@
             dex_pc,
             type_index,
             *dex_compilation_unit_->GetDexFile(),
+            can_throw,
+            finalizable,
             entrypoint));
         UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction(), dex_pc);
       }
@@ -2526,7 +2531,8 @@
     case Instruction::NEW_ARRAY: {
       uint16_t type_index = instruction.VRegC_22c();
       HInstruction* length = LoadLocal(instruction.VRegB_22c(), Primitive::kPrimInt, dex_pc);
-      QuickEntrypointEnum entrypoint = NeedsAccessCheck(type_index)
+      bool finalizable;
+      QuickEntrypointEnum entrypoint = NeedsAccessCheck(type_index, &finalizable)
           ? kQuickAllocArrayWithAccessCheck
           : kQuickAllocArray;
       current_block_->AddInstruction(new (arena_) HNewArray(length,
diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h
index 9eaa4b6..f857ef0 100644
--- a/compiler/optimizing/builder.h
+++ b/compiler/optimizing/builder.h
@@ -138,7 +138,10 @@
   HInstruction* LoadLocal(uint32_t register_index, Primitive::Type type, uint32_t dex_pc) const;
   void PotentiallyAddSuspendCheck(HBasicBlock* target, uint32_t dex_pc);
   void InitializeParameters(uint16_t number_of_parameters);
-  bool NeedsAccessCheck(uint32_t type_index) const;
+
+  // Returns whether the current method needs an access check for the type.
+  // Output parameter finalizable is set to whether the type is finalizable.
+  bool NeedsAccessCheck(uint32_t type_index, /*out*/bool* finalizable) const;
 
   template<typename T>
   void Unop_12x(const Instruction& instruction, Primitive::Type type, uint32_t dex_pc);
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 655bbb8..12ab68e 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -34,6 +34,9 @@
 
 namespace art {
 
+template<class MirrorType>
+class GcRoot;
+
 namespace arm {
 
 static bool ExpectedPairLayout(Location location) {
@@ -286,15 +289,6 @@
     CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
     __ Bind(GetEntryLabel());
 
-    if (instruction_->IsCheckCast()) {
-      // The codegen for the instruction overwrites `temp`, so put it back in place.
-      Register obj = locations->InAt(0).AsRegister<Register>();
-      Register temp = locations->GetTemp(0).AsRegister<Register>();
-      uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
-      __ LoadFromOffset(kLoadWord, temp, obj, class_offset);
-      __ MaybeUnpoisonHeapReference(temp);
-    }
-
     if (!is_fatal_) {
       SaveLiveRegisters(codegen, locations);
     }
@@ -315,6 +309,8 @@
                                  instruction_,
                                  instruction_->GetDexPc(),
                                  this);
+      CheckEntrypointTypes<
+          kQuickInstanceofNonTrivial, uint32_t, const mirror::Class*, const mirror::Class*>();
       arm_codegen->Move32(locations->Out(), Location::RegisterLocation(R0));
     } else {
       DCHECK(instruction_->IsCheckCast());
@@ -322,6 +318,7 @@
                                  instruction_,
                                  instruction_->GetDexPc(),
                                  this);
+      CheckEntrypointTypes<kQuickCheckCast, void, const mirror::Class*, const mirror::Class*>();
     }
 
     if (!is_fatal_) {
@@ -408,6 +405,221 @@
   DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathARM);
 };
 
+// Slow path generating a read barrier for a heap reference.
+class ReadBarrierForHeapReferenceSlowPathARM : public SlowPathCode {
+ public:
+  ReadBarrierForHeapReferenceSlowPathARM(HInstruction* instruction,
+                                         Location out,
+                                         Location ref,
+                                         Location obj,
+                                         uint32_t offset,
+                                         Location index)
+      : instruction_(instruction),
+        out_(out),
+        ref_(ref),
+        obj_(obj),
+        offset_(offset),
+        index_(index) {
+    DCHECK(kEmitCompilerReadBarrier);
+    // If `obj` is equal to `out` or `ref`, it means the initial object
+    // has been overwritten by (or after) the heap object reference load
+    // to be instrumented, e.g.:
+    //
+    //   __ LoadFromOffset(kLoadWord, out, out, offset);
+    //   codegen_->GenerateReadBarrier(instruction, out_loc, out_loc, out_loc, offset);
+    //
+    // In that case, we have lost the information about the original
+    // object, and the emitted read barrier cannot work properly.
+    DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
+    DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
+  }
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
+    LocationSummary* locations = instruction_->GetLocations();
+    Register reg_out = out_.AsRegister<Register>();
+    DCHECK(locations->CanCall());
+    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
+    DCHECK(!instruction_->IsInvoke() ||
+           (instruction_->IsInvokeStaticOrDirect() &&
+            instruction_->GetLocations()->Intrinsified()));
+
+    __ Bind(GetEntryLabel());
+    SaveLiveRegisters(codegen, locations);
+
+    // We may have to change the index's value, but as `index_` is a
+    // constant member (like other "inputs" of this slow path),
+    // we introduce a copy of it, `index`.
+    Location index = index_;
+    if (index_.IsValid()) {
+      // Handle `index_` for HArrayGet and intrinsic UnsafeGetObject.
+      if (instruction_->IsArrayGet()) {
+        // Compute the actual memory offset and store it in `index`.
+        Register index_reg = index_.AsRegister<Register>();
+        DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg));
+        if (codegen->IsCoreCalleeSaveRegister(index_reg)) {
+          // We are about to change the value of `index_reg` (see the
+          // calls to art::arm::Thumb2Assembler::Lsl and
+          // art::arm::Thumb2Assembler::AddConstant below), but it has
+          // not been saved by the previous call to
+          // art::SlowPathCode::SaveLiveRegisters, as it is a
+          // callee-save register --
+          // art::SlowPathCode::SaveLiveRegisters does not consider
+          // callee-save registers, as it has been designed with the
+          // assumption that callee-save registers are supposed to be
+          // handled by the called function.  So, as a callee-save
+          // register, `index_reg` _would_ eventually be saved onto
+          // the stack, but it would be too late: we would have
+          // changed its value earlier.  Therefore, we manually save
+          // it here into another freely available register,
+          // `free_reg`, chosen of course among the caller-save
+          // registers (as a callee-save `free_reg` register would
+          // exhibit the same problem).
+          //
+          // Note we could have requested a temporary register from
+          // the register allocator instead; but we prefer not to, as
+          // this is a slow path, and we know we can find a
+          // caller-save register that is available.
+          Register free_reg = FindAvailableCallerSaveRegister(codegen);
+          __ Mov(free_reg, index_reg);
+          index_reg = free_reg;
+          index = Location::RegisterLocation(index_reg);
+        } else {
+          // The initial register stored in `index_` has already been
+          // saved in the call to art::SlowPathCode::SaveLiveRegisters
+          // (as it is not a callee-save register), so we can freely
+          // use it.
+        }
+        // Shifting the index value contained in `index_reg` by the scale
+        // factor (2) cannot overflow in practice, as the runtime is
+        // unable to allocate object arrays with a size larger than
+        // 2^26 - 1 (that is, 2^28 - 4 bytes).
+        __ Lsl(index_reg, index_reg, TIMES_4);
+        static_assert(
+            sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+            "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+        __ AddConstant(index_reg, index_reg, offset_);
+      } else {
+        DCHECK(instruction_->IsInvoke());
+        DCHECK(instruction_->GetLocations()->Intrinsified());
+        DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
+               (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile))
+            << instruction_->AsInvoke()->GetIntrinsic();
+        DCHECK_EQ(offset_, 0U);
+        DCHECK(index_.IsRegisterPair());
+        // UnsafeGet's offset location is a register pair; the low
+        // part contains the correct offset.
+        index = index_.ToLow();
+      }
+    }
+
+    // We're moving two or three locations to locations that could
+    // overlap, so we need a parallel move resolver.
+    InvokeRuntimeCallingConvention calling_convention;
+    HParallelMove parallel_move(codegen->GetGraph()->GetArena());
+    parallel_move.AddMove(ref_,
+                          Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+                          Primitive::kPrimNot,
+                          nullptr);
+    parallel_move.AddMove(obj_,
+                          Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
+                          Primitive::kPrimNot,
+                          nullptr);
+    if (index.IsValid()) {
+      parallel_move.AddMove(index,
+                            Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
+                            Primitive::kPrimInt,
+                            nullptr);
+      codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+    } else {
+      codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+      __ LoadImmediate(calling_convention.GetRegisterAt(2), offset_);
+    }
+    arm_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierSlow),
+                               instruction_,
+                               instruction_->GetDexPc(),
+                               this);
+    CheckEntrypointTypes<
+        kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
+    arm_codegen->Move32(out_, Location::RegisterLocation(R0));
+
+    RestoreLiveRegisters(codegen, locations);
+    __ b(GetExitLabel());
+  }
+
+  const char* GetDescription() const OVERRIDE { return "ReadBarrierForHeapReferenceSlowPathARM"; }
+
+ private:
+  Register FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
+    size_t ref = static_cast<int>(ref_.AsRegister<Register>());
+    size_t obj = static_cast<int>(obj_.AsRegister<Register>());
+    for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
+      if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
+        return static_cast<Register>(i);
+      }
+    }
+    // We shall never fail to find a free caller-save register, as
+    // there are more than two core caller-save registers on ARM
+    // (meaning it is possible to find one which is different from
+    // `ref` and `obj`).
+    DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
+    LOG(FATAL) << "Could not find a free caller-save register";
+    UNREACHABLE();
+  }
+
+  HInstruction* const instruction_;
+  const Location out_;
+  const Location ref_;
+  const Location obj_;
+  const uint32_t offset_;
+  // An additional location containing an index to an array.
+  // Only used for HArrayGet and the UnsafeGetObject &
+  // UnsafeGetObjectVolatile intrinsics.
+  const Location index_;
+
+  DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathARM);
+};
+
+// Slow path generating a read barrier for a GC root.
+class ReadBarrierForRootSlowPathARM : public SlowPathCode {
+ public:
+  ReadBarrierForRootSlowPathARM(HInstruction* instruction, Location out, Location root)
+      : instruction_(instruction), out_(out), root_(root) {}
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    LocationSummary* locations = instruction_->GetLocations();
+    Register reg_out = out_.AsRegister<Register>();
+    DCHECK(locations->CanCall());
+    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
+    DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString());
+
+    __ Bind(GetEntryLabel());
+    SaveLiveRegisters(codegen, locations);
+
+    InvokeRuntimeCallingConvention calling_convention;
+    CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
+    arm_codegen->Move32(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), root_);
+    arm_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierForRootSlow),
+                               instruction_,
+                               instruction_->GetDexPc(),
+                               this);
+    CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
+    arm_codegen->Move32(out_, Location::RegisterLocation(R0));
+
+    RestoreLiveRegisters(codegen, locations);
+    __ b(GetExitLabel());
+  }
+
+  const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathARM"; }
+
+ private:
+  HInstruction* const instruction_;
+  const Location out_;
+  const Location root_;
+
+  DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathARM);
+};
+
 #undef __
 #define __ down_cast<ArmAssembler*>(GetAssembler())->
 
@@ -581,7 +793,7 @@
       LOG(FATAL) << "Unreachable type " << type;
   }
 
-  return Location();
+  return Location::NoLocation();
 }
 
 void CodeGeneratorARM::SetupBlockedRegisters(bool is_baseline) const {
@@ -820,7 +1032,7 @@
       LOG(FATAL) << "Unexpected parameter type " << type;
       break;
   }
-  return Location();
+  return Location::NoLocation();
 }
 
 Location InvokeDexCallingConventionVisitorARM::GetReturnLocation(Primitive::Type type) const {
@@ -847,7 +1059,7 @@
     }
 
     case Primitive::kPrimVoid:
-      return Location();
+      return Location::NoLocation();
   }
 
   UNREACHABLE();
@@ -1762,29 +1974,39 @@
 
 void InstructionCodeGeneratorARM::VisitInvokeInterface(HInvokeInterface* invoke) {
   // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
-  Register temp = invoke->GetLocations()->GetTemp(0).AsRegister<Register>();
+  LocationSummary* locations = invoke->GetLocations();
+  Register temp = locations->GetTemp(0).AsRegister<Register>();
+  Register hidden_reg = locations->GetTemp(1).AsRegister<Register>();
   uint32_t method_offset = mirror::Class::EmbeddedImTableEntryOffset(
       invoke->GetImtIndex() % mirror::Class::kImtSize, kArmPointerSize).Uint32Value();
-  LocationSummary* locations = invoke->GetLocations();
   Location receiver = locations->InAt(0);
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
 
-  // Set the hidden argument.
-  __ LoadImmediate(invoke->GetLocations()->GetTemp(1).AsRegister<Register>(),
-                   invoke->GetDexMethodIndex());
+  // Set the hidden argument. It is safe to do this here, as R12
+  // won't be modified thereafter, before the `blx` (call) instruction.
+  DCHECK_EQ(R12, hidden_reg);
+  __ LoadImmediate(hidden_reg, invoke->GetDexMethodIndex());
 
-  // temp = object->GetClass();
   if (receiver.IsStackSlot()) {
     __ LoadFromOffset(kLoadWord, temp, SP, receiver.GetStackIndex());
+    // /* HeapReference<Class> */ temp = temp->klass_
     __ LoadFromOffset(kLoadWord, temp, temp, class_offset);
   } else {
+    // /* HeapReference<Class> */ temp = receiver->klass_
     __ LoadFromOffset(kLoadWord, temp, receiver.AsRegister<Register>(), class_offset);
   }
   codegen_->MaybeRecordImplicitNullCheck(invoke);
+  // Instead of simply (possibly) unpoisoning `temp` here, we should
+  // emit a read barrier for the previous class reference load.
+  // However this is not required in practice, as this is an
+  // intermediate/temporary reference and because the current
+  // concurrent copying collector keeps the from-space memory
+  // intact/accessible until the end of the marking phase (the
+  // concurrent copying collector may not do so in the future).
   __ MaybeUnpoisonHeapReference(temp);
   // temp = temp->GetImtEntryAt(method_offset);
-  uint32_t entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(
-      kArmWordSize).Int32Value();
+  uint32_t entry_point =
+      ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmWordSize).Int32Value();
   __ LoadFromOffset(kLoadWord, temp, temp, method_offset);
   // LR = temp->GetEntryPoint();
   __ LoadFromOffset(kLoadWord, LR, temp, entry_point);
@@ -3407,6 +3629,9 @@
                                                          Register out_lo,
                                                          Register out_hi) {
   if (offset != 0) {
+    // Ensure `out_lo` is different from `addr`, so that loading
+    // `offset` into `out_lo` does not clobber `addr`.
+    DCHECK_NE(out_lo, addr);
     __ LoadImmediate(out_lo, offset);
     __ add(IP, addr, ShifterOperand(out_lo));
     addr = IP;
@@ -3594,14 +3819,26 @@
 
 void LocationsBuilderARM::HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info) {
   DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
+
+  bool object_field_get_with_read_barrier =
+      kEmitCompilerReadBarrier && (field_info.GetFieldType() == Primitive::kPrimNot);
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction,
+                                                   object_field_get_with_read_barrier ?
+                                                       LocationSummary::kCallOnSlowPath :
+                                                       LocationSummary::kNoCall);
   locations->SetInAt(0, Location::RequiresRegister());
 
   bool volatile_for_double = field_info.IsVolatile()
       && (field_info.GetFieldType() == Primitive::kPrimDouble)
       && !codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd();
-  bool overlap = field_info.IsVolatile() && (field_info.GetFieldType() == Primitive::kPrimLong);
+  // The output overlaps in the case of a volatile long: we don't want the
+  // code generated by GenerateWideAtomicLoad to overwrite the
+  // object's location.  Likewise, in the case of an object field get
+  // with read barriers enabled, we do not want the load to overwrite
+  // the object's location, as we need it to emit the read barrier.
+  bool overlap = (field_info.IsVolatile() && (field_info.GetFieldType() == Primitive::kPrimLong)) ||
+      object_field_get_with_read_barrier;
 
   if (Primitive::IsFloatingPointType(instruction->GetType())) {
     locations->SetOut(Location::RequiresFpuRegister());
@@ -3667,7 +3904,8 @@
   DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
 
   LocationSummary* locations = instruction->GetLocations();
-  Register base = locations->InAt(0).AsRegister<Register>();
+  Location base_loc = locations->InAt(0);
+  Register base = base_loc.AsRegister<Register>();
   Location out = locations->Out();
   bool is_volatile = field_info.IsVolatile();
   bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd();
@@ -3747,7 +3985,7 @@
   }
 
   if (field_type == Primitive::kPrimNot) {
-    __ MaybeUnpoisonHeapReference(out.AsRegister<Register>());
+    codegen_->MaybeGenerateReadBarrier(instruction, out, out, base_loc, offset);
   }
 }
 
@@ -3891,20 +4129,31 @@
 }
 
 void LocationsBuilderARM::VisitArrayGet(HArrayGet* instruction) {
+  bool object_array_get_with_read_barrier =
+      kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot);
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction,
+                                                   object_array_get_with_read_barrier ?
+                                                       LocationSummary::kCallOnSlowPath :
+                                                       LocationSummary::kNoCall);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
   if (Primitive::IsFloatingPointType(instruction->GetType())) {
     locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
   } else {
-    locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+    // The output overlaps in the case of an object array get with
+    // read barriers enabled: we do not want the move to overwrite the
+    // array's location, as we need it to emit the read barrier.
+    locations->SetOut(
+        Location::RequiresRegister(),
+        object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
   }
 }
 
 void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  Register obj = locations->InAt(0).AsRegister<Register>();
+  Location obj_loc = locations->InAt(0);
+  Register obj = obj_loc.AsRegister<Register>();
   Location index = locations->InAt(1);
   Primitive::Type type = instruction->GetType();
 
@@ -3967,8 +4216,9 @@
 
     case Primitive::kPrimInt:
     case Primitive::kPrimNot: {
-      static_assert(sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
-                    "art::mirror::HeapReference<mirror::Object> and int32_t have different sizes.");
+      static_assert(
+          sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+          "art::mirror::HeapReference<mirror::Object> and int32_t have different sizes.");
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
       Register out = locations->Out().AsRegister<Register>();
       if (index.IsConstant()) {
@@ -4031,8 +4281,17 @@
   codegen_->MaybeRecordImplicitNullCheck(instruction);
 
   if (type == Primitive::kPrimNot) {
-    Register out = locations->Out().AsRegister<Register>();
-    __ MaybeUnpoisonHeapReference(out);
+    static_assert(
+        sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+        "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+    uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
+    Location out = locations->Out();
+    if (index.IsConstant()) {
+      uint32_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
+      codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, offset);
+    } else {
+      codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, data_offset, index);
+    }
   }
 }
 
@@ -4041,11 +4300,16 @@
 
   bool needs_write_barrier =
       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
-  bool may_need_runtime_call = instruction->NeedsTypeCheck();
+  bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
+  bool object_array_set_with_read_barrier =
+      kEmitCompilerReadBarrier && (value_type == Primitive::kPrimNot);
 
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
       instruction,
-      may_need_runtime_call ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall);
+      (may_need_runtime_call_for_type_check || object_array_set_with_read_barrier) ?
+          LocationSummary::kCallOnSlowPath :
+          LocationSummary::kNoCall);
+
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
   if (Primitive::IsFloatingPointType(value_type)) {
@@ -4053,20 +4317,20 @@
   } else {
     locations->SetInAt(2, Location::RequiresRegister());
   }
-
   if (needs_write_barrier) {
     // Temporary registers for the write barrier.
     locations->AddTemp(Location::RequiresRegister());  // Possibly used for ref. poisoning too.
-    locations->AddTemp(Location::RequiresRegister());
+    locations->AddTemp(Location::RequiresRegister());  // Possibly used for read barrier too.
   }
 }
 
 void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  Register array = locations->InAt(0).AsRegister<Register>();
+  Location array_loc = locations->InAt(0);
+  Register array = array_loc.AsRegister<Register>();
   Location index = locations->InAt(1);
   Primitive::Type value_type = instruction->GetComponentType();
-  bool may_need_runtime_call = locations->CanCall();
+  bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
   bool needs_write_barrier =
       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
 
@@ -4103,7 +4367,8 @@
 
     case Primitive::kPrimNot: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
-      Register value = locations->InAt(2).AsRegister<Register>();
+      Location value_loc = locations->InAt(2);
+      Register value = value_loc.AsRegister<Register>();
       Register source = value;
 
       if (instruction->InputAt(2)->IsNullConstant()) {
@@ -4117,6 +4382,8 @@
           __ add(IP, array, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4));
           __ StoreToOffset(kStoreWord, source, IP, data_offset);
         }
+        DCHECK(!needs_write_barrier);
+        DCHECK(!may_need_runtime_call_for_type_check);
         break;
       }
 
@@ -4129,7 +4396,7 @@
       Label done;
       SlowPathCode* slow_path = nullptr;
 
-      if (may_need_runtime_call) {
+      if (may_need_runtime_call_for_type_check) {
         slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathARM(instruction);
         codegen_->AddSlowPath(slow_path);
         if (instruction->GetValueCanBeNull()) {
@@ -4149,23 +4416,63 @@
           __ Bind(&non_zero);
         }
 
-        __ LoadFromOffset(kLoadWord, temp1, array, class_offset);
-        codegen_->MaybeRecordImplicitNullCheck(instruction);
-        __ MaybeUnpoisonHeapReference(temp1);
-        __ LoadFromOffset(kLoadWord, temp1, temp1, component_offset);
-        __ LoadFromOffset(kLoadWord, temp2, value, class_offset);
-        // No need to poison/unpoison, we're comparing two poisoined references.
-        __ cmp(temp1, ShifterOperand(temp2));
-        if (instruction->StaticTypeOfArrayIsObjectArray()) {
-          Label do_put;
-          __ b(&do_put, EQ);
-          __ MaybeUnpoisonHeapReference(temp1);
-          __ LoadFromOffset(kLoadWord, temp1, temp1, super_offset);
-          // No need to poison/unpoison, we're comparing against null.
-          __ CompareAndBranchIfNonZero(temp1, slow_path->GetEntryLabel());
-          __ Bind(&do_put);
+        if (kEmitCompilerReadBarrier) {
+          // When read barriers are enabled, the type checking
+          // instrumentation requires two read barriers:
+          //
+          //   __ Mov(temp2, temp1);
+          //   // /* HeapReference<Class> */ temp1 = temp1->component_type_
+          //   __ LoadFromOffset(kLoadWord, temp1, temp1, component_offset);
+          //   codegen_->GenerateReadBarrier(
+          //       instruction, temp1_loc, temp1_loc, temp2_loc, component_offset);
+          //
+          //   // /* HeapReference<Class> */ temp2 = value->klass_
+          //   __ LoadFromOffset(kLoadWord, temp2, value, class_offset);
+          //   codegen_->GenerateReadBarrier(
+          //       instruction, temp2_loc, temp2_loc, value_loc, class_offset, temp1_loc);
+          //
+          //   __ cmp(temp1, ShifterOperand(temp2));
+          //
+          // However, the second read barrier may trash `temp`, as it
+          // is a temporary register, and as such would not be saved
+          // along with live registers before calling the runtime (nor
+          // restored afterwards).  So in this case, we bail out and
+          // delegate the work to the array set slow path.
+          //
+          // TODO: Extend the register allocator to support a new
+          // "(locally) live temp" location so as to avoid always
+          // going into the slow path when read barriers are enabled.
+          __ b(slow_path->GetEntryLabel());
         } else {
-          __ b(slow_path->GetEntryLabel(), NE);
+          // /* HeapReference<Class> */ temp1 = array->klass_
+          __ LoadFromOffset(kLoadWord, temp1, array, class_offset);
+          codegen_->MaybeRecordImplicitNullCheck(instruction);
+          __ MaybeUnpoisonHeapReference(temp1);
+
+          // /* HeapReference<Class> */ temp1 = temp1->component_type_
+          __ LoadFromOffset(kLoadWord, temp1, temp1, component_offset);
+          // /* HeapReference<Class> */ temp2 = value->klass_
+          __ LoadFromOffset(kLoadWord, temp2, value, class_offset);
+          // If heap poisoning is enabled, no need to unpoison `temp1`
+          // nor `temp2`, as we are comparing two poisoned references.
+          __ cmp(temp1, ShifterOperand(temp2));
+
+          if (instruction->StaticTypeOfArrayIsObjectArray()) {
+            Label do_put;
+            __ b(&do_put, EQ);
+            // If heap poisoning is enabled, the `temp1` reference has
+            // not been unpoisoned yet; unpoison it now.
+            __ MaybeUnpoisonHeapReference(temp1);
+
+            // /* HeapReference<Class> */ temp1 = temp1->super_class_
+            __ LoadFromOffset(kLoadWord, temp1, temp1, super_offset);
+            // If heap poisoning is enabled, no need to unpoison
+            // `temp1`, as we are comparing against null below.
+            __ CompareAndBranchIfNonZero(temp1, slow_path->GetEntryLabel());
+            __ Bind(&do_put);
+          } else {
+            __ b(slow_path->GetEntryLabel(), NE);
+          }
         }
       }
 
@@ -4189,7 +4496,7 @@
         __ StoreToOffset(kStoreWord, source, IP, data_offset);
       }
 
-      if (!may_need_runtime_call) {
+      if (!may_need_runtime_call_for_type_check) {
         codegen_->MaybeRecordImplicitNullCheck(instruction);
       }
 
@@ -4618,7 +4925,8 @@
   CodeGenerator::CreateLoadClassLocationSummary(
       cls,
       Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
-      Location::RegisterLocation(R0));
+      Location::RegisterLocation(R0),
+      /* code_generator_supports_read_barrier */ true);
 }
 
 void InstructionCodeGeneratorARM::VisitLoadClass(HLoadClass* cls) {
@@ -4632,21 +4940,42 @@
     return;
   }
 
-  Register out = locations->Out().AsRegister<Register>();
+  Location out_loc = locations->Out();
+  Register out = out_loc.AsRegister<Register>();
   Register current_method = locations->InAt(0).AsRegister<Register>();
+
   if (cls->IsReferrersClass()) {
     DCHECK(!cls->CanCallRuntime());
     DCHECK(!cls->MustGenerateClinitCheck());
-    __ LoadFromOffset(
-        kLoadWord, out, current_method, ArtMethod::DeclaringClassOffset().Int32Value());
+    uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value();
+    if (kEmitCompilerReadBarrier) {
+      // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_)
+      __ AddConstant(out, current_method, declaring_class_offset);
+      // /* mirror::Class* */ out = out->Read()
+      codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc);
+    } else {
+      // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
+      __ LoadFromOffset(kLoadWord, out, current_method, declaring_class_offset);
+    }
   } else {
     DCHECK(cls->CanCallRuntime());
+    // /* GcRoot<mirror::Class>[] */ out =
+    //        current_method.ptr_sized_fields_->dex_cache_resolved_types_
     __ LoadFromOffset(kLoadWord,
                       out,
                       current_method,
                       ArtMethod::DexCacheResolvedTypesOffset(kArmPointerSize).Int32Value());
-    __ LoadFromOffset(kLoadWord, out, out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex()));
-    // TODO: We will need a read barrier here.
+
+    size_t cache_offset = CodeGenerator::GetCacheOffset(cls->GetTypeIndex());
+    if (kEmitCompilerReadBarrier) {
+      // /* GcRoot<mirror::Class>* */ out = &out[type_index]
+      __ AddConstant(out, out, cache_offset);
+      // /* mirror::Class* */ out = out->Read()
+      codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc);
+    } else {
+      // /* GcRoot<mirror::Class> */ out = out[type_index]
+      __ LoadFromOffset(kLoadWord, out, out, cache_offset);
+    }
 
     SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARM(
         cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
@@ -4701,13 +5030,35 @@
   codegen_->AddSlowPath(slow_path);
 
   LocationSummary* locations = load->GetLocations();
-  Register out = locations->Out().AsRegister<Register>();
+  Location out_loc = locations->Out();
+  Register out = out_loc.AsRegister<Register>();
   Register current_method = locations->InAt(0).AsRegister<Register>();
-  __ LoadFromOffset(
-      kLoadWord, out, current_method, ArtMethod::DeclaringClassOffset().Int32Value());
+
+  uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value();
+  if (kEmitCompilerReadBarrier) {
+    // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_)
+    __ AddConstant(out, current_method, declaring_class_offset);
+    // /* mirror::Class* */ out = out->Read()
+    codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc);
+  } else {
+    // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
+    __ LoadFromOffset(kLoadWord, out, current_method, declaring_class_offset);
+  }
+
+  // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_
   __ LoadFromOffset(kLoadWord, out, out, mirror::Class::DexCacheStringsOffset().Int32Value());
-  __ LoadFromOffset(kLoadWord, out, out, CodeGenerator::GetCacheOffset(load->GetStringIndex()));
-  // TODO: We will need a read barrier here.
+
+  size_t cache_offset = CodeGenerator::GetCacheOffset(load->GetStringIndex());
+  if (kEmitCompilerReadBarrier) {
+    // /* GcRoot<mirror::String>* */ out = &out[string_index]
+    __ AddConstant(out, out, cache_offset);
+    // /* mirror::String* */ out = out->Read()
+    codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc);
+  } else {
+    // /* GcRoot<mirror::String> */ out = out[string_index]
+    __ LoadFromOffset(kLoadWord, out, out, cache_offset);
+  }
+
   __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel());
   __ Bind(slow_path->GetExitLabel());
 }
@@ -4750,41 +5101,45 @@
 
 void LocationsBuilderARM::VisitInstanceOf(HInstanceOf* instruction) {
   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
-  switch (instruction->GetTypeCheckKind()) {
+  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+  switch (type_check_kind) {
     case TypeCheckKind::kExactCheck:
     case TypeCheckKind::kAbstractClassCheck:
     case TypeCheckKind::kClassHierarchyCheck:
     case TypeCheckKind::kArrayObjectCheck:
-      call_kind = LocationSummary::kNoCall;
-      break;
-    case TypeCheckKind::kUnresolvedCheck:
-    case TypeCheckKind::kInterfaceCheck:
-      call_kind = LocationSummary::kCall;
+      call_kind =
+          kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
       break;
     case TypeCheckKind::kArrayCheck:
+    case TypeCheckKind::kUnresolvedCheck:
+    case TypeCheckKind::kInterfaceCheck:
       call_kind = LocationSummary::kCallOnSlowPath;
       break;
   }
+
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
-  if (call_kind != LocationSummary::kCall) {
-    locations->SetInAt(0, Location::RequiresRegister());
-    locations->SetInAt(1, Location::RequiresRegister());
-    // The out register is used as a temporary, so it overlaps with the inputs.
-    // Note that TypeCheckSlowPathARM uses this register too.
-    locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
-  } else {
-    InvokeRuntimeCallingConvention calling_convention;
-    locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
-    locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
-    locations->SetOut(Location::RegisterLocation(R0));
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  // The "out" register is used as a temporary, so it overlaps with the inputs.
+  // Note that TypeCheckSlowPathARM uses this register too.
+  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+  // When read barriers are enabled, we need a temporary register for
+  // some cases.
+  if (kEmitCompilerReadBarrier &&
+      (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+       type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
+    locations->AddTemp(Location::RequiresRegister());
   }
 }
 
 void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  Register obj = locations->InAt(0).AsRegister<Register>();
+  Location obj_loc = locations->InAt(0);
+  Register obj = obj_loc.AsRegister<Register>();
   Register cls = locations->InAt(1).AsRegister<Register>();
-  Register out = locations->Out().AsRegister<Register>();
+  Location out_loc = locations->Out();
+  Register out = out_loc.AsRegister<Register>();
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
@@ -4798,15 +5153,9 @@
     __ CompareAndBranchIfZero(obj, &zero);
   }
 
-  // In case of an interface/unresolved check, we put the object class into the object register.
-  // This is safe, as the register is caller-save, and the object must be in another
-  // register if it survives the runtime call.
-  Register target = (instruction->GetTypeCheckKind() == TypeCheckKind::kInterfaceCheck) ||
-      (instruction->GetTypeCheckKind() == TypeCheckKind::kUnresolvedCheck)
-      ? obj
-      : out;
-  __ LoadFromOffset(kLoadWord, target, obj, class_offset);
-  __ MaybeUnpoisonHeapReference(target);
+  // /* HeapReference<Class> */ out = obj->klass_
+  __ LoadFromOffset(kLoadWord, out, obj, class_offset);
+  codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, obj_loc, class_offset);
 
   switch (instruction->GetTypeCheckKind()) {
     case TypeCheckKind::kExactCheck: {
@@ -4817,13 +5166,23 @@
       __ b(&done);
       break;
     }
+
     case TypeCheckKind::kAbstractClassCheck: {
       // If the class is abstract, we eagerly fetch the super class of the
       // object to avoid doing a comparison we know will fail.
       Label loop;
       __ Bind(&loop);
+      Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation();
+      if (kEmitCompilerReadBarrier) {
+        // Save the value of `out` into `temp` before overwriting it
+        // in the following move operation, as we will need it for the
+        // read barrier below.
+        Register temp = temp_loc.AsRegister<Register>();
+        __ Mov(temp, out);
+      }
+      // /* HeapReference<Class> */ out = out->super_class_
       __ LoadFromOffset(kLoadWord, out, out, super_offset);
-      __ MaybeUnpoisonHeapReference(out);
+      codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ CompareAndBranchIfZero(out, &done);
       __ cmp(out, ShifterOperand(cls));
@@ -4834,14 +5193,24 @@
       }
       break;
     }
+
     case TypeCheckKind::kClassHierarchyCheck: {
       // Walk over the class hierarchy to find a match.
       Label loop, success;
       __ Bind(&loop);
       __ cmp(out, ShifterOperand(cls));
       __ b(&success, EQ);
+      Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation();
+      if (kEmitCompilerReadBarrier) {
+        // Save the value of `out` into `temp` before overwriting it
+        // in the following move operation, as we will need it for the
+        // read barrier below.
+        Register temp = temp_loc.AsRegister<Register>();
+        __ Mov(temp, out);
+      }
+      // /* HeapReference<Class> */ out = out->super_class_
       __ LoadFromOffset(kLoadWord, out, out, super_offset);
-      __ MaybeUnpoisonHeapReference(out);
+      codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset);
       __ CompareAndBranchIfNonZero(out, &loop);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ b(&done);
@@ -4852,14 +5221,24 @@
       }
       break;
     }
+
     case TypeCheckKind::kArrayObjectCheck: {
       // Do an exact check.
       Label exact_check;
       __ cmp(out, ShifterOperand(cls));
       __ b(&exact_check, EQ);
-      // Otherwise, we need to check that the object's class is a non primitive array.
+      // Otherwise, we need to check that the object's class is a non-primitive array.
+      Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation();
+      if (kEmitCompilerReadBarrier) {
+        // Save the value of `out` into `temp` before overwriting it
+        // in the following move operation, as we will need it for the
+        // read barrier below.
+        Register temp = temp_loc.AsRegister<Register>();
+        __ Mov(temp, out);
+      }
+      // /* HeapReference<Class> */ out = out->component_type_
       __ LoadFromOffset(kLoadWord, out, out, component_offset);
-      __ MaybeUnpoisonHeapReference(out);
+      codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, component_offset);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ CompareAndBranchIfZero(out, &done);
       __ LoadFromOffset(kLoadUnsignedHalfword, out, out, primitive_offset);
@@ -4870,11 +5249,12 @@
       __ b(&done);
       break;
     }
+
     case TypeCheckKind::kArrayCheck: {
       __ cmp(out, ShifterOperand(cls));
       DCHECK(locations->OnlyCallsOnSlowPath());
-      slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM(
-          instruction, /* is_fatal */ false);
+      slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM(instruction,
+                                                                    /* is_fatal */ false);
       codegen_->AddSlowPath(slow_path);
       __ b(slow_path->GetEntryLabel(), NE);
       __ LoadImmediate(out, 1);
@@ -4883,13 +5263,25 @@
       }
       break;
     }
+
     case TypeCheckKind::kUnresolvedCheck:
-    case TypeCheckKind::kInterfaceCheck:
-    default: {
-      codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pInstanceofNonTrivial),
-                              instruction,
-                              instruction->GetDexPc(),
-                              nullptr);
+    case TypeCheckKind::kInterfaceCheck: {
+      // Note that we indeed only call on slow path, but we always go
+      // into the slow path for the unresolved & interface check
+      // cases.
+      //
+      // We cannot directly call the InstanceofNonTrivial runtime
+      // entry point without resorting to a type checking slow path
+      // here (i.e. by calling InvokeRuntime directly), as it would
+      // require assigning fixed registers for the inputs of this
+      // HInstanceOf instruction (following the runtime calling
+      // convention), which might be clobbered by the potential first
+      // read barrier emission at the beginning of this method.
+      DCHECK(locations->OnlyCallsOnSlowPath());
+      slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM(instruction,
+                                                                    /* is_fatal */ false);
+      codegen_->AddSlowPath(slow_path);
+      __ b(slow_path->GetEntryLabel());
       if (zero.IsLinked()) {
         __ b(&done);
       }
@@ -4915,57 +5307,61 @@
   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
   bool throws_into_catch = instruction->CanThrowIntoCatchBlock();
 
-  switch (instruction->GetTypeCheckKind()) {
+  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+  switch (type_check_kind) {
     case TypeCheckKind::kExactCheck:
     case TypeCheckKind::kAbstractClassCheck:
     case TypeCheckKind::kClassHierarchyCheck:
     case TypeCheckKind::kArrayObjectCheck:
-      call_kind = throws_into_catch
-          ? LocationSummary::kCallOnSlowPath
-          : LocationSummary::kNoCall;
-      break;
-    case TypeCheckKind::kUnresolvedCheck:
-    case TypeCheckKind::kInterfaceCheck:
-      call_kind = LocationSummary::kCall;
+      call_kind = (throws_into_catch || kEmitCompilerReadBarrier) ?
+          LocationSummary::kCallOnSlowPath :
+          LocationSummary::kNoCall;  // In fact, call on a fatal (non-returning) slow path.
       break;
     case TypeCheckKind::kArrayCheck:
+    case TypeCheckKind::kUnresolvedCheck:
+    case TypeCheckKind::kInterfaceCheck:
       call_kind = LocationSummary::kCallOnSlowPath;
       break;
   }
 
-  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
-      instruction, call_kind);
-  if (call_kind != LocationSummary::kCall) {
-    locations->SetInAt(0, Location::RequiresRegister());
-    locations->SetInAt(1, Location::RequiresRegister());
-    // Note that TypeCheckSlowPathARM uses this register too.
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  // Note that TypeCheckSlowPathARM uses this "temp" register too.
+  locations->AddTemp(Location::RequiresRegister());
+  // When read barriers are enabled, we need an additional temporary
+  // register for some cases.
+  if (kEmitCompilerReadBarrier &&
+      (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+       type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
     locations->AddTemp(Location::RequiresRegister());
-  } else {
-    InvokeRuntimeCallingConvention calling_convention;
-    locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
-    locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
   }
 }
 
 void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  Register obj = locations->InAt(0).AsRegister<Register>();
+  Location obj_loc = locations->InAt(0);
+  Register obj = obj_loc.AsRegister<Register>();
   Register cls = locations->InAt(1).AsRegister<Register>();
-  Register temp = locations->WillCall()
-      ? Register(kNoRegister)
-      : locations->GetTemp(0).AsRegister<Register>();
-
+  Location temp_loc = locations->GetTemp(0);
+  Register temp = temp_loc.AsRegister<Register>();
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
   uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
-  SlowPathCode* slow_path = nullptr;
 
-  if (!locations->WillCall()) {
-    slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM(
-        instruction, !locations->CanCall());
-    codegen_->AddSlowPath(slow_path);
-  }
+  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+  bool is_type_check_slow_path_fatal =
+      (type_check_kind == TypeCheckKind::kExactCheck ||
+       type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+       type_check_kind == TypeCheckKind::kArrayObjectCheck) &&
+      !instruction->CanThrowIntoCatchBlock();
+  SlowPathCode* type_check_slow_path =
+      new (GetGraph()->GetArena()) TypeCheckSlowPathARM(instruction,
+                                                        is_type_check_slow_path_fatal);
+  codegen_->AddSlowPath(type_check_slow_path);
 
   Label done;
   // Avoid null check if we know obj is not null.
@@ -4973,76 +5369,159 @@
     __ CompareAndBranchIfZero(obj, &done);
   }
 
-  if (locations->WillCall()) {
-    __ LoadFromOffset(kLoadWord, obj, obj, class_offset);
-    __ MaybeUnpoisonHeapReference(obj);
-  } else {
-    __ LoadFromOffset(kLoadWord, temp, obj, class_offset);
-    __ MaybeUnpoisonHeapReference(temp);
-  }
+  // /* HeapReference<Class> */ temp = obj->klass_
+  __ LoadFromOffset(kLoadWord, temp, obj, class_offset);
+  codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
 
-  switch (instruction->GetTypeCheckKind()) {
+  switch (type_check_kind) {
     case TypeCheckKind::kExactCheck:
     case TypeCheckKind::kArrayCheck: {
       __ cmp(temp, ShifterOperand(cls));
       // Jump to slow path for throwing the exception or doing a
       // more involved array check.
-      __ b(slow_path->GetEntryLabel(), NE);
+      __ b(type_check_slow_path->GetEntryLabel(), NE);
       break;
     }
+
     case TypeCheckKind::kAbstractClassCheck: {
       // If the class is abstract, we eagerly fetch the super class of the
       // object to avoid doing a comparison we know will fail.
-      Label loop;
+      Label loop, compare_classes;
       __ Bind(&loop);
+      Location temp2_loc =
+          kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation();
+      if (kEmitCompilerReadBarrier) {
+        // Save the value of `temp` into `temp2` before overwriting it
+        // in the following move operation, as we will need it for the
+        // read barrier below.
+        Register temp2 = temp2_loc.AsRegister<Register>();
+        __ Mov(temp2, temp);
+      }
+      // /* HeapReference<Class> */ temp = temp->super_class_
       __ LoadFromOffset(kLoadWord, temp, temp, super_offset);
-      __ MaybeUnpoisonHeapReference(temp);
-      // Jump to the slow path to throw the exception.
-      __ CompareAndBranchIfZero(temp, slow_path->GetEntryLabel());
+      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset);
+
+      // If the class reference currently in `temp` is not null, jump
+      // to the `compare_classes` label to compare it with the checked
+      // class.
+      __ CompareAndBranchIfNonZero(temp, &compare_classes);
+      // Otherwise, jump to the slow path to throw the exception.
+      //
+      // But before, move back the object's class into `temp` before
+      // going into the slow path, as it has been overwritten in the
+      // meantime.
+      // /* HeapReference<Class> */ temp = obj->klass_
+      __ LoadFromOffset(kLoadWord, temp, obj, class_offset);
+      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+      __ b(type_check_slow_path->GetEntryLabel());
+
+      __ Bind(&compare_classes);
       __ cmp(temp, ShifterOperand(cls));
       __ b(&loop, NE);
       break;
     }
+
     case TypeCheckKind::kClassHierarchyCheck: {
       // Walk over the class hierarchy to find a match.
       Label loop;
       __ Bind(&loop);
       __ cmp(temp, ShifterOperand(cls));
       __ b(&done, EQ);
+
+      Location temp2_loc =
+          kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation();
+      if (kEmitCompilerReadBarrier) {
+        // Save the value of `temp` into `temp2` before overwriting it
+        // in the following move operation, as we will need it for the
+        // read barrier below.
+        Register temp2 = temp2_loc.AsRegister<Register>();
+        __ Mov(temp2, temp);
+      }
+      // /* HeapReference<Class> */ temp = temp->super_class_
       __ LoadFromOffset(kLoadWord, temp, temp, super_offset);
-      __ MaybeUnpoisonHeapReference(temp);
+      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset);
+
+      // If the class reference currently in `temp` is not null, jump
+      // back at the beginning of the loop.
       __ CompareAndBranchIfNonZero(temp, &loop);
-      // Jump to the slow path to throw the exception.
-      __ b(slow_path->GetEntryLabel());
+      // Otherwise, jump to the slow path to throw the exception.
+      //
+      // But before, move back the object's class into `temp` before
+      // going into the slow path, as it has been overwritten in the
+      // meantime.
+      // /* HeapReference<Class> */ temp = obj->klass_
+      __ LoadFromOffset(kLoadWord, temp, obj, class_offset);
+      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+      __ b(type_check_slow_path->GetEntryLabel());
       break;
     }
+
     case TypeCheckKind::kArrayObjectCheck: {
       // Do an exact check.
+      Label check_non_primitive_component_type;
       __ cmp(temp, ShifterOperand(cls));
       __ b(&done, EQ);
-      // Otherwise, we need to check that the object's class is a non primitive array.
+
+      // Otherwise, we need to check that the object's class is a non-primitive array.
+      Location temp2_loc =
+          kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation();
+      if (kEmitCompilerReadBarrier) {
+        // Save the value of `temp` into `temp2` before overwriting it
+        // in the following move operation, as we will need it for the
+        // read barrier below.
+        Register temp2 = temp2_loc.AsRegister<Register>();
+        __ Mov(temp2, temp);
+      }
+      // /* HeapReference<Class> */ temp = temp->component_type_
       __ LoadFromOffset(kLoadWord, temp, temp, component_offset);
-      __ MaybeUnpoisonHeapReference(temp);
-      __ CompareAndBranchIfZero(temp, slow_path->GetEntryLabel());
+      codegen_->MaybeGenerateReadBarrier(
+          instruction, temp_loc, temp_loc, temp2_loc, component_offset);
+
+      // If the component type is not null (i.e. the object is indeed
+      // an array), jump to label `check_non_primitive_component_type`
+      // to further check that this component type is not a primitive
+      // type.
+      __ CompareAndBranchIfNonZero(temp, &check_non_primitive_component_type);
+      // Otherwise, jump to the slow path to throw the exception.
+      //
+      // But before, move back the object's class into `temp` before
+      // going into the slow path, as it has been overwritten in the
+      // meantime.
+      // /* HeapReference<Class> */ temp = obj->klass_
+      __ LoadFromOffset(kLoadWord, temp, obj, class_offset);
+      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+      __ b(type_check_slow_path->GetEntryLabel());
+
+      __ Bind(&check_non_primitive_component_type);
       __ LoadFromOffset(kLoadUnsignedHalfword, temp, temp, primitive_offset);
-      static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
-      __ CompareAndBranchIfNonZero(temp, slow_path->GetEntryLabel());
+      static_assert(Primitive::kPrimNot == 0, "Expected 0 for art::Primitive::kPrimNot");
+      __ CompareAndBranchIfZero(temp, &done);
+      // Same comment as above regarding `temp` and the slow path.
+      // /* HeapReference<Class> */ temp = obj->klass_
+      __ LoadFromOffset(kLoadWord, temp, obj, class_offset);
+      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+      __ b(type_check_slow_path->GetEntryLabel());
       break;
     }
+
     case TypeCheckKind::kUnresolvedCheck:
     case TypeCheckKind::kInterfaceCheck:
-    default:
-      codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast),
-                              instruction,
-                              instruction->GetDexPc(),
-                              nullptr);
+      // We always go into the type check slow path for the unresolved &
+      // interface check cases.
+      //
+      // We cannot directly call the CheckCast runtime entry point
+      // without resorting to a type checking slow path here (i.e. by
+      // calling InvokeRuntime directly), as it would require to
+      // assign fixed registers for the inputs of this HInstanceOf
+      // instruction (following the runtime calling convention), which
+      // might be cluttered by the potential first read barrier
+      // emission at the beginning of this method.
+      __ b(type_check_slow_path->GetEntryLabel());
       break;
   }
   __ Bind(&done);
 
-  if (slow_path != nullptr) {
-    __ Bind(slow_path->GetExitLabel());
-  }
+  __ Bind(type_check_slow_path->GetExitLabel());
 }
 
 void LocationsBuilderARM::VisitMonitorOperation(HMonitorOperation* instruction) {
@@ -5216,6 +5695,82 @@
   }
 }
 
+void CodeGeneratorARM::GenerateReadBarrier(HInstruction* instruction,
+                                           Location out,
+                                           Location ref,
+                                           Location obj,
+                                           uint32_t offset,
+                                           Location index) {
+  DCHECK(kEmitCompilerReadBarrier);
+
+  // If heap poisoning is enabled, the unpoisoning of the loaded
+  // reference will be carried out by the runtime within the slow
+  // path.
+  //
+  // Note that `ref` currently does not get unpoisoned (when heap
+  // poisoning is enabled), which is alright as the `ref` argument is
+  // not used by the artReadBarrierSlow entry point.
+  //
+  // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
+  SlowPathCode* slow_path = new (GetGraph()->GetArena())
+      ReadBarrierForHeapReferenceSlowPathARM(instruction, out, ref, obj, offset, index);
+  AddSlowPath(slow_path);
+
+  // TODO: When read barrier has a fast path, add it here.
+  /* Currently the read barrier call is inserted after the original load.
+   * However, if we have a fast path, we need to perform the load of obj.LockWord *before* the
+   * original load. This load-load ordering is required by the read barrier.
+   * The fast path/slow path (for Baker's algorithm) should look like:
+   *
+   * bool isGray = obj.LockWord & kReadBarrierMask;
+   * lfence;  // load fence or artificial data dependence to prevent load-load reordering
+   * ref = obj.field;    // this is the original load
+   * if (isGray) {
+   *   ref = Mark(ref);  // ideally the slow path just does Mark(ref)
+   * }
+   */
+
+  __ b(slow_path->GetEntryLabel());
+  __ Bind(slow_path->GetExitLabel());
+}
+
+void CodeGeneratorARM::MaybeGenerateReadBarrier(HInstruction* instruction,
+                                                Location out,
+                                                Location ref,
+                                                Location obj,
+                                                uint32_t offset,
+                                                Location index) {
+  if (kEmitCompilerReadBarrier) {
+    // If heap poisoning is enabled, unpoisoning will be taken care of
+    // by the runtime within the slow path.
+    GenerateReadBarrier(instruction, out, ref, obj, offset, index);
+  } else if (kPoisonHeapReferences) {
+    __ UnpoisonHeapReference(out.AsRegister<Register>());
+  }
+}
+
+void CodeGeneratorARM::GenerateReadBarrierForRoot(HInstruction* instruction,
+                                                  Location out,
+                                                  Location root) {
+  DCHECK(kEmitCompilerReadBarrier);
+
+  // Note that GC roots are not affected by heap poisoning, so we do
+  // not need to do anything special for this here.
+  SlowPathCode* slow_path =
+      new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathARM(instruction, out, root);
+  AddSlowPath(slow_path);
+
+  // TODO: Implement a fast path for ReadBarrierForRoot, performing
+  // the following operation (for Baker's algorithm):
+  //
+  //   if (thread.tls32_.is_gc_marking) {
+  //     root = Mark(root);
+  //   }
+
+  __ b(slow_path->GetEntryLabel());
+  __ Bind(slow_path->GetExitLabel());
+}
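
The TODOs above sketch the Baker-style fast paths that are not implemented yet. As a hedged illustration (the bit layout and the `Mark` helper are assumptions for this sketch, not ART APIs), the intended logic looks roughly like this:

```cpp
#include <atomic>

struct Object {
  std::atomic<unsigned> lock_word;  // models the read barrier state bits in the lock word
  Object* field;
};

constexpr unsigned kReadBarrierStateMaskBit = 0x1;  // assumed position of the "gray" bit
Object* Mark(Object* ref) { return ref; }           // models the runtime marking entry point

Object* HeapReferenceReadWithBarrier(Object* obj) {
  // Load the lock word first; the acquire ordering plays the role of the
  // "lfence / artificial data dependence" mentioned in the comment above.
  bool is_gray = (obj->lock_word.load(std::memory_order_acquire) & kReadBarrierStateMaskBit) != 0;
  Object* ref = obj->field;  // the original reference load
  if (is_gray) {
    ref = Mark(ref);         // slow path: ask the collector for the to-space reference
  }
  return ref;
}
```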
+
 HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARM::GetSupportedInvokeStaticOrDirectDispatch(
       const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
       MethodReference target_method) {
@@ -5299,10 +5854,11 @@
         method_reg = reg;
         __ LoadFromOffset(kLoadWord, reg, SP, kCurrentMethodStackOffset);
       }
-      // temp = current_method->dex_cache_resolved_methods_;
-      __ LoadFromOffset(
-          kLoadWord, reg, method_reg, ArtMethod::DexCacheResolvedMethodsOffset(
-              kArmPointerSize).Int32Value());
+      // /* ArtMethod*[] */ temp = temp.ptr_sized_fields_->dex_cache_resolved_methods_;
+      __ LoadFromOffset(kLoadWord,
+                        reg,
+                        method_reg,
+                        ArtMethod::DexCacheResolvedMethodsOffset(kArmPointerSize).Int32Value());
       // temp = temp[index_in_cache]
       uint32_t index_in_cache = invoke->GetTargetMethod().dex_method_index;
       __ LoadFromOffset(kLoadWord, reg, reg, CodeGenerator::GetCachePointerOffset(index_in_cache));
@@ -5346,10 +5902,17 @@
   LocationSummary* locations = invoke->GetLocations();
   Location receiver = locations->InAt(0);
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
-  // temp = object->GetClass();
   DCHECK(receiver.IsRegister());
+  // /* HeapReference<Class> */ temp = receiver->klass_
   __ LoadFromOffset(kLoadWord, temp, receiver.AsRegister<Register>(), class_offset);
   MaybeRecordImplicitNullCheck(invoke);
+  // Instead of simply (possibly) unpoisoning `temp` here, we should
+  // emit a read barrier for the previous class reference load.
+  // However this is not required in practice, as this is an
+  // intermediate/temporary reference and because the current
+  // concurrent copying collector keeps the from-space memory
+  // intact/accessible until the end of the marking phase (though the
+  // concurrent copying collector may not do so in the future).
   __ MaybeUnpoisonHeapReference(temp);
   // temp = temp->GetMethodAt(method_offset);
   uint32_t entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index 32bfe0f..89de4f8 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -373,6 +373,51 @@
 
   void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE;
 
+  // Generate a read barrier for a heap reference within `instruction`.
+  //
+  // A read barrier for an object reference read from the heap is
+  // implemented as a call to the artReadBarrierSlow runtime entry
+  // point, which is passed the values in locations `ref`, `obj`, and
+  // `offset`:
+  //
+  //   mirror::Object* artReadBarrierSlow(mirror::Object* ref,
+  //                                      mirror::Object* obj,
+  //                                      uint32_t offset);
+  //
+  // The `out` location contains the value returned by
+  // artReadBarrierSlow.
+  //
+  // When `index` is provided (i.e. for array accesses), the offset
+  // value passed to artReadBarrierSlow is adjusted to take `index`
+  // into account.
+  void GenerateReadBarrier(HInstruction* instruction,
+                           Location out,
+                           Location ref,
+                           Location obj,
+                           uint32_t offset,
+                           Location index = Location::NoLocation());
+
+  // If read barriers are enabled, generate a read barrier for a heap reference.
+  // If heap poisoning is enabled, also unpoison the reference in `out`.
+  void MaybeGenerateReadBarrier(HInstruction* instruction,
+                                Location out,
+                                Location ref,
+                                Location obj,
+                                uint32_t offset,
+                                Location index = Location::NoLocation());
+
+  // Generate a read barrier for a GC root within `instruction`.
+  //
+  // A read barrier for an object reference GC root is implemented as
+  // a call to the artReadBarrierForRootSlow runtime entry point,
+  // which is passed the value in location `root`:
+  //
+  //   mirror::Object* artReadBarrierForRootSlow(GcRoot<mirror::Object>* root);
+  //
+  // The `out` location contains the value returned by
+  // artReadBarrierForRootSlow.
+  void GenerateReadBarrierForRoot(HInstruction* instruction, Location out, Location root);
+
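
As a worked example of the `index` adjustment described above, a sketch assuming 4-byte heap references and that `offset` is the array data offset when an index is supplied (the exact computation lives in the slow path and the runtime):

```cpp
#include <cstdint>

// Hypothetical helper for illustration only: the byte offset that would be
// passed to artReadBarrierSlow for a field or array element access.
uint32_t EffectiveOffset(uint32_t offset, bool has_index, uint32_t index) {
  // Field access: the field offset is passed through unchanged.
  // Array access: offset is the data offset, adjusted by index * sizeof(HeapReference<T>).
  return has_index ? offset + index * 4u : offset;
}
```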
  private:
   using MethodToLiteralMap = ArenaSafeMap<MethodReference, Literal*, MethodReferenceComparator>;
 
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 70bf735..d1bddf6 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -68,6 +68,10 @@
 using helpers::ArtVixlRegCodeCoherentForRegSet;
 
 static constexpr int kCurrentMethodStackOffset = 0;
+// The compare/jump sequence will generate about (2 * num_entries + 1) instructions, while the
+// jump table version generates 7 instructions and num_entries literals. The compare/jump
+// sequence generates less code/data for a small num_entries.
+static constexpr uint32_t kPackedSwitchJumpTableThreshold = 6;
 
 inline Condition ARM64Condition(IfCondition cond) {
   switch (cond) {
@@ -545,6 +549,28 @@
   DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathARM64);
 };
 
+void JumpTableARM64::EmitTable(CodeGeneratorARM64* codegen) {
+  uint32_t num_entries = switch_instr_->GetNumEntries();
+  DCHECK_GE(num_entries, kPackedSwitchJumpTableThreshold);
+
+  // We are about to use the assembler to place literals directly. Make sure we have enough
+  // underlying code buffer and that we have generated a jump table of the right size.
+  CodeBufferCheckScope scope(codegen->GetVIXLAssembler(), num_entries * sizeof(int32_t),
+                             CodeBufferCheckScope::kCheck, CodeBufferCheckScope::kExactSize);
+
+  __ Bind(&table_start_);
+  const ArenaVector<HBasicBlock*>& successors = switch_instr_->GetBlock()->GetSuccessors();
+  for (uint32_t i = 0; i < num_entries; i++) {
+    vixl::Label* target_label = codegen->GetLabelOf(successors[i]);
+    DCHECK(target_label->IsBound());
+    ptrdiff_t jump_offset = target_label->location() - table_start_.location();
+    DCHECK_GT(jump_offset, std::numeric_limits<int32_t>::min());
+    DCHECK_LE(jump_offset, std::numeric_limits<int32_t>::max());
+    Literal<int32_t> literal(jump_offset);
+    __ place(&literal);
+  }
+}
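
A minimal C++ model of the jump table scheme implemented by EmitTable() and used by VisitPackedSwitch() below: each entry is the signed 32-bit offset of a successor label from the table start, and dispatch adds the loaded offset back onto the table base. This is an illustration, not VIXL code.

```cpp
#include <cstdint>
#include <vector>

struct Label { int64_t location; };  // models vixl::Label::location()

// Models EmitTable(): one int32_t offset per successor, relative to table_start.
std::vector<int32_t> BuildTable(const std::vector<Label>& targets, const Label& table_start) {
  std::vector<int32_t> table;
  for (const Label& target : targets) {
    table.push_back(static_cast<int32_t>(target.location - table_start.location));
  }
  return table;
}

// Models the Adr/Ldr/Add/Br sequence: the branch target is table_base + table[index].
int64_t DispatchTarget(const std::vector<int32_t>& table, int64_t table_base, uint32_t index) {
  return table_base + table[index];
}
```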
+
 #undef __
 
 Location InvokeDexCallingConventionVisitorARM64::GetNextLocation(Primitive::Type type) {
@@ -587,6 +613,7 @@
                     compiler_options,
                     stats),
       block_labels_(nullptr),
+      jump_tables_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       location_builder_(graph, this),
       instruction_visitor_(graph, this),
       move_resolver_(graph->GetArena(), this),
@@ -603,10 +630,16 @@
   AddAllocatedRegister(LocationFrom(lr));
 }
 
-#undef __
 #define __ GetVIXLAssembler()->
 
+void CodeGeneratorARM64::EmitJumpTables() {
+  for (auto jump_table : jump_tables_) {
+    jump_table->EmitTable(this);
+  }
+}
+
 void CodeGeneratorARM64::Finalize(CodeAllocator* allocator) {
+  EmitJumpTables();
   // Ensure we emit the literal pool.
   __ FinalizeCode();
 
@@ -3848,26 +3881,73 @@
 
 void InstructionCodeGeneratorARM64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
   int32_t lower_bound = switch_instr->GetStartValue();
-  int32_t num_entries = switch_instr->GetNumEntries();
+  uint32_t num_entries = switch_instr->GetNumEntries();
   Register value_reg = InputRegisterAt(switch_instr, 0);
   HBasicBlock* default_block = switch_instr->GetDefaultBlock();
 
-  // Create a series of compare/jumps.
-  const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
-  for (int32_t i = 0; i < num_entries; i++) {
-    int32_t case_value = lower_bound + i;
-    vixl::Label* succ = codegen_->GetLabelOf(successors[i]);
-    if (case_value == 0) {
-      __ Cbz(value_reg, succ);
-    } else {
-      __ Cmp(value_reg, vixl::Operand(case_value));
-      __ B(eq, succ);
-    }
-  }
+  // Roughly assume a maximum average of 16 assembly instructions generated per HIR in a graph.
+  static constexpr int32_t kMaxExpectedSizePerHInstruction = 16 * vixl::kInstructionSize;
+  // ADR has a limited range (+/- 1 MB), so we set a threshold on the number of HIRs in the graph
+  // to make sure we don't emit the table if the target may run out of range.
+  // TODO: Instead of emitting all jump tables at the end of the code, we could keep track of ADR
+  // ranges and emit the tables only as required.
+  static constexpr int32_t kJumpTableInstructionThreshold = 1* MB / kMaxExpectedSizePerHInstruction;
 
-  // And the default for any other value.
-  if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
-    __ B(codegen_->GetLabelOf(default_block));
+  if (num_entries < kPackedSwitchJumpTableThreshold ||
+      // Current instruction id is an upper bound of the number of HIRs in the graph.
+      GetGraph()->GetCurrentInstructionId() > kJumpTableInstructionThreshold) {
+    // Create a series of compare/jumps.
+    const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
+    for (uint32_t i = 0; i < num_entries; i++) {
+      int32_t case_value = lower_bound + i;
+      vixl::Label* succ = codegen_->GetLabelOf(successors[i]);
+      if (case_value == 0) {
+        __ Cbz(value_reg, succ);
+      } else {
+        __ Cmp(value_reg, Operand(case_value));
+        __ B(eq, succ);
+      }
+    }
+
+    // And the default for any other value.
+    if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
+      __ B(codegen_->GetLabelOf(default_block));
+    }
+  } else {
+    JumpTableARM64* jump_table = new (GetGraph()->GetArena()) JumpTableARM64(switch_instr);
+    codegen_->AddJumpTable(jump_table);
+
+    UseScratchRegisterScope temps(codegen_->GetVIXLAssembler());
+
+    // The instructions below should use at most one blocked register. Since there are two
+    // blocked registers, we are free to block one.
+    Register temp_w = temps.AcquireW();
+    Register index;
+    // Remove the bias.
+    if (lower_bound != 0) {
+      index = temp_w;
+      __ Sub(index, value_reg, Operand(lower_bound));
+    } else {
+      index = value_reg;
+    }
+
+    // Jump to default block if index is out of the range.
+    __ Cmp(index, Operand(num_entries));
+    __ B(hs, codegen_->GetLabelOf(default_block));
+
+    // In the current VIXL implementation, encoding the immediate value for Adr does not require
+    // any blocked registers, so we are free to use both VIXL blocked registers to reduce
+    // register pressure.
+    Register table_base = temps.AcquireX();
+    // Load jump offset from the table.
+    __ Adr(table_base, jump_table->GetTableStartLabel());
+    Register jump_offset = temp_w;
+    __ Ldr(jump_offset, MemOperand(table_base, index, UXTW, 2));
+
+    // Jump to the target block by branching to table_base (PC-relative) + offset.
+    Register target_address = table_base;
+    __ Add(target_address, table_base, Operand(jump_offset, SXTW));
+    __ Br(target_address);
   }
 }
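
The size comparison in the comment near kPackedSwitchJumpTableThreshold can be checked with a little arithmetic. Counting 4-byte code/data words for each strategy (a rough model that ignores literal pool alignment), the two break even around 6 entries and the jump table wins beyond that:

```cpp
#include <cstdint>

constexpr uint32_t CompareJumpWords(uint32_t num_entries) {
  return 2 * num_entries + 1;  // roughly one cmp + one branch per entry, plus the default branch
}

constexpr uint32_t JumpTableWords(uint32_t num_entries) {
  return 7 + num_entries;      // fixed dispatch sequence plus one 32-bit literal per entry
}

static_assert(CompareJumpWords(6) == JumpTableWords(6), "break-even at the threshold of 6");
static_assert(CompareJumpWords(16) > JumpTableWords(16), "jump table is smaller for larger switches");
```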
 
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 4403662..881afcc 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -81,6 +81,22 @@
   DISALLOW_COPY_AND_ASSIGN(SlowPathCodeARM64);
 };
 
+class JumpTableARM64 : public ArenaObject<kArenaAllocSwitchTable> {
+ public:
+  explicit JumpTableARM64(HPackedSwitch* switch_instr)
+    : switch_instr_(switch_instr), table_start_() {}
+
+  vixl::Label* GetTableStartLabel() { return &table_start_; }
+
+  void EmitTable(CodeGeneratorARM64* codegen);
+
+ private:
+  HPackedSwitch* const switch_instr_;
+  vixl::Label table_start_;
+
+  DISALLOW_COPY_AND_ASSIGN(JumpTableARM64);
+};
+
 static const vixl::Register kRuntimeParameterCoreRegisters[] =
     { vixl::x0, vixl::x1, vixl::x2, vixl::x3, vixl::x4, vixl::x5, vixl::x6, vixl::x7 };
 static constexpr size_t kRuntimeParameterCoreRegistersLength =
@@ -358,6 +374,10 @@
     block_labels_ = CommonInitializeLabels<vixl::Label>();
   }
 
+  void AddJumpTable(JumpTableARM64* jump_table) {
+    jump_tables_.push_back(jump_table);
+  }
+
   void Finalize(CodeAllocator* allocator) OVERRIDE;
 
   // Code generation helpers.
@@ -426,9 +446,12 @@
     vixl::Label* pc_insn_label;
   };
 
+  void EmitJumpTables();
+
   // Labels for each block that will be compiled.
   vixl::Label* block_labels_;  // Indexed by block id.
   vixl::Label frame_entry_label_;
+  ArenaVector<JumpTableARM64*> jump_tables_;
 
   LocationsBuilderARM64 location_builder_;
   InstructionCodeGeneratorARM64 instruction_visitor_;
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index 2b77901..d166d00 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -397,6 +397,9 @@
                                       << invoke->IsRecursive()
                                       << std::noboolalpha;
     StartAttributeStream("intrinsic") << invoke->GetIntrinsic();
+    if (invoke->IsStatic()) {
+      StartAttributeStream("clinit_check") << invoke->GetClinitCheckRequirement();
+    }
   }
 
   void VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet* field_access) OVERRIDE {
@@ -500,6 +503,18 @@
         StartAttributeStream("exact") << std::boolalpha << info.IsExact() << std::noboolalpha;
       } else if (instruction->IsLoadClass()) {
         StartAttributeStream("klass") << "unresolved";
+      } else if (instruction->IsNullConstant()) {
+        // The NullConstant may be added to the graph during other passes that happen between
+        // ReferenceTypePropagation and Inliner (e.g. InstructionSimplifier). If the inliner
+        // doesn't run or doesn't inline anything, the NullConstant remains untyped.
+        // So we should check NullConstants for validity only after reference type propagation.
+        //
+        // Note: The infrastructure to properly type NullConstants everywhere is too complex to
+        // add for the benefit it would bring.
+        StartAttributeStream("klass") << "not_set";
+        DCHECK(!is_after_pass_
+            || !IsPass(ReferenceTypePropagation::kReferenceTypePropagationPassName))
+            << " Expected a valid rti after reference type propagation";
       } else {
         DCHECK(!is_after_pass_)
             << "Expected a valid rti after reference type propagation";
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index 0a5acc3..d2017da 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -44,7 +44,23 @@
 bool IntrinsicLocationsBuilderARM::TryDispatch(HInvoke* invoke) {
   Dispatch(invoke);
   LocationSummary* res = invoke->GetLocations();
-  return res != nullptr && res->Intrinsified();
+  if (res == nullptr) {
+    return false;
+  }
+  if (kEmitCompilerReadBarrier && res->CanCall()) {
+    // Generating an intrinsic for this HInvoke may produce an
+    // IntrinsicSlowPathARM slow path.  Currently this approach
+    // does not work when using read barriers, as the emitted
+    // calling sequence will make use of another slow path
+    // (ReadBarrierForRootSlowPathARM for HInvokeStaticOrDirect,
+    // ReadBarrierSlowPathARM for HInvokeVirtual).  So we bail
+    // out in this case.
+    //
+    // TODO: Find a way to have intrinsics work with read barriers.
+    invoke->SetLocations(nullptr);
+    return false;
+  }
+  return res->Intrinsified();
 }
 
 #define __ assembler->
@@ -662,20 +678,23 @@
          (type == Primitive::kPrimLong) ||
          (type == Primitive::kPrimNot));
   ArmAssembler* assembler = codegen->GetAssembler();
-  Register base = locations->InAt(1).AsRegister<Register>();           // Object pointer.
-  Register offset = locations->InAt(2).AsRegisterPairLow<Register>();  // Long offset, lo part only.
+  Location base_loc = locations->InAt(1);
+  Register base = base_loc.AsRegister<Register>();             // Object pointer.
+  Location offset_loc = locations->InAt(2);
+  Register offset = offset_loc.AsRegisterPairLow<Register>();  // Long offset, lo part only.
+  Location trg_loc = locations->Out();
 
   if (type == Primitive::kPrimLong) {
-    Register trg_lo = locations->Out().AsRegisterPairLow<Register>();
+    Register trg_lo = trg_loc.AsRegisterPairLow<Register>();
     __ add(IP, base, ShifterOperand(offset));
     if (is_volatile && !codegen->GetInstructionSetFeatures().HasAtomicLdrdAndStrd()) {
-      Register trg_hi = locations->Out().AsRegisterPairHigh<Register>();
+      Register trg_hi = trg_loc.AsRegisterPairHigh<Register>();
       __ ldrexd(trg_lo, trg_hi, IP);
     } else {
       __ ldrd(trg_lo, Address(IP));
     }
   } else {
-    Register trg = locations->Out().AsRegister<Register>();
+    Register trg = trg_loc.AsRegister<Register>();
     __ ldr(trg, Address(base, offset));
   }
 
@@ -684,14 +703,18 @@
   }
 
   if (type == Primitive::kPrimNot) {
-    Register trg = locations->Out().AsRegister<Register>();
-    __ MaybeUnpoisonHeapReference(trg);
+    codegen->MaybeGenerateReadBarrier(invoke, trg_loc, trg_loc, base_loc, 0U, offset_loc);
   }
 }
 
 static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  bool can_call = kEmitCompilerReadBarrier &&
+      (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
+       invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
   LocationSummary* locations = new (arena) LocationSummary(invoke,
-                                                           LocationSummary::kNoCall,
+                                                           can_call ?
+                                                               LocationSummary::kCallOnSlowPath :
+                                                               LocationSummary::kNoCall,
                                                            kIntrinsified);
   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
   locations->SetInAt(1, Location::RequiresRegister());
@@ -936,6 +959,7 @@
   __ Bind(&loop_head);
 
   __ ldrex(tmp_lo, tmp_ptr);
+  // TODO: Do we need a read barrier here when `type == Primitive::kPrimNot`?
 
   __ subs(tmp_lo, tmp_lo, ShifterOperand(expected_lo));
 
@@ -964,7 +988,11 @@
   // The UnsafeCASObject intrinsic does not always work when heap
   // poisoning is enabled (it breaks run-test 004-UnsafeTest); turn it
   // off temporarily as a quick fix.
+  //
   // TODO(rpl): Fix it and turn it back on.
+  //
+  // TODO(rpl): Also, we should investigate whether we need a read
+  // barrier in the generated code.
   if (kPoisonHeapReferences) {
     return;
   }
@@ -1400,6 +1428,10 @@
   }
 }
 
+// TODO: Implement read barriers in the SystemArrayCopy intrinsic.
+// Note that this code path is not used (yet) because we do not
+// intrinsify methods that can go into the IntrinsicSlowPathARM
+// slow path.
 void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) {
   ArmAssembler* assembler = GetAssembler();
   LocationSummary* locations = invoke->GetLocations();
diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc
index ff843eb..3654159 100644
--- a/compiler/optimizing/intrinsics_mips64.cc
+++ b/compiler/optimizing/intrinsics_mips64.cc
@@ -1391,6 +1391,108 @@
   __ Bind(slow_path->GetExitLabel());
 }
 
+// boolean java.lang.String.equals(Object anObject)
+void IntrinsicLocationsBuilderMIPS64::VisitStringEquals(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kNoCall,
+                                                            kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister());
+
+  // Temporary registers to store lengths of strings and for calculations.
+  locations->AddTemp(Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitStringEquals(HInvoke* invoke) {
+  Mips64Assembler* assembler = GetAssembler();
+  LocationSummary* locations = invoke->GetLocations();
+
+  GpuRegister str = locations->InAt(0).AsRegister<GpuRegister>();
+  GpuRegister arg = locations->InAt(1).AsRegister<GpuRegister>();
+  GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+
+  GpuRegister temp1 = locations->GetTemp(0).AsRegister<GpuRegister>();
+  GpuRegister temp2 = locations->GetTemp(1).AsRegister<GpuRegister>();
+  GpuRegister temp3 = locations->GetTemp(2).AsRegister<GpuRegister>();
+
+  Label loop;
+  Label end;
+  Label return_true;
+  Label return_false;
+
+  // Get offsets of count, value, and class fields within a string object.
+  const int32_t count_offset = mirror::String::CountOffset().Int32Value();
+  const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
+  const int32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+
+  // Note that the null check must have been done earlier.
+  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
+
+  // If the register containing the pointer to "this" and the register
+  // containing the pointer to "anObject" are the same register, then
+  // "this" and "anObject" are the same object and we can
+  // short-circuit the logic to a true result.
+  if (str == arg) {
+    __ LoadConst64(out, 1);
+    return;
+  }
+
+  // Check if input is null, return false if it is.
+  __ Beqzc(arg, &return_false);
+
+  // Reference equality check, return true if same reference.
+  __ Beqc(str, arg, &return_true);
+
+  // Instanceof check for the argument by comparing class fields.
+  // All string objects must have the same type since String cannot be subclassed.
+  // Receiver must be a string object, so its class field is equal to all strings' class fields.
+  // If the argument is a string object, its class field must be equal to receiver's class field.
+  __ Lw(temp1, str, class_offset);
+  __ Lw(temp2, arg, class_offset);
+  __ Bnec(temp1, temp2, &return_false);
+
+  // Load lengths of this and argument strings.
+  __ Lw(temp1, str, count_offset);
+  __ Lw(temp2, arg, count_offset);
+  // Check if lengths are equal, return false if they're not.
+  __ Bnec(temp1, temp2, &return_false);
+  // Return true if both strings are empty.
+  __ Beqzc(temp1, &return_true);
+
+  // Don't overwrite input registers
+  __ Move(TMP, str);
+  __ Move(temp3, arg);
+
+  // Assertions that must hold in order to compare strings 4 characters at a time.
+  DCHECK_ALIGNED(value_offset, 8);
+  static_assert(IsAligned<8>(kObjectAlignment), "String of odd length is not zero padded");
+
+  // Loop to compare strings 4 characters at a time starting at the beginning of the string.
+  // Ok to do this because strings are zero-padded to be 8-byte aligned.
+  __ Bind(&loop);
+  __ Ld(out, TMP, value_offset);
+  __ Ld(temp2, temp3, value_offset);
+  __ Bnec(out, temp2, &return_false);
+  __ Daddiu(TMP, TMP, 8);
+  __ Daddiu(temp3, temp3, 8);
+  __ Addiu(temp1, temp1, -4);
+  __ Bgtzc(temp1, &loop);
+
+  // Return true and exit the function.
+  // If loop does not result in returning false, we return true.
+  __ Bind(&return_true);
+  __ LoadConst64(out, 1);
+  __ B(&end);
+
+  // Return false and exit the function.
+  __ Bind(&return_false);
+  __ LoadConst64(out, 0);
+  __ Bind(&end);
+}
+
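
A plain-C++ reference model of the comparison loop above, assuming the two uint16_t character buffers are zero-padded up to a multiple of 4 characters (8 bytes), mirroring the object alignment the emitted code relies on. This is an illustration, not ART code.

```cpp
#include <cstdint>
#include <cstring>

// Both buffers must be padded with zeros up to a multiple of 4 uint16_t
// characters, matching the 8-byte alignment assumption above.
bool StringContentsEqual(const uint16_t* a, const uint16_t* b, int32_t char_count) {
  for (int32_t i = 0; i < char_count; i += 4) {    // temp1 is decremented by 4 per iteration
    uint64_t word_a;
    uint64_t word_b;
    std::memcpy(&word_a, a + i, sizeof(word_a));   // models the 64-bit Ld from value_offset
    std::memcpy(&word_b, b + i, sizeof(word_b));
    if (word_a != word_b) {
      return false;  // corresponds to the branch to return_false
    }
  }
  return true;       // corresponds to falling through to return_true
}
```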
 static void GenerateStringIndexOf(HInvoke* invoke,
                                   Mips64Assembler* assembler,
                                   CodeGeneratorMIPS64* codegen,
@@ -1586,8 +1688,6 @@
 UNIMPLEMENTED_INTRINSIC(MathRoundDouble)
 UNIMPLEMENTED_INTRINSIC(MathRoundFloat)
 
-UNIMPLEMENTED_INTRINSIC(StringEquals)
-
 UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
 UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck)
 UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc
index 6fbb682..5b89cfe 100644
--- a/compiler/optimizing/load_store_elimination.cc
+++ b/compiler/optimizing/load_store_elimination.cc
@@ -119,19 +119,10 @@
       : ref_info_(ref_info),
         offset_(offset),
         index_(index),
-        declaring_class_def_index_(declaring_class_def_index),
-        may_become_unknown_(true) {
+        declaring_class_def_index_(declaring_class_def_index) {
     DCHECK(ref_info != nullptr);
     DCHECK((offset == kInvalidFieldOffset && index != nullptr) ||
            (offset != kInvalidFieldOffset && index == nullptr));
-
-    if (ref_info->IsSingletonAndNotReturned()) {
-      // We try to track stores to singletons that aren't returned to eliminate the stores
-      // since values in singleton's fields cannot be killed due to aliasing. Those values
-      // can still be killed due to merging values since we don't build phi for merging heap
-      // values. SetMayBecomeUnknown(true) may be called later once such merge becomes possible.
-      may_become_unknown_ = false;
-    }
   }
 
   ReferenceInfo* GetReferenceInfo() const { return ref_info_; }
@@ -148,21 +139,11 @@
     return index_ != nullptr;
   }
 
-  // Returns true if this heap location's value may become unknown after it's
-  // set to a value, due to merge of values, or killed due to aliasing.
-  bool MayBecomeUnknown() const {
-    return may_become_unknown_;
-  }
-  void SetMayBecomeUnknown(bool val) {
-    may_become_unknown_ = val;
-  }
-
  private:
   ReferenceInfo* const ref_info_;      // reference for instance/static field or array access.
   const size_t offset_;                // offset of static/instance field.
   HInstruction* const index_;          // index of an array element.
   const int16_t declaring_class_def_index_;  // declaring class's def's dex index.
-  bool may_become_unknown_;            // value may become kUnknownHeapValue.
 
   DISALLOW_COPY_AND_ASSIGN(HeapLocation);
 };
@@ -381,26 +362,13 @@
     return heap_locations_[heap_location_idx];
   }
 
-  void VisitFieldAccess(HInstruction* field_access,
-                        HInstruction* ref,
-                        const FieldInfo& field_info,
-                        bool is_store) {
+  void VisitFieldAccess(HInstruction* ref, const FieldInfo& field_info) {
     if (field_info.IsVolatile()) {
       has_volatile_ = true;
     }
     const uint16_t declaring_class_def_index = field_info.GetDeclaringClassDefIndex();
     const size_t offset = field_info.GetFieldOffset().SizeValue();
-    HeapLocation* location = GetOrCreateHeapLocation(ref, offset, nullptr, declaring_class_def_index);
-    // A store of a value may be eliminated if all future loads for that value can be eliminated.
-    // For a value that's stored into a singleton field, the value will not be killed due
-    // to aliasing. However if the value is set in a block that doesn't post dominate the definition,
-    // the value may be killed due to merging later. Before we have post dominating info, we check
-    // if the store is in the same block as the definition just to be conservative.
-    if (is_store &&
-        location->GetReferenceInfo()->IsSingletonAndNotReturned() &&
-        field_access->GetBlock() != ref->GetBlock()) {
-      location->SetMayBecomeUnknown(true);
-    }
+    GetOrCreateHeapLocation(ref, offset, nullptr, declaring_class_def_index);
   }
 
   void VisitArrayAccess(HInstruction* array, HInstruction* index) {
@@ -409,20 +377,20 @@
   }
 
   void VisitInstanceFieldGet(HInstanceFieldGet* instruction) OVERRIDE {
-    VisitFieldAccess(instruction, instruction->InputAt(0), instruction->GetFieldInfo(), false);
+    VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo());
   }
 
   void VisitInstanceFieldSet(HInstanceFieldSet* instruction) OVERRIDE {
-    VisitFieldAccess(instruction, instruction->InputAt(0), instruction->GetFieldInfo(), true);
+    VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo());
     has_heap_stores_ = true;
   }
 
   void VisitStaticFieldGet(HStaticFieldGet* instruction) OVERRIDE {
-    VisitFieldAccess(instruction, instruction->InputAt(0), instruction->GetFieldInfo(), false);
+    VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo());
   }
 
   void VisitStaticFieldSet(HStaticFieldSet* instruction) OVERRIDE {
-    VisitFieldAccess(instruction, instruction->InputAt(0), instruction->GetFieldInfo(), true);
+    VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo());
     has_heap_stores_ = true;
   }
 
@@ -464,9 +432,14 @@
 };
 
 // An unknown heap value. Loads with such a value in the heap location cannot be eliminated.
+// A heap location can be set to kUnknownHeapValue when:
+// - initialized, before any value has been stored.
+// - killed due to aliasing, merging, invocation, or loop side effects.
 static HInstruction* const kUnknownHeapValue =
     reinterpret_cast<HInstruction*>(static_cast<uintptr_t>(-1));
+
 // Default heap value after an allocation.
+// A heap location can be set to that value right after an allocation.
 static HInstruction* const kDefaultHeapValue =
     reinterpret_cast<HInstruction*>(static_cast<uintptr_t>(-2));
 
@@ -484,29 +457,17 @@
                                                     kUnknownHeapValue,
                                                     graph->GetArena()->Adapter(kArenaAllocLSE)),
                          graph->GetArena()->Adapter(kArenaAllocLSE)),
-        removed_instructions_(graph->GetArena()->Adapter(kArenaAllocLSE)),
-        substitute_instructions_(graph->GetArena()->Adapter(kArenaAllocLSE)),
+        removed_loads_(graph->GetArena()->Adapter(kArenaAllocLSE)),
+        substitute_instructions_for_loads_(graph->GetArena()->Adapter(kArenaAllocLSE)),
+        possibly_removed_stores_(graph->GetArena()->Adapter(kArenaAllocLSE)),
         singleton_new_instances_(graph->GetArena()->Adapter(kArenaAllocLSE)) {
   }
 
   void VisitBasicBlock(HBasicBlock* block) OVERRIDE {
-    int block_id = block->GetBlockId();
-    ArenaVector<HInstruction*>& heap_values = heap_values_for_[block_id];
+    // Populate the heap_values array for this block.
     // TODO: try to reuse the heap_values array from one predecessor if possible.
     if (block->IsLoopHeader()) {
-      // We do a single pass in reverse post order. For loops, use the side effects as a hint
-      // to see if the heap values should be killed.
-      if (side_effects_.GetLoopEffects(block).DoesAnyWrite()) {
-        // Leave all values as kUnknownHeapValue.
-      } else {
-        // Inherit the values from pre-header.
-        HBasicBlock* pre_header = block->GetLoopInformation()->GetPreHeader();
-        ArenaVector<HInstruction*>& pre_header_heap_values =
-            heap_values_for_[pre_header->GetBlockId()];
-        for (size_t i = 0; i < heap_values.size(); i++) {
-          heap_values[i] = pre_header_heap_values[i];
-        }
-      }
+      HandleLoopSideEffects(block);
     } else {
       MergePredecessorValues(block);
     }
@@ -515,23 +476,34 @@
 
   // Remove recorded instructions that should be eliminated.
   void RemoveInstructions() {
-    size_t size = removed_instructions_.size();
-    DCHECK_EQ(size, substitute_instructions_.size());
+    size_t size = removed_loads_.size();
+    DCHECK_EQ(size, substitute_instructions_for_loads_.size());
     for (size_t i = 0; i < size; i++) {
-      HInstruction* instruction = removed_instructions_[i];
-      DCHECK(instruction != nullptr);
-      HInstruction* substitute = substitute_instructions_[i];
-      if (substitute != nullptr) {
-        // Keep tracing substitute till one that's not removed.
-        HInstruction* sub_sub = FindSubstitute(substitute);
-        while (sub_sub != substitute) {
-          substitute = sub_sub;
-          sub_sub = FindSubstitute(substitute);
-        }
-        instruction->ReplaceWith(substitute);
+      HInstruction* load = removed_loads_[i];
+      DCHECK(load != nullptr);
+      DCHECK(load->IsInstanceFieldGet() ||
+             load->IsStaticFieldGet() ||
+             load->IsArrayGet());
+      HInstruction* substitute = substitute_instructions_for_loads_[i];
+      DCHECK(substitute != nullptr);
+      // Keep tracing substitute till one that's not removed.
+      HInstruction* sub_sub = FindSubstitute(substitute);
+      while (sub_sub != substitute) {
+        substitute = sub_sub;
+        sub_sub = FindSubstitute(substitute);
       }
-      instruction->GetBlock()->RemoveInstruction(instruction);
+      load->ReplaceWith(substitute);
+      load->GetBlock()->RemoveInstruction(load);
     }
+
+    // At this point, stores in possibly_removed_stores_ can be safely removed.
+    size = possibly_removed_stores_.size();
+    for (size_t i = 0; i < size; i++) {
+      HInstruction* store = possibly_removed_stores_[i];
+      DCHECK(store->IsInstanceFieldSet() || store->IsStaticFieldSet() || store->IsArraySet());
+      store->GetBlock()->RemoveInstruction(store);
+    }
+
     // TODO: remove unnecessary allocations.
     // Eliminate instructions in singleton_new_instances_ that:
     // - don't have uses,
@@ -541,6 +513,52 @@
   }
 
  private:
+  // If heap_values[index] is an instance field store, we need to keep the store.
+  // This is necessary if a heap value is killed due to merging, or loop side
+  // effects (which is essentially merging also), since a load later from the
+  // location won't be eliminated.
+  void KeepIfIsStore(HInstruction* heap_value) {
+    if (heap_value == kDefaultHeapValue ||
+        heap_value == kUnknownHeapValue ||
+        !heap_value->IsInstanceFieldSet()) {
+      return;
+    }
+    auto idx = std::find(possibly_removed_stores_.begin(),
+        possibly_removed_stores_.end(), heap_value);
+    if (idx != possibly_removed_stores_.end()) {
+      // Make sure the store is kept.
+      possibly_removed_stores_.erase(idx);
+    }
+  }
+
+  void HandleLoopSideEffects(HBasicBlock* block) {
+    DCHECK(block->IsLoopHeader());
+    int block_id = block->GetBlockId();
+    ArenaVector<HInstruction*>& heap_values = heap_values_for_[block_id];
+    HBasicBlock* pre_header = block->GetLoopInformation()->GetPreHeader();
+    ArenaVector<HInstruction*>& pre_header_heap_values =
+        heap_values_for_[pre_header->GetBlockId()];
+    // We do a single pass in reverse post order. For loops, use the side effects as a hint
+    // to see if the heap values should be killed.
+    if (side_effects_.GetLoopEffects(block).DoesAnyWrite()) {
+      for (size_t i = 0; i < pre_header_heap_values.size(); i++) {
+        // heap value is killed by loop side effects, need to keep the last store.
+        KeepIfIsStore(pre_header_heap_values[i]);
+      }
+      if (kIsDebugBuild) {
+        // heap_values should all still be kUnknownHeapValue, the value they were initialized with.
+        for (size_t i = 0; i < heap_values.size(); i++) {
+          DCHECK_EQ(heap_values[i], kUnknownHeapValue);
+        }
+      }
+    } else {
+      // Inherit the values from pre-header.
+      for (size_t i = 0; i < heap_values.size(); i++) {
+        heap_values[i] = pre_header_heap_values[i];
+      }
+    }
+  }
+
   void MergePredecessorValues(HBasicBlock* block) {
     const ArenaVector<HBasicBlock*>& predecessors = block->GetPredecessors();
     if (predecessors.size() == 0) {
@@ -548,16 +566,25 @@
     }
     ArenaVector<HInstruction*>& heap_values = heap_values_for_[block->GetBlockId()];
     for (size_t i = 0; i < heap_values.size(); i++) {
-      HInstruction* value = heap_values_for_[predecessors[0]->GetBlockId()][i];
-      if (value != kUnknownHeapValue) {
+      HInstruction* pred0_value = heap_values_for_[predecessors[0]->GetBlockId()][i];
+      heap_values[i] = pred0_value;
+      if (pred0_value != kUnknownHeapValue) {
         for (size_t j = 1; j < predecessors.size(); j++) {
-          if (heap_values_for_[predecessors[j]->GetBlockId()][i] != value) {
-            value = kUnknownHeapValue;
+          HInstruction* pred_value = heap_values_for_[predecessors[j]->GetBlockId()][i];
+          if (pred_value != pred0_value) {
+            heap_values[i] = kUnknownHeapValue;
             break;
           }
         }
       }
-      heap_values[i] = value;
+
+      if (heap_values[i] == kUnknownHeapValue) {
+        // Keep the last store in each predecessor since future loads cannot be eliminated.
+        for (size_t j = 0; j < predecessors.size(); j++) {
+          ArenaVector<HInstruction*>& pred_values = heap_values_for_[predecessors[j]->GetBlockId()];
+          KeepIfIsStore(pred_values[i]);
+        }
+      }
     }
   }
 
@@ -616,21 +643,30 @@
     HInstruction* heap_value = heap_values[idx];
     if (heap_value == kDefaultHeapValue) {
       HInstruction* constant = GetDefaultValue(instruction->GetType());
-      removed_instructions_.push_back(instruction);
-      substitute_instructions_.push_back(constant);
+      removed_loads_.push_back(instruction);
+      substitute_instructions_for_loads_.push_back(constant);
       heap_values[idx] = constant;
       return;
     }
+    if (heap_value != kUnknownHeapValue && heap_value->IsInstanceFieldSet()) {
+      HInstruction* store = heap_value;
+      // This load must be from a singleton since it reads the same field into
+      // which a "removed" store put the value, and that store must be to a singleton's field.
+      DCHECK(ref_info->IsSingleton());
+      // Get the real heap value of the store.
+      heap_value = store->InputAt(1);
+    }
     if ((heap_value != kUnknownHeapValue) &&
         // Keep the load due to possible I/F, J/D array aliasing.
         // See b/22538329 for details.
         (heap_value->GetType() == instruction->GetType())) {
-      removed_instructions_.push_back(instruction);
-      substitute_instructions_.push_back(heap_value);
+      removed_loads_.push_back(instruction);
+      substitute_instructions_for_loads_.push_back(heap_value);
       TryRemovingNullCheck(instruction);
       return;
     }
 
+    // Load isn't eliminated.
     if (heap_value == kUnknownHeapValue) {
       // Put the load as the value into the HeapLocation.
       // This acts like GVN but with better aliasing analysis.
@@ -662,51 +698,63 @@
     ArenaVector<HInstruction*>& heap_values =
         heap_values_for_[instruction->GetBlock()->GetBlockId()];
     HInstruction* heap_value = heap_values[idx];
-    bool redundant_store = false;
+    bool same_value = false;
+    bool possibly_redundant = false;
     if (Equal(heap_value, value)) {
       // Store into the heap location with the same value.
-      redundant_store = true;
+      same_value = true;
     } else if (index != nullptr) {
       // For array element, don't eliminate stores since it can be easily aliased
       // with non-constant index.
     } else if (!heap_location_collector_.MayDeoptimize() &&
-               ref_info->IsSingletonAndNotReturned() &&
-               !heap_location_collector_.GetHeapLocation(idx)->MayBecomeUnknown()) {
-      // Store into a field of a singleton that's not returned. And that value cannot be
-      // killed due to merge. It's redundant since future loads will get the value
-      // set by this instruction.
-      Primitive::Type type = Primitive::kPrimVoid;
-      if (instruction->IsInstanceFieldSet()) {
-        type = instruction->AsInstanceFieldSet()->GetFieldInfo().GetFieldType();
-      } else if (instruction->IsStaticFieldSet()) {
-        type = instruction->AsStaticFieldSet()->GetFieldInfo().GetFieldType();
+               ref_info->IsSingletonAndNotReturned()) {
+      // Store into a field of a singleton that's not returned. The value cannot be
+      // killed due to aliasing/invocation. It can be redundant since future loads can
+      // directly get the value set by this instruction. The value can still be killed due to
+      // merging or loop side effects. Stores whose values are killed due to merging/loop side
+      // effects later will be removed from possibly_removed_stores_ when that is detected.
+      possibly_redundant = true;
+      HNewInstance* new_instance = ref_info->GetReference()->AsNewInstance();
+      DCHECK(new_instance != nullptr);
+      if (new_instance->IsFinalizable()) {
+        // Finalizable objects escape globally. Need to keep the store.
+        possibly_redundant = false;
       } else {
-        DCHECK(false) << "Must be an instance/static field set instruction.";
+        HLoopInformation* loop_info = instruction->GetBlock()->GetLoopInformation();
+        if (loop_info != nullptr) {
+          // The instruction is a store in the loop, so the loop must do writes.
+          DCHECK(side_effects_.GetLoopEffects(loop_info->GetHeader()).DoesAnyWrite());
+
+          if (loop_info->IsLoopInvariant(original_ref, false)) {
+            DCHECK(original_ref->GetBlock()->Dominates(loop_info->GetPreHeader()));
+            // Keep the store since its value may be needed at the loop header.
+            possibly_redundant = false;
+          } else {
+            // The singleton is created inside the loop, so the value stored to it isn't needed
+            // at the loop header. This is also true for outer loops.
+          }
+        }
       }
-      if (value->GetType() != type) {
-        // I/F, J/D aliasing should not happen for fields.
-        DCHECK(Primitive::IsIntegralType(value->GetType()));
-        DCHECK(!Primitive::Is64BitType(value->GetType()));
-        DCHECK(Primitive::IsIntegralType(type));
-        DCHECK(!Primitive::Is64BitType(type));
-        // Keep the store since the corresponding load isn't eliminated due to different types.
-        // TODO: handle the different int types so that we can eliminate this store.
-        redundant_store = false;
-      } else {
-        redundant_store = true;
-      }
-      // TODO: eliminate the store if the singleton object is not finalizable.
-      redundant_store = false;
     }
-    if (redundant_store) {
-      removed_instructions_.push_back(instruction);
-      substitute_instructions_.push_back(nullptr);
-      TryRemovingNullCheck(instruction);
+    if (same_value || possibly_redundant) {
+      possibly_removed_stores_.push_back(instruction);
     }
 
-    heap_values[idx] = value;
+    if (!same_value) {
+      if (possibly_redundant) {
+        DCHECK(instruction->IsInstanceFieldSet());
+        // Put the store as the heap value. If the value is loaded from heap
+        // by a load later, this store isn't really redundant.
+        heap_values[idx] = instruction;
+      } else {
+        heap_values[idx] = value;
+      }
+    }
     // This store may kill values in other heap locations due to aliasing.
     for (size_t i = 0; i < heap_values.size(); i++) {
+      if (i == idx) {
+        continue;
+      }
       if (heap_values[i] == value) {
         // Same value should be kept even if aliasing happens.
         continue;
@@ -834,9 +882,10 @@
       return;
     }
     if (!heap_location_collector_.MayDeoptimize() &&
-        ref_info->IsSingletonAndNotReturned()) {
-      // The allocation might be eliminated.
-      singleton_new_instances_.push_back(new_instance);
+        ref_info->IsSingletonAndNotReturned() &&
+        !new_instance->IsFinalizable() &&
+        !new_instance->CanThrow()) {
+      // TODO: add new_instance to singleton_new_instances_ and enable allocation elimination.
     }
     ArenaVector<HInstruction*>& heap_values =
         heap_values_for_[new_instance->GetBlock()->GetBlockId()];
@@ -854,10 +903,10 @@
   // Find an instruction's substitute if it should be removed.
   // Return the same instruction if it should not be removed.
   HInstruction* FindSubstitute(HInstruction* instruction) {
-    size_t size = removed_instructions_.size();
+    size_t size = removed_loads_.size();
     for (size_t i = 0; i < size; i++) {
-      if (removed_instructions_[i] == instruction) {
-        return substitute_instructions_[i];
+      if (removed_loads_[i] == instruction) {
+        return substitute_instructions_for_loads_[i];
       }
     }
     return instruction;
@@ -871,8 +920,13 @@
 
   // We record the instructions that should be eliminated but may be
   // used by heap locations. They'll be removed in the end.
-  ArenaVector<HInstruction*> removed_instructions_;
-  ArenaVector<HInstruction*> substitute_instructions_;
+  ArenaVector<HInstruction*> removed_loads_;
+  ArenaVector<HInstruction*> substitute_instructions_for_loads_;
+
+  // Stores in this list may be removed from the list later when it's
+  // found that the store cannot be eliminated.
+  ArenaVector<HInstruction*> possibly_removed_stores_;
+
   ArenaVector<HInstruction*> singleton_new_instances_;
 
   DISALLOW_COPY_AND_ASSIGN(LSEVisitor);
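
The IsFinalizable() bail-out above exists because a finalizable object effectively escapes globally. A C++ analog, using a destructor in place of a Java finalizer purely for illustration:

```cpp
#include <cstdio>

struct Logger {
  int last_value = 0;
  ~Logger() { std::printf("%d\n", last_value); }  // plays the role of finalize()
};

void Example() {
  Logger logger;           // looks like a non-returned singleton
  logger.last_value = 42;  // must be kept: the finalizer/destructor observes it later,
}                          // even though no load in this method reads it back
```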
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 73a44ee..0a39ff3 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -2068,6 +2068,19 @@
   }
 }
 
+std::ostream& operator<<(std::ostream& os, HInvokeStaticOrDirect::ClinitCheckRequirement rhs) {
+  switch (rhs) {
+    case HInvokeStaticOrDirect::ClinitCheckRequirement::kExplicit:
+      return os << "explicit";
+    case HInvokeStaticOrDirect::ClinitCheckRequirement::kImplicit:
+      return os << "implicit";
+    case HInvokeStaticOrDirect::ClinitCheckRequirement::kNone:
+      return os << "none";
+    default:
+      return os << "unknown:" << static_cast<int>(rhs);
+  }
+}
+
 void HInstruction::RemoveEnvironmentUsers() {
   for (HUseIterator<HEnvironment*> use_it(GetEnvUses()); !use_it.Done(); use_it.Advance()) {
     HUseListNode<HEnvironment*>* user_node = use_it.Current();
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 1da2a1d..b68ea0f 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -3505,20 +3505,19 @@
     return GetInvokeType() == kStatic;
   }
 
-  // Remove the art::HLoadClass instruction set as last input by
-  // art::PrepareForRegisterAllocation::VisitClinitCheck in lieu of
-  // the initial art::HClinitCheck instruction (only relevant for
-  // static calls with explicit clinit check).
-  void RemoveLoadClassAsLastInput() {
+  // Remove the HClinitCheck or the replacement HLoadClass (set as last input by
+  // PrepareForRegisterAllocation::VisitClinitCheck() in lieu of the initial HClinitCheck)
+  // instruction; only relevant for static calls with explicit clinit check.
+  void RemoveExplicitClinitCheck(ClinitCheckRequirement new_requirement) {
     DCHECK(IsStaticWithExplicitClinitCheck());
     size_t last_input_index = InputCount() - 1;
     HInstruction* last_input = InputAt(last_input_index);
     DCHECK(last_input != nullptr);
-    DCHECK(last_input->IsLoadClass()) << last_input->DebugName();
+    DCHECK(last_input->IsLoadClass() || last_input->IsClinitCheck()) << last_input->DebugName();
     RemoveAsUserOfInput(last_input_index);
     inputs_.pop_back();
-    clinit_check_requirement_ = ClinitCheckRequirement::kImplicit;
-    DCHECK(IsStaticWithImplicitClinitCheck());
+    clinit_check_requirement_ = new_requirement;
+    DCHECK(!IsStaticWithExplicitClinitCheck());
   }
 
   bool IsStringFactoryFor(HFakeString* str) const {
@@ -3539,7 +3538,7 @@
   }
 
   // Is this a call to a static method whose declaring class has an
-  // explicit intialization check in the graph?
+  // explicit initialization check in the graph?
   bool IsStaticWithExplicitClinitCheck() const {
     return IsStatic() && (clinit_check_requirement_ == ClinitCheckRequirement::kExplicit);
   }
@@ -3583,6 +3582,7 @@
 
   DISALLOW_COPY_AND_ASSIGN(HInvokeStaticOrDirect);
 };
+std::ostream& operator<<(std::ostream& os, HInvokeStaticOrDirect::ClinitCheckRequirement rhs);
 
 class HInvokeVirtual : public HInvoke {
  public:
@@ -3643,10 +3643,14 @@
                uint32_t dex_pc,
                uint16_t type_index,
                const DexFile& dex_file,
+               bool can_throw,
+               bool finalizable,
                QuickEntrypointEnum entrypoint)
       : HExpression(Primitive::kPrimNot, SideEffects::CanTriggerGC(), dex_pc),
         type_index_(type_index),
         dex_file_(dex_file),
+        can_throw_(can_throw),
+        finalizable_(finalizable),
         entrypoint_(entrypoint) {
     SetRawInputAt(0, current_method);
   }
@@ -3656,11 +3660,13 @@
 
   // Calls runtime so needs an environment.
   bool NeedsEnvironment() const OVERRIDE { return true; }
-  // It may throw when called on:
-  //   - interfaces
-  //   - abstract/innaccessible/unknown classes
-  // TODO: optimize when possible.
-  bool CanThrow() const OVERRIDE { return true; }
+
+  // It may throw when called on a type that's not instantiable/accessible.
+  // It can throw OOME.
+  // TODO: distinguish between the two cases so we can for example allow allocation elimination.
+  bool CanThrow() const OVERRIDE { return can_throw_ || true; }
+
+  bool IsFinalizable() const { return finalizable_; }
 
   bool CanBeNull() const OVERRIDE { return false; }
 
@@ -3671,6 +3677,8 @@
  private:
   const uint16_t type_index_;
   const DexFile& dex_file_;
+  const bool can_throw_;
+  const bool finalizable_;
   const QuickEntrypointEnum entrypoint_;
 
   DISALLOW_COPY_AND_ASSIGN(HNewInstance);
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 27ee472..2204921 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -383,10 +383,11 @@
       || instruction_set == kX86_64;
 }
 
-// Read barrier are supported only on x86 and x86-64 at the moment.
+// Read barriers are supported only on ARM, x86 and x86-64 at the moment.
 // TODO: Add support for other architectures and remove this function
 static bool InstructionSetSupportsReadBarrier(InstructionSet instruction_set) {
-  return instruction_set == kX86
+  return instruction_set == kThumb2
+      || instruction_set == kX86
       || instruction_set == kX86_64;
 }
 
@@ -668,8 +669,8 @@
   CompilerDriver* compiler_driver = GetCompilerDriver();
   InstructionSet instruction_set = compiler_driver->GetInstructionSet();
 
-  // Always use the thumb2 assembler: some runtime functionality (like implicit stack
-  // overflow checks) assume thumb2.
+  // Always use the Thumb-2 assembler: some runtime functionality
+  // (like implicit stack overflow checks) assumes Thumb-2.
   if (instruction_set == kArm) {
     instruction_set = kThumb2;
   }
diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc
index ca928ae..f3d075c 100644
--- a/compiler/optimizing/prepare_for_register_allocation.cc
+++ b/compiler/optimizing/prepare_for_register_allocation.cc
@@ -48,12 +48,46 @@
 }
 
 void PrepareForRegisterAllocation::VisitClinitCheck(HClinitCheck* check) {
-  HLoadClass* cls = check->GetLoadClass();
-  check->ReplaceWith(cls);
-  if (check->GetPrevious() == cls) {
+  // Try to find a static invoke from which this check originated.
+  HInvokeStaticOrDirect* invoke = nullptr;
+  for (HUseIterator<HInstruction*> it(check->GetUses()); !it.Done(); it.Advance()) {
+    HInstruction* user = it.Current()->GetUser();
+    if (user->IsInvokeStaticOrDirect() && CanMoveClinitCheck(check, user)) {
+      invoke = user->AsInvokeStaticOrDirect();
+      DCHECK(invoke->IsStaticWithExplicitClinitCheck());
+      invoke->RemoveExplicitClinitCheck(HInvokeStaticOrDirect::ClinitCheckRequirement::kImplicit);
+      break;
+    }
+  }
+  // If we found a static invoke for merging, remove the check from all other static invokes.
+  if (invoke != nullptr) {
+    for (HUseIterator<HInstruction*> it(check->GetUses()); !it.Done(); ) {
+      HInstruction* user = it.Current()->GetUser();
+      DCHECK(invoke->StrictlyDominates(user));  // All other uses must be dominated.
+      it.Advance();  // Advance before removing the node; the next node reference is preserved.
+      if (user->IsInvokeStaticOrDirect()) {
+        user->AsInvokeStaticOrDirect()->RemoveExplicitClinitCheck(
+            HInvokeStaticOrDirect::ClinitCheckRequirement::kNone);
+      }
+    }
+  }
+
+  HLoadClass* load_class = check->GetLoadClass();
+  bool can_merge_with_load_class = CanMoveClinitCheck(load_class, check);
+
+  check->ReplaceWith(load_class);
+
+  if (invoke != nullptr) {
+    // Remove the check from the graph. It has been merged into the invoke.
+    check->GetBlock()->RemoveInstruction(check);
+    // Check if we can merge the load class as well.
+    if (can_merge_with_load_class && !load_class->HasUses()) {
+      load_class->GetBlock()->RemoveInstruction(load_class);
+    }
+  } else if (can_merge_with_load_class) {
     // Pass the initialization duty to the `HLoadClass` instruction,
     // and remove the instruction from the graph.
-    cls->SetMustGenerateClinitCheck(true);
+    load_class->SetMustGenerateClinitCheck(true);
     check->GetBlock()->RemoveInstruction(check);
   }
 }
@@ -86,30 +120,60 @@
     DCHECK(last_input != nullptr)
         << "Last input is not HLoadClass. It is " << last_input->DebugName();
 
-    // Remove a load class instruction as last input of a static
-    // invoke, which has been added (along with a clinit check,
-    // removed by PrepareForRegisterAllocation::VisitClinitCheck
-    // previously) by the graph builder during the creation of the
-    // static invoke instruction, but is no longer required at this
-    // stage (i.e., after inlining has been performed).
-    invoke->RemoveLoadClassAsLastInput();
+    // Detach the explicit class initialization check from the invoke.
+    // Keeping track of the initializing instruction is no longer required
+    // at this stage (i.e., after inlining has been performed).
+    invoke->RemoveExplicitClinitCheck(HInvokeStaticOrDirect::ClinitCheckRequirement::kNone);
 
-    // The static call will initialize the class so there's no need for a clinit check if
-    // it's the first user.
-    // There is one special case where we still need the clinit check, when inlining. Because
-    // currently the callee is responsible for reporting parameters to the GC, the code
-    // that walks the stack during `artQuickResolutionTrampoline` cannot be interrupted for GC.
-    // Therefore we cannot allocate any object in that code, including loading a new class.
-    if (last_input == invoke->GetPrevious() && !invoke->IsFromInlinedInvoke()) {
-      last_input->SetMustGenerateClinitCheck(false);
+    // Merging with load class should have happened in VisitClinitCheck().
+    DCHECK(!CanMoveClinitCheck(last_input, invoke));
+  }
+}
 
-      // If the load class instruction is no longer used, remove it from
-      // the graph.
-      if (!last_input->HasUses()) {
-        last_input->GetBlock()->RemoveInstruction(last_input);
-      }
+bool PrepareForRegisterAllocation::CanMoveClinitCheck(HInstruction* input, HInstruction* user) {
+  // Determine if input and user come from the same dex instruction, so that we can move
+  // the clinit check responsibility from one to the other, i.e. from HClinitCheck (user)
+  // to HLoadClass (input), or from HClinitCheck (input) to HInvokeStaticOrDirect (user).
+
+  // Start with a quick dex pc check.
+  if (user->GetDexPc() != input->GetDexPc()) {
+    return false;
+  }
+
+  // Now do a thorough environment check that this is really coming from the same instruction in
+  // the same inlined graph. Unfortunately, we have to go through the whole environment chain.
+  HEnvironment* user_environment = user->GetEnvironment();
+  HEnvironment* input_environment = input->GetEnvironment();
+  while (user_environment != nullptr || input_environment != nullptr) {
+    if (user_environment == nullptr || input_environment == nullptr) {
+      // Different environment chain length. This happens when a method is called
+      // once directly and once indirectly through another inlined method.
+      return false;
+    }
+    if (user_environment->GetDexPc() != input_environment->GetDexPc() ||
+        user_environment->GetMethodIdx() != input_environment->GetMethodIdx() ||
+        !IsSameDexFile(user_environment->GetDexFile(), input_environment->GetDexFile())) {
+      return false;
+    }
+    user_environment = user_environment->GetParent();
+    input_environment = input_environment->GetParent();
+  }
+
+  // Check for code motion taking the input to a different block.
+  if (user->GetBlock() != input->GetBlock()) {
+    return false;
+  }
+
+  // In debug mode, check that we have not inserted a throwing instruction
+  // or an instruction with side effects between input and user.
+  if (kIsDebugBuild) {
+    for (HInstruction* between = input->GetNext(); between != user; between = between->GetNext()) {
+      CHECK(between != nullptr);  // User must be after input in the same block.
+      CHECK(!between->CanThrow());
+      CHECK(!between->HasSideEffects());
     }
   }
+  return true;
 }
 
 }  // namespace art
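
The environment check in CanMoveClinitCheck() above walks both environment chains in lock-step and requires the same dex pc, method index and dex file at every inlining level, rejecting the pair if the chains have different lengths. A standalone sketch of that comparison, assuming a simplified Env type rather than the real HEnvironment:

#include <cstdint>

struct Env {
  uint32_t dex_pc;
  uint32_t method_idx;
  const void* dex_file;  // Pointer identity stands in for IsSameDexFile().
  const Env* parent;     // Outer environment when the method was inlined.
};

bool FromSameDexInstruction(const Env* user, const Env* input) {
  while (user != nullptr || input != nullptr) {
    if (user == nullptr || input == nullptr) {
      return false;  // Different chain lengths: one path was inlined, the other was not.
    }
    if (user->dex_pc != input->dex_pc ||
        user->method_idx != input->method_idx ||
        user->dex_file != input->dex_file) {
      return false;
    }
    user = user->parent;
    input = input->parent;
  }
  return true;
}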
diff --git a/compiler/optimizing/prepare_for_register_allocation.h b/compiler/optimizing/prepare_for_register_allocation.h
index d7f277f..a70fb30 100644
--- a/compiler/optimizing/prepare_for_register_allocation.h
+++ b/compiler/optimizing/prepare_for_register_allocation.h
@@ -41,6 +41,8 @@
   void VisitCondition(HCondition* condition) OVERRIDE;
   void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE;
 
+  bool CanMoveClinitCheck(HInstruction* input, HInstruction* user);
+
   DISALLOW_COPY_AND_ASSIGN(PrepareForRegisterAllocation);
 };
 
diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc
index 3b78264..0d05c49 100644
--- a/compiler/optimizing/reference_type_propagation.cc
+++ b/compiler/optimizing/reference_type_propagation.cc
@@ -614,23 +614,36 @@
   }
 
   bool is_exact = a.IsExact() && b.IsExact();
-  Handle<mirror::Class> type_handle;
+  ReferenceTypeInfo::TypeHandle result_type_handle;
+  ReferenceTypeInfo::TypeHandle a_type_handle = a.GetTypeHandle();
+  ReferenceTypeInfo::TypeHandle b_type_handle = b.GetTypeHandle();
+  bool a_is_interface = a_type_handle->IsInterface();
+  bool b_is_interface = b_type_handle->IsInterface();
 
   if (a.GetTypeHandle().Get() == b.GetTypeHandle().Get()) {
-    type_handle = a.GetTypeHandle();
+    result_type_handle = a_type_handle;
   } else if (a.IsSupertypeOf(b)) {
-    type_handle = a.GetTypeHandle();
+    result_type_handle = a_type_handle;
     is_exact = false;
   } else if (b.IsSupertypeOf(a)) {
-    type_handle = b.GetTypeHandle();
+    result_type_handle = b_type_handle;
+    is_exact = false;
+  } else if (!a_is_interface && !b_is_interface) {
+    result_type_handle = handles_->NewHandle(a_type_handle->GetCommonSuperClass(b_type_handle));
     is_exact = false;
   } else {
-    // TODO: Find the first common super class.
-    type_handle = object_class_handle_;
+    // This can happen if:
+    //    - both types are interfaces. TODO(calin): implement
+    //    - one is an interface, the other a class, and the type does not implement the interface
+    //      e.g:
+    //        void foo(Interface i, boolean cond) {
+    //          Object o = cond ? i : new Object();
+    //        }
+    result_type_handle = object_class_handle_;
     is_exact = false;
   }
 
-  return ReferenceTypeInfo::Create(type_handle, is_exact);
+  return ReferenceTypeInfo::Create(result_type_handle, is_exact);
 }
 
 static void UpdateArrayGet(HArrayGet* instr,
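
The merge rule above picks the type itself when both inputs are equal, the supertype when one subsumes the other, the first common superclass when both are classes, and falls back to Object when an interface is involved and neither subsumes the other. A toy sketch of that lattice walk over a fixed hypothetical hierarchy (Object <- A <- B, plus interface I; not the real ReferenceTypeInfo/Handle API):

#include <cstdint>

enum TypeId : uint8_t { kObject = 0, kClassA = 1, kClassB = 2, kInterfaceI = 3 };
constexpr TypeId kParent[] = { kObject, kObject, kClassA, kObject };
constexpr bool kIsInterface[] = { false, false, false, true };

bool IsSupertypeOf(TypeId super, TypeId sub) {
  for (TypeId t = sub; ; t = kParent[t]) {
    if (t == super) return true;
    if (t == kObject) return false;
  }
}

TypeId CommonSuperClass(TypeId a, TypeId b) {
  for (TypeId t = a; ; t = kParent[t]) {
    if (IsSupertypeOf(t, b) || t == kObject) return t;
  }
}

// Mirrors the merge above; clears *is_exact whenever the two inputs differ.
TypeId MergeTypes(TypeId a, TypeId b, bool* is_exact) {
  if (a == b) return a;
  *is_exact = false;
  if (IsSupertypeOf(a, b)) return a;
  if (IsSupertypeOf(b, a)) return b;
  if (!kIsInterface[a] && !kIsInterface[b]) return CommonSuperClass(a, b);
  return kObject;  // Interface involved with no subtype relation: fall back to Object.
}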
diff --git a/runtime/Android.mk b/runtime/Android.mk
index 1fdffe3..0b0f094 100644
--- a/runtime/Android.mk
+++ b/runtime/Android.mk
@@ -60,6 +60,7 @@
   gc/collector/concurrent_copying.cc \
   gc/collector/garbage_collector.cc \
   gc/collector/immune_region.cc \
+  gc/collector/immune_spaces.cc \
   gc/collector/mark_compact.cc \
   gc/collector/mark_sweep.cc \
   gc/collector/partial_mark_sweep.cc \
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index dde1001..d5a5ea6 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -322,7 +322,8 @@
   std::fill_n(find_array_class_cache_, kFindArrayCacheSize, GcRoot<mirror::Class>(nullptr));
 }
 
-void ClassLinker::InitWithoutImage(std::vector<std::unique_ptr<const DexFile>> boot_class_path) {
+bool ClassLinker::InitWithoutImage(std::vector<std::unique_ptr<const DexFile>> boot_class_path,
+                                   std::string* error_msg) {
   VLOG(startup) << "ClassLinker::Init";
 
   Thread* const self = Thread::Current();
@@ -477,9 +478,15 @@
   // Setup boot_class_path_ and register class_path now that we can use AllocObjectArray to create
   // DexCache instances. Needs to be after String, Field, Method arrays since AllocDexCache uses
   // these roots.
-  CHECK_NE(0U, boot_class_path.size());
+  if (boot_class_path.empty()) {
+    *error_msg = "Boot classpath is empty.";
+    return false;
+  }
   for (auto& dex_file : boot_class_path) {
-    CHECK(dex_file.get() != nullptr);
+    if (dex_file.get() == nullptr) {
+      *error_msg = "Null dex file.";
+      return false;
+    }
     AppendToBootClassPath(self, *dex_file);
     opened_dex_files_.push_back(std::move(dex_file));
   }
@@ -660,6 +667,8 @@
   FinishInit(self);
 
   VLOG(startup) << "ClassLinker::InitFromCompiler exiting";
+
+  return true;
 }
 
 void ClassLinker::FinishInit(Thread* self) {
@@ -850,7 +859,7 @@
   DISALLOW_COPY_AND_ASSIGN(SetInterpreterEntrypointArtMethodVisitor);
 };
 
-void ClassLinker::InitFromImage() {
+bool ClassLinker::InitFromImage(std::string* error_msg) {
   VLOG(startup) << "ClassLinker::InitFromImage entering";
   CHECK(!init_done_);
 
@@ -895,22 +904,32 @@
                                                          java_lang_Object->GetObjectSize(),
                                                          VoidFunctor()));
 
-  CHECK_EQ(oat_file->GetOatHeader().GetDexFileCount(),
-           static_cast<uint32_t>(dex_caches->GetLength()));
+  if (oat_file->GetOatHeader().GetDexFileCount() !=
+      static_cast<uint32_t>(dex_caches->GetLength())) {
+    *error_msg = "Dex cache count and dex file count mismatch while trying to initialize from "
+                 "image";
+    return false;
+  }
   for (int32_t i = 0; i < dex_caches->GetLength(); i++) {
     StackHandleScope<1> hs2(self);
     Handle<mirror::DexCache> dex_cache(hs2.NewHandle(dex_caches->Get(i)));
     const std::string& dex_file_location(dex_cache->GetLocation()->ToModifiedUtf8());
     const OatFile::OatDexFile* oat_dex_file = oat_file->GetOatDexFile(dex_file_location.c_str(),
                                                                       nullptr);
-    CHECK(oat_dex_file != nullptr) << oat_file->GetLocation() << " " << dex_file_location;
-    std::string error_msg;
-    std::unique_ptr<const DexFile> dex_file = oat_dex_file->OpenDexFile(&error_msg);
+    if (oat_dex_file == nullptr) {
+      *error_msg = StringPrintf("Failed finding oat dex file for %s %s",
+                                oat_file->GetLocation().c_str(),
+                                dex_file_location.c_str());
+      return false;
+    }
+    std::string inner_error_msg;
+    std::unique_ptr<const DexFile> dex_file = oat_dex_file->OpenDexFile(&inner_error_msg);
     if (dex_file == nullptr) {
-      LOG(FATAL) << "Failed to open dex file " << dex_file_location
-                 << " from within oat file " << oat_file->GetLocation()
-                 << " error '" << error_msg << "'";
-      UNREACHABLE();
+      *error_msg = StringPrintf("Failed to open dex file %s from within oat file %s error '%s'",
+                                dex_file_location.c_str(),
+                                oat_file->GetLocation().c_str(),
+                                inner_error_msg.c_str());
+      return false;
     }
 
     if (kSanityCheckObjects) {
@@ -920,13 +939,22 @@
                                        space);
     }
 
-    CHECK_EQ(dex_file->GetLocationChecksum(), oat_dex_file->GetDexFileLocationChecksum());
+    if (dex_file->GetLocationChecksum() != oat_dex_file->GetDexFileLocationChecksum()) {
+      *error_msg = StringPrintf("Checksums do not match for %s: %x vs %x",
+                                dex_file_location.c_str(),
+                                dex_file->GetLocationChecksum(),
+                                oat_dex_file->GetDexFileLocationChecksum());
+      return false;
+    }
 
     AppendToBootClassPath(*dex_file.get(), dex_cache);
     opened_dex_files_.push_back(std::move(dex_file));
   }
 
-  CHECK(ValidPointerSize(image_pointer_size_)) << image_pointer_size_;
+  if (!ValidPointerSize(image_pointer_size_)) {
+    *error_msg = StringPrintf("Invalid image pointer size: %zu", image_pointer_size_);
+    return false;
+  }
 
   // Set classes on AbstractMethod early so that IsMethod tests can be performed during the live
   // bitmap walk.
@@ -934,7 +962,12 @@
     // Only the Aot compiler supports having an image with a different pointer size than the
     // runtime. This happens on the host for compile 32 bit tests since we use a 64 bit libart
     // compiler. We may also use 32 bit dex2oat on a system with 64 bit apps.
-    CHECK_EQ(image_pointer_size_, sizeof(void*));
+    if (image_pointer_size_ != sizeof(void*)) {
+      *error_msg = StringPrintf("Runtime must use current image pointer size: %zu vs %zu",
+                                image_pointer_size_ ,
+                                sizeof(void*));
+      return false;
+    }
   }
 
   if (kSanityCheckObjects) {
@@ -987,6 +1020,8 @@
   FinishInit(self);
 
   VLOG(startup) << "ClassLinker::InitFromImage exiting";
+
+  return true;
 }
 
 bool ClassLinker::ClassInClassTable(mirror::Class* klass) {
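
The ClassLinker changes above convert fatal CHECKs in InitWithoutImage()/InitFromImage() into bool returns with an error_msg out-parameter so the caller can fail gracefully instead of aborting the runtime. A minimal sketch of that pattern (hypothetical names, plain std::string assignment instead of StringPrintf):

#include <cstddef>
#include <string>

// Validation helper: report failure through *error_msg instead of aborting.
bool ValidateDexFileCount(size_t oat_dex_file_count,
                          size_t dex_cache_count,
                          std::string* error_msg) {
  if (oat_dex_file_count != dex_cache_count) {
    *error_msg = "Dex cache count and dex file count mismatch while trying to initialize from image";
    return false;
  }
  return true;
}

// Caller side: propagate the error upwards rather than LOG(FATAL)-ing inside the class linker.
bool InitFromImageLike(size_t oat_dex_file_count, size_t dex_cache_count, std::string* error_msg) {
  if (!ValidateDexFileCount(oat_dex_file_count, dex_cache_count, error_msg)) {
    return false;
  }
  // ... remaining initialization steps, each returning false with *error_msg set on failure.
  return true;
}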
diff --git a/runtime/class_linker.h b/runtime/class_linker.h
index a72b586..5ba9652 100644
--- a/runtime/class_linker.h
+++ b/runtime/class_linker.h
@@ -115,12 +115,15 @@
   ~ClassLinker();
 
   // Initialize class linker by bootstrapping from dex files.
-  void InitWithoutImage(std::vector<std::unique_ptr<const DexFile>> boot_class_path)
+  bool InitWithoutImage(std::vector<std::unique_ptr<const DexFile>> boot_class_path,
+                        std::string* error_msg)
       SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!dex_lock_);
 
   // Initialize class linker from one or more images.
-  void InitFromImage() SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!dex_lock_);
+  bool InitFromImage(std::string* error_msg)
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!dex_lock_);
 
   // Finds a class by its descriptor, loading it if necessary.
   // If class_loader is null, searches boot_class_path_.
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index 13d0b84..32e77b7 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -2684,26 +2684,26 @@
     case JDWP::JT_BOOLEAN:
     case JDWP::JT_BYTE:
       CHECK_EQ(width, 1U);
-      if (!visitor.SetVRegFromDebugger(m, vreg, static_cast<uint32_t>(value), kIntVReg)) {
+      if (!visitor.SetVReg(m, vreg, static_cast<uint32_t>(value), kIntVReg)) {
         return FailSetLocalValue(visitor, vreg, tag, static_cast<uint32_t>(value));
       }
       break;
     case JDWP::JT_SHORT:
     case JDWP::JT_CHAR:
       CHECK_EQ(width, 2U);
-      if (!visitor.SetVRegFromDebugger(m, vreg, static_cast<uint32_t>(value), kIntVReg)) {
+      if (!visitor.SetVReg(m, vreg, static_cast<uint32_t>(value), kIntVReg)) {
         return FailSetLocalValue(visitor, vreg, tag, static_cast<uint32_t>(value));
       }
       break;
     case JDWP::JT_INT:
       CHECK_EQ(width, 4U);
-      if (!visitor.SetVRegFromDebugger(m, vreg, static_cast<uint32_t>(value), kIntVReg)) {
+      if (!visitor.SetVReg(m, vreg, static_cast<uint32_t>(value), kIntVReg)) {
         return FailSetLocalValue(visitor, vreg, tag, static_cast<uint32_t>(value));
       }
       break;
     case JDWP::JT_FLOAT:
       CHECK_EQ(width, 4U);
-      if (!visitor.SetVRegFromDebugger(m, vreg, static_cast<uint32_t>(value), kFloatVReg)) {
+      if (!visitor.SetVReg(m, vreg, static_cast<uint32_t>(value), kFloatVReg)) {
         return FailSetLocalValue(visitor, vreg, tag, static_cast<uint32_t>(value));
       }
       break;
@@ -2721,7 +2721,7 @@
         VLOG(jdwp) << tag << " object " << o << " is an invalid object";
         return JDWP::ERR_INVALID_OBJECT;
       }
-      if (!visitor.SetVRegFromDebugger(m, vreg, static_cast<uint32_t>(reinterpret_cast<uintptr_t>(o)),
+      if (!visitor.SetVReg(m, vreg, static_cast<uint32_t>(reinterpret_cast<uintptr_t>(o)),
                                  kReferenceVReg)) {
         return FailSetLocalValue(visitor, vreg, tag, reinterpret_cast<uintptr_t>(o));
       }
@@ -2729,14 +2729,14 @@
     }
     case JDWP::JT_DOUBLE: {
       CHECK_EQ(width, 8U);
-      if (!visitor.SetVRegPairFromDebugger(m, vreg, value, kDoubleLoVReg, kDoubleHiVReg)) {
+      if (!visitor.SetVRegPair(m, vreg, value, kDoubleLoVReg, kDoubleHiVReg)) {
         return FailSetLocalValue(visitor, vreg, tag, value);
       }
       break;
     }
     case JDWP::JT_LONG: {
       CHECK_EQ(width, 8U);
-      if (!visitor.SetVRegPairFromDebugger(m, vreg, value, kLongLoVReg, kLongHiVReg)) {
+      if (!visitor.SetVRegPair(m, vreg, value, kLongLoVReg, kLongHiVReg)) {
         return FailSetLocalValue(visitor, vreg, tag, value);
       }
       break;
diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc
index 1b98164..bcfcb89 100644
--- a/runtime/gc/collector/concurrent_copying.cc
+++ b/runtime/gc/collector/concurrent_copying.cc
@@ -134,10 +134,10 @@
   WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
   // Mark all of the spaces we never collect as immune.
   for (const auto& space : heap_->GetContinuousSpaces()) {
-    if (space->GetGcRetentionPolicy() == space::kGcRetentionPolicyNeverCollect
-        || space->GetGcRetentionPolicy() == space::kGcRetentionPolicyFullCollect) {
+    if (space->GetGcRetentionPolicy() == space::kGcRetentionPolicyNeverCollect ||
+        space->GetGcRetentionPolicy() == space::kGcRetentionPolicyFullCollect) {
       CHECK(space->IsZygoteSpace() || space->IsImageSpace());
-      CHECK(immune_region_.AddContinuousSpace(space)) << "Failed to add space " << *space;
+      immune_spaces_.AddSpace(space);
       const char* bitmap_name = space->IsImageSpace() ? "cc image space bitmap" :
           "cc zygote space bitmap";
       // TODO: try avoiding using bitmaps for image/zygote to save space.
@@ -164,7 +164,7 @@
               << reinterpret_cast<void*>(region_space_->Limit());
   }
   CheckEmptyMarkStack();
-  immune_region_.Reset();
+  immune_spaces_.Reset();
   bytes_moved_.StoreRelaxed(0);
   objects_moved_.StoreRelaxed(0);
   if (GetCurrentIteration()->GetGcCause() == kGcCauseExplicit ||
@@ -177,7 +177,11 @@
   BindBitmaps();
   if (kVerboseMode) {
     LOG(INFO) << "force_evacuate_all=" << force_evacuate_all_;
-    LOG(INFO) << "Immune region: " << immune_region_.Begin() << "-" << immune_region_.End();
+    LOG(INFO) << "Largest immune region: " << immune_spaces_.GetLargestImmuneRegion().Begin()
+              << "-" << immune_spaces_.GetLargestImmuneRegion().End();
+    for (space::ContinuousSpace* space : immune_spaces_.GetSpaces()) {
+      LOG(INFO) << "Immune space: " << *space;
+    }
     LOG(INFO) << "GC end of InitializePhase";
   }
 }
@@ -300,7 +304,7 @@
   void operator()(mirror::Object* obj) const SHARED_REQUIRES(Locks::mutator_lock_)
       SHARED_REQUIRES(Locks::heap_bitmap_lock_) {
     DCHECK(obj != nullptr);
-    DCHECK(collector_->immune_region_.ContainsObject(obj));
+    DCHECK(collector_->immune_spaces_.ContainsObject(obj));
     accounting::ContinuousSpaceBitmap* cc_bitmap =
         collector_->cc_heap_bitmap_->GetContinuousSpaceBitmap(obj);
     DCHECK(cc_bitmap != nullptr)
@@ -383,15 +387,13 @@
   }
 
   // Immune spaces.
-  for (auto& space : heap_->GetContinuousSpaces()) {
-    if (immune_region_.ContainsSpace(space)) {
-      DCHECK(space->IsImageSpace() || space->IsZygoteSpace());
-      accounting::ContinuousSpaceBitmap* live_bitmap = space->GetLiveBitmap();
-      ConcurrentCopyingImmuneSpaceObjVisitor visitor(this);
-      live_bitmap->VisitMarkedRange(reinterpret_cast<uintptr_t>(space->Begin()),
-                                    reinterpret_cast<uintptr_t>(space->Limit()),
-                                    visitor);
-    }
+  for (auto& space : immune_spaces_.GetSpaces()) {
+    DCHECK(space->IsImageSpace() || space->IsZygoteSpace());
+    accounting::ContinuousSpaceBitmap* live_bitmap = space->GetLiveBitmap();
+    ConcurrentCopyingImmuneSpaceObjVisitor visitor(this);
+    live_bitmap->VisitMarkedRange(reinterpret_cast<uintptr_t>(space->Begin()),
+                                  reinterpret_cast<uintptr_t>(space->Limit()),
+                                  visitor);
   }
 
   Thread* self = Thread::Current();
@@ -1207,7 +1209,7 @@
   for (const auto& space : GetHeap()->GetContinuousSpaces()) {
     if (space->IsContinuousMemMapAllocSpace()) {
       space::ContinuousMemMapAllocSpace* alloc_space = space->AsContinuousMemMapAllocSpace();
-      if (space == region_space_ || immune_region_.ContainsSpace(space)) {
+      if (space == region_space_ || immune_spaces_.ContainsSpace(space)) {
         continue;
       }
       TimingLogger::ScopedTiming split2(
@@ -1506,8 +1508,8 @@
     }
   } else {
     // In a non-moving space.
-    if (immune_region_.ContainsObject(obj)) {
-      LOG(INFO) << "holder is in the image or the zygote space.";
+    if (immune_spaces_.ContainsObject(obj)) {
+      LOG(INFO) << "holder is in an immune image or the zygote space.";
       accounting::ContinuousSpaceBitmap* cc_bitmap =
           cc_heap_bitmap_->GetContinuousSpaceBitmap(obj);
       CHECK(cc_bitmap != nullptr)
@@ -1518,7 +1520,7 @@
         LOG(INFO) << "holder is NOT marked in the bit map.";
       }
     } else {
-      LOG(INFO) << "holder is in a non-moving (or main) space.";
+      LOG(INFO) << "holder is in a non-immune, non-moving (or main) space.";
       accounting::ContinuousSpaceBitmap* mark_bitmap =
           heap_mark_bitmap_->GetContinuousSpaceBitmap(obj);
       accounting::LargeObjectBitmap* los_bitmap =
@@ -1546,7 +1548,7 @@
 void ConcurrentCopying::AssertToSpaceInvariantInNonMovingSpace(mirror::Object* obj,
                                                                mirror::Object* ref) {
   // In a non-moving spaces. Check that the ref is marked.
-  if (immune_region_.ContainsObject(ref)) {
+  if (immune_spaces_.ContainsObject(ref)) {
     accounting::ContinuousSpaceBitmap* cc_bitmap =
         cc_heap_bitmap_->GetContinuousSpaceBitmap(ref);
     CHECK(cc_bitmap != nullptr)
@@ -1931,7 +1933,7 @@
     }
   } else {
     // from_ref is in a non-moving space.
-    if (immune_region_.ContainsObject(from_ref)) {
+    if (immune_spaces_.ContainsObject(from_ref)) {
       accounting::ContinuousSpaceBitmap* cc_bitmap =
           cc_heap_bitmap_->GetContinuousSpaceBitmap(from_ref);
       DCHECK(cc_bitmap != nullptr)
@@ -1985,7 +1987,7 @@
 mirror::Object* ConcurrentCopying::MarkNonMoving(mirror::Object* ref) {
   // ref is in a non-moving space (from_ref == to_ref).
   DCHECK(!region_space_->HasAddress(ref)) << ref;
-  if (immune_region_.ContainsObject(ref)) {
+  if (immune_spaces_.ContainsObject(ref)) {
     accounting::ContinuousSpaceBitmap* cc_bitmap =
         cc_heap_bitmap_->GetContinuousSpaceBitmap(ref);
     DCHECK(cc_bitmap != nullptr)
diff --git a/runtime/gc/collector/concurrent_copying.h b/runtime/gc/collector/concurrent_copying.h
index 27726e2..5d21c59 100644
--- a/runtime/gc/collector/concurrent_copying.h
+++ b/runtime/gc/collector/concurrent_copying.h
@@ -19,7 +19,7 @@
 
 #include "barrier.h"
 #include "garbage_collector.h"
-#include "immune_region.h"
+#include "immune_spaces.h"
 #include "jni.h"
 #include "object_callbacks.h"
 #include "offsets.h"
@@ -200,7 +200,7 @@
   bool is_marking_;                       // True while marking is ongoing.
   bool is_active_;                        // True while the collection is ongoing.
   bool is_asserting_to_space_invariant_;  // True while asserting the to-space invariant.
-  ImmuneRegion immune_region_;
+  ImmuneSpaces immune_spaces_;
   std::unique_ptr<accounting::HeapBitmap> cc_heap_bitmap_;
   std::vector<accounting::SpaceBitmap<kObjectAlignment>*> cc_bitmaps_;
   accounting::SpaceBitmap<kObjectAlignment>* region_space_bitmap_;
diff --git a/runtime/gc/collector/immune_region.cc b/runtime/gc/collector/immune_region.cc
index 3e1c944..8a04c17 100644
--- a/runtime/gc/collector/immune_region.cc
+++ b/runtime/gc/collector/immune_region.cc
@@ -32,39 +32,6 @@
   SetEnd(nullptr);
 }
 
-bool ImmuneRegion::AddContinuousSpace(space::ContinuousSpace* space) {
-  // Bind live to mark bitmap if necessary.
-  if (space->GetLiveBitmap() != space->GetMarkBitmap()) {
-    CHECK(space->IsContinuousMemMapAllocSpace());
-    space->AsContinuousMemMapAllocSpace()->BindLiveToMarkBitmap();
-  }
-  mirror::Object* space_begin = reinterpret_cast<mirror::Object*>(space->Begin());
-  mirror::Object* space_limit = reinterpret_cast<mirror::Object*>(space->Limit());
-  if (IsEmpty()) {
-    SetBegin(space_begin);
-    SetEnd(space_limit);
-  } else {
-    if (space_limit <= begin_) {  // Space is before the immune region.
-      SetBegin(space_begin);
-    } else if (space_begin >= end_) {  // Space is after the immune region.
-      SetEnd(space_limit);
-    } else {
-      return false;
-    }
-  }
-  return true;
-}
-
-bool ImmuneRegion::ContainsSpace(const space::ContinuousSpace* space) const {
-  bool contains =
-      begin_ <= reinterpret_cast<mirror::Object*>(space->Begin()) &&
-      end_ >= reinterpret_cast<mirror::Object*>(space->Limit());
-  if (kIsDebugBuild && contains) {
-    // A bump pointer space shoult not be in the immune region.
-    DCHECK(space->GetType() != space::kSpaceTypeBumpPointerSpace);
-  }
-  return contains;
-}
 
 }  // namespace collector
 }  // namespace gc
diff --git a/runtime/gc/collector/immune_region.h b/runtime/gc/collector/immune_region.h
index 3ead501..b60426d 100644
--- a/runtime/gc/collector/immune_region.h
+++ b/runtime/gc/collector/immune_region.h
@@ -39,35 +39,34 @@
 class ImmuneRegion {
  public:
   ImmuneRegion();
+
   void Reset();
-  bool AddContinuousSpace(space::ContinuousSpace* space)
-      REQUIRES(Locks::heap_bitmap_lock_);
-  bool ContainsSpace(const space::ContinuousSpace* space) const;
+
   // Returns true if an object is inside of the immune region (assumed to be marked).
-  bool ContainsObject(const mirror::Object* obj) const ALWAYS_INLINE {
+  ALWAYS_INLINE bool ContainsObject(const mirror::Object* obj) const {
     // Note: Relies on integer underflow behavior.
     return reinterpret_cast<uintptr_t>(obj) - reinterpret_cast<uintptr_t>(begin_) < size_;
   }
+
   void SetBegin(mirror::Object* begin) {
     begin_ = begin;
     UpdateSize();
   }
+
   void SetEnd(mirror::Object* end) {
     end_ = end;
     UpdateSize();
   }
 
-  mirror::Object* Begin() {
+  mirror::Object* Begin() const {
     return begin_;
   }
-  mirror::Object* End() {
+
+  mirror::Object* End() const {
     return end_;
   }
 
  private:
-  bool IsEmpty() const {
-    return size_ == 0;
-  }
   void UpdateSize() {
     size_ = reinterpret_cast<uintptr_t>(end_) - reinterpret_cast<uintptr_t>(begin_);
   }
diff --git a/runtime/gc/collector/immune_spaces.cc b/runtime/gc/collector/immune_spaces.cc
new file mode 100644
index 0000000..8f9a9e2
--- /dev/null
+++ b/runtime/gc/collector/immune_spaces.cc
@@ -0,0 +1,98 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "immune_spaces.h"
+
+#include "gc/space/space-inl.h"
+#include "mirror/object.h"
+
+namespace art {
+namespace gc {
+namespace collector {
+
+void ImmuneSpaces::Reset() {
+  spaces_.clear();
+  largest_immune_region_.Reset();
+}
+
+void ImmuneSpaces::CreateLargestImmuneRegion() {
+  uintptr_t best_begin = 0u;
+  uintptr_t best_end = 0u;
+  uintptr_t cur_begin = 0u;
+  uintptr_t cur_end = 0u;
+  // TODO: If the last space is an image space, we may include its oat file in the immune region.
+  // This could potentially hide heap corruption bugs if there are invalid pointers that point
+  // into the boot oat code.
+  for (space::ContinuousSpace* space : GetSpaces()) {
+    uintptr_t space_begin = reinterpret_cast<uintptr_t>(space->Begin());
+    uintptr_t space_end = reinterpret_cast<uintptr_t>(space->Limit());
+    if (space->IsImageSpace()) {
+      // For the boot image, the boot oat file is always directly after. For app images it may not
+      // be if the app image was mapped at a random address.
+      space::ImageSpace* image_space = space->AsImageSpace();
+      // Update the end to include the other non-heap sections.
+      space_end = RoundUp(reinterpret_cast<uintptr_t>(image_space->GetImageEnd()), kPageSize);
+      uintptr_t oat_begin = reinterpret_cast<uintptr_t>(image_space->GetOatFileBegin());
+      uintptr_t oat_end = reinterpret_cast<uintptr_t>(image_space->GetOatFileEnd());
+      if (space_end == oat_begin) {
+        DCHECK_GE(oat_end, oat_begin);
+        space_end = oat_end;
+      }
+    }
+    if (cur_begin == 0u) {
+      cur_begin = space_begin;
+      cur_end = space_end;
+    } else if (cur_end == space_begin) {
+      // Extend current region.
+      cur_end = space_end;
+    } else {
+      // Reset.
+      cur_begin = 0;
+      cur_end = 0;
+    }
+    if (cur_end - cur_begin > best_end - best_begin) {
+      // Improvement, update the best range.
+      best_begin = cur_begin;
+      best_end = cur_end;
+    }
+  }
+  largest_immune_region_.SetBegin(reinterpret_cast<mirror::Object*>(best_begin));
+  largest_immune_region_.SetEnd(reinterpret_cast<mirror::Object*>(best_end));
+}
+
+void ImmuneSpaces::AddSpace(space::ContinuousSpace* space) {
+  DCHECK(spaces_.find(space) == spaces_.end()) << *space;
+  // Bind live to mark bitmap if necessary.
+  if (space->GetLiveBitmap() != space->GetMarkBitmap()) {
+    CHECK(space->IsContinuousMemMapAllocSpace());
+    space->AsContinuousMemMapAllocSpace()->BindLiveToMarkBitmap();
+  }
+  spaces_.insert(space);
+  CreateLargestImmuneRegion();
+}
+
+bool ImmuneSpaces::CompareByBegin::operator()(space::ContinuousSpace* a, space::ContinuousSpace* b)
+    const {
+  return a->Begin() < b->Begin();
+}
+
+bool ImmuneSpaces::ContainsSpace(space::ContinuousSpace* space) const {
+  return spaces_.find(space) != spaces_.end();
+}
+
+}  // namespace collector
+}  // namespace gc
+}  // namespace art
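
CreateLargestImmuneRegion() above scans the spaces in begin-address order, extends the current run while each space starts exactly where the previous one ended (treating a boot image and its trailing oat file as one block), and keeps the largest run seen for the fast path check. A standalone sketch of the same scan over plain (begin, end) address pairs, without the image/oat special case:

#include <algorithm>
#include <cstdint>
#include <utility>
#include <vector>

std::pair<uintptr_t, uintptr_t> LargestContiguousRun(
    std::vector<std::pair<uintptr_t, uintptr_t>> spaces) {
  std::sort(spaces.begin(), spaces.end());  // The real code keeps a std::set ordered by Begin().
  uintptr_t best_begin = 0u;
  uintptr_t best_end = 0u;
  uintptr_t cur_begin = 0u;
  uintptr_t cur_end = 0u;
  for (const auto& space : spaces) {
    if (cur_begin == 0u) {
      cur_begin = space.first;   // First space starts the current run.
      cur_end = space.second;
    } else if (cur_end == space.first) {
      cur_end = space.second;    // Adjacent space: extend the current run.
    } else {
      cur_begin = 0u;            // Gap found: reset, as in the patch.
      cur_end = 0u;
    }
    if (cur_end - cur_begin > best_end - best_begin) {
      best_begin = cur_begin;    // New largest run.
      best_end = cur_end;
    }
  }
  return std::make_pair(best_begin, best_end);
}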
diff --git a/runtime/gc/collector/immune_spaces.h b/runtime/gc/collector/immune_spaces.h
new file mode 100644
index 0000000..72cb60d
--- /dev/null
+++ b/runtime/gc/collector/immune_spaces.h
@@ -0,0 +1,95 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_GC_COLLECTOR_IMMUNE_SPACES_H_
+#define ART_RUNTIME_GC_COLLECTOR_IMMUNE_SPACES_H_
+
+#include "base/macros.h"
+#include "base/mutex.h"
+#include "gc/space/space.h"
+#include "immune_region.h"
+
+#include <set>
+
+namespace art {
+namespace gc {
+namespace space {
+class ContinuousSpace;
+}  // namespace space
+
+namespace collector {
+
+// ImmuneSpaces is a set of spaces which are not going to have any objects become marked during the
+// GC.
+class ImmuneSpaces {
+  class CompareByBegin {
+   public:
+    bool operator()(space::ContinuousSpace* a, space::ContinuousSpace* b) const;
+  };
+
+ public:
+  ImmuneSpaces() {}
+  void Reset();
+
+  // Add a continuous space to the immune spaces set.
+  void AddSpace(space::ContinuousSpace* space) REQUIRES(Locks::heap_bitmap_lock_);
+
+  // Returns true if an object is inside the largest immune region (assumed to be marked). An
+  // object outside this region may still be inside one of the immune spaces.
+  ALWAYS_INLINE bool IsInImmuneRegion(const mirror::Object* obj) const {
+    return largest_immune_region_.ContainsObject(obj);
+  }
+
+  // Return true if the space is contained.
+  bool ContainsSpace(space::ContinuousSpace* space) const;
+
+  // Return the set of immune spaces, ordered by begin address.
+  const std::set<space::ContinuousSpace*, CompareByBegin>& GetSpaces() {
+    return spaces_;
+  }
+
+  // Return the associated largest immune region.
+  const ImmuneRegion& GetLargestImmuneRegion() const {
+    return largest_immune_region_;
+  }
+
+  // Return true if the object is contained by any of the immune spaces.
+  ALWAYS_INLINE bool ContainsObject(const mirror::Object* obj) const {
+    if (largest_immune_region_.ContainsObject(obj)) {
+      return true;
+    }
+    for (space::ContinuousSpace* space : spaces_) {
+      if (space->HasAddress(obj)) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+ private:
+  // Set up the immune region to cover the largest continuous run of immune spaces. The immune
+  // region exists only for the fast path lookup.
+  void CreateLargestImmuneRegion();
+
+  std::set<space::ContinuousSpace*, CompareByBegin> spaces_;
+  ImmuneRegion largest_immune_region_;
+};
+
+}  // namespace collector
+}  // namespace gc
+}  // namespace art
+
+#endif  // ART_RUNTIME_GC_COLLECTOR_IMMUNE_SPACES_H_
diff --git a/runtime/gc/collector/immune_spaces_test.cc b/runtime/gc/collector/immune_spaces_test.cc
new file mode 100644
index 0000000..f741117
--- /dev/null
+++ b/runtime/gc/collector/immune_spaces_test.cc
@@ -0,0 +1,150 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "common_runtime_test.h"
+#include "gc/collector/immune_spaces.h"
+#include "gc/space/image_space.h"
+#include "gc/space/space-inl.h"
+#include "oat_file.h"
+#include "thread-inl.h"
+
+namespace art {
+namespace mirror {
+class Object;
+}  // namespace mirror
+namespace gc {
+namespace collector {
+
+class ImmuneSpacesTest : public CommonRuntimeTest {};
+
+class DummySpace : public space::ContinuousSpace {
+ public:
+  DummySpace(uint8_t* begin, uint8_t* end)
+      : ContinuousSpace("DummySpace",
+                        space::kGcRetentionPolicyNeverCollect,
+                        begin,
+                        end,
+                        /*limit*/end) {}
+
+  space::SpaceType GetType() const OVERRIDE {
+    return space::kSpaceTypeMallocSpace;
+  }
+
+  bool CanMoveObjects() const OVERRIDE {
+    return false;
+  }
+
+  accounting::ContinuousSpaceBitmap* GetLiveBitmap() const OVERRIDE {
+    return nullptr;
+  }
+
+  accounting::ContinuousSpaceBitmap* GetMarkBitmap() const OVERRIDE {
+    return nullptr;
+  }
+};
+
+TEST_F(ImmuneSpacesTest, AppendBasic) {
+  ImmuneSpaces spaces;
+  uint8_t* const base = reinterpret_cast<uint8_t*>(0x1000);
+  DummySpace a(base, base + 45 * KB);
+  DummySpace b(a.Limit(), a.Limit() + 813 * KB);
+  {
+    WriterMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
+    spaces.AddSpace(&a);
+    spaces.AddSpace(&b);
+  }
+  EXPECT_TRUE(spaces.ContainsSpace(&a));
+  EXPECT_TRUE(spaces.ContainsSpace(&b));
+  EXPECT_EQ(reinterpret_cast<uint8_t*>(spaces.GetLargestImmuneRegion().Begin()), a.Begin());
+  EXPECT_EQ(reinterpret_cast<uint8_t*>(spaces.GetLargestImmuneRegion().End()), b.Limit());
+}
+
+class DummyImageSpace : public space::ImageSpace {
+ public:
+  DummyImageSpace(MemMap* map, accounting::ContinuousSpaceBitmap* live_bitmap)
+      : ImageSpace("DummyImageSpace",
+                   /*image_location*/"",
+                   map,
+                   live_bitmap,
+                   map->End()) {}
+
+  // oat_size is the size of the oat file that follows the image.
+  static DummyImageSpace* Create(size_t size, size_t oat_size) {
+    std::string error_str;
+    std::unique_ptr<MemMap> map(MemMap::MapAnonymous("DummyImageSpace",
+                                                     nullptr,
+                                                     size,
+                                                     PROT_READ | PROT_WRITE,
+                                                     /*low_4gb*/true,
+                                                     /*reuse*/false,
+                                                     &error_str));
+    if (map == nullptr) {
+      LOG(ERROR) << error_str;
+      return nullptr;
+    }
+    std::unique_ptr<accounting::ContinuousSpaceBitmap> live_bitmap(
+        accounting::ContinuousSpaceBitmap::Create("bitmap", map->Begin(), map->Size()));
+    if (live_bitmap == nullptr) {
+      return nullptr;
+    }
+    // Create image header.
+    ImageSection sections[ImageHeader::kSectionCount];
+    new (map->Begin()) ImageHeader(
+        /*image_begin*/PointerToLowMemUInt32(map->Begin()),
+        /*image_size*/map->Size(),
+        sections,
+        /*image_roots*/PointerToLowMemUInt32(map->Begin()) + 1,
+        /*oat_checksum*/0u,
+        /*oat_file_begin*/PointerToLowMemUInt32(map->End()),
+        /*oat_data_begin*/PointerToLowMemUInt32(map->End()),
+        /*oat_data_end*/PointerToLowMemUInt32(map->End() + oat_size),
+        /*oat_file_end*/PointerToLowMemUInt32(map->End() + oat_size),
+        /*pointer_size*/sizeof(void*),
+        /*compile_pic*/false);
+    return new DummyImageSpace(map.release(), live_bitmap.release());
+  }
+};
+
+TEST_F(ImmuneSpacesTest, AppendAfterImage) {
+  ImmuneSpaces spaces;
+  constexpr size_t image_size = 123 * kPageSize;
+  constexpr size_t image_oat_size = 321 * kPageSize;
+  std::unique_ptr<DummyImageSpace> image_space(DummyImageSpace::Create(image_size, image_oat_size));
+  ASSERT_TRUE(image_space != nullptr);
+  const ImageHeader& image_header = image_space->GetImageHeader();
+  EXPECT_EQ(image_header.GetImageSize(), image_size);
+  EXPECT_EQ(static_cast<size_t>(image_header.GetOatFileEnd() - image_header.GetOatFileBegin()),
+            image_oat_size);
+  DummySpace space(image_header.GetOatFileEnd(), image_header.GetOatFileEnd() + 813 * kPageSize);
+  EXPECT_NE(image_space->Limit(), space.Begin());
+  {
+    WriterMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
+    spaces.AddSpace(image_space.get());
+    spaces.AddSpace(&space);
+  }
+  EXPECT_TRUE(spaces.ContainsSpace(image_space.get()));
+  EXPECT_TRUE(spaces.ContainsSpace(&space));
+  // CreateLargestImmuneRegion should have coalesced the two spaces since the oat code after the
+  // image prevents gaps.
+  // Check that we have a continuous region.
+  EXPECT_EQ(reinterpret_cast<uint8_t*>(spaces.GetLargestImmuneRegion().Begin()),
+            image_space->Begin());
+  EXPECT_EQ(reinterpret_cast<uint8_t*>(spaces.GetLargestImmuneRegion().End()), space.Limit());
+}
+
+}  // namespace collector
+}  // namespace gc
+}  // namespace art
diff --git a/runtime/gc/collector/mark_compact.cc b/runtime/gc/collector/mark_compact.cc
index f561764..ce6467a 100644
--- a/runtime/gc/collector/mark_compact.cc
+++ b/runtime/gc/collector/mark_compact.cc
@@ -45,7 +45,7 @@
   for (const auto& space : GetHeap()->GetContinuousSpaces()) {
     if (space->GetGcRetentionPolicy() == space::kGcRetentionPolicyNeverCollect ||
         space->GetGcRetentionPolicy() == space::kGcRetentionPolicyFullCollect) {
-      CHECK(immune_region_.AddContinuousSpace(space)) << "Failed to add space " << *space;
+      immune_spaces_.AddSpace(space);
     }
   }
 }
@@ -115,7 +115,7 @@
   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
   mark_stack_ = heap_->GetMarkStack();
   DCHECK(mark_stack_ != nullptr);
-  immune_region_.Reset();
+  immune_spaces_.Reset();
   CHECK(space_->CanMoveObjects()) << "Attempting compact non-movable space from " << *space_;
   // TODO: I don't think we should need heap bitmap lock to Get the mark bitmap.
   ReaderMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
@@ -148,7 +148,7 @@
     // Verify all the objects have the correct forward pointer installed.
     obj->AssertReadBarrierPointer();
   }
-  if (!immune_region_.ContainsObject(obj)) {
+  if (!immune_spaces_.IsInImmuneRegion(obj)) {
     if (objects_before_forwarding_->HasAddress(obj)) {
       if (!objects_before_forwarding_->Set(obj)) {
         MarkStackPush(obj);  // This object was not previously marked.
@@ -218,7 +218,7 @@
   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
   for (auto& space : heap_->GetContinuousSpaces()) {
     // If the space is immune then we need to mark the references to other spaces.
-    if (immune_region_.ContainsSpace(space)) {
+    if (immune_spaces_.ContainsSpace(space)) {
       accounting::ModUnionTable* table = heap_->FindModUnionTableFromSpace(space);
       if (table != nullptr) {
         // TODO: Improve naming.
@@ -475,7 +475,7 @@
 }
 
 mirror::Object* MarkCompact::IsMarked(mirror::Object* object) {
-  if (immune_region_.ContainsObject(object)) {
+  if (immune_spaces_.IsInImmuneRegion(object)) {
     return object;
   }
   if (updating_references_) {
@@ -498,7 +498,7 @@
 }
 
 bool MarkCompact::ShouldSweepSpace(space::ContinuousSpace* space) const {
-  return space != space_ && !immune_region_.ContainsSpace(space);
+  return space != space_ && !immune_spaces_.ContainsSpace(space);
 }
 
 class MoveObjectVisitor {
diff --git a/runtime/gc/collector/mark_compact.h b/runtime/gc/collector/mark_compact.h
index 8d91939..8a12094 100644
--- a/runtime/gc/collector/mark_compact.h
+++ b/runtime/gc/collector/mark_compact.h
@@ -26,7 +26,7 @@
 #include "garbage_collector.h"
 #include "gc_root.h"
 #include "gc/accounting/heap_bitmap.h"
-#include "immune_region.h"
+#include "immune_spaces.h"
 #include "lock_word.h"
 #include "object_callbacks.h"
 #include "offsets.h"
@@ -194,8 +194,8 @@
 
   accounting::ObjectStack* mark_stack_;
 
-  // Immune region, every object inside the immune region is assumed to be marked.
-  ImmuneRegion immune_region_;
+  // Every object inside the immune spaces is assumed to be marked.
+  ImmuneSpaces immune_spaces_;
 
   // Bump pointer space which we are collecting.
   space::BumpPointerSpace* space_;
diff --git a/runtime/gc/collector/mark_sweep.cc b/runtime/gc/collector/mark_sweep.cc
index db516a0..5427f88 100644
--- a/runtime/gc/collector/mark_sweep.cc
+++ b/runtime/gc/collector/mark_sweep.cc
@@ -86,7 +86,7 @@
   // Mark all of the spaces we never collect as immune.
   for (const auto& space : GetHeap()->GetContinuousSpaces()) {
     if (space->GetGcRetentionPolicy() == space::kGcRetentionPolicyNeverCollect) {
-      CHECK(immune_region_.AddContinuousSpace(space)) << "Failed to add space " << *space;
+      immune_spaces_.AddSpace(space);
     }
   }
 }
@@ -115,7 +115,7 @@
   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
   mark_stack_ = heap_->GetMarkStack();
   DCHECK(mark_stack_ != nullptr);
-  immune_region_.Reset();
+  immune_spaces_.Reset();
   no_reference_class_count_.StoreRelaxed(0);
   normal_count_.StoreRelaxed(0);
   class_count_.StoreRelaxed(0);
@@ -268,16 +268,41 @@
   PreCleanCards();
 }
 
+class ScanObjectVisitor {
+ public:
+  explicit ScanObjectVisitor(MarkSweep* const mark_sweep) ALWAYS_INLINE
+      : mark_sweep_(mark_sweep) {}
+
+  void operator()(mirror::Object* obj) const
+      ALWAYS_INLINE
+      REQUIRES(Locks::heap_bitmap_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_) {
+    if (kCheckLocks) {
+      Locks::mutator_lock_->AssertSharedHeld(Thread::Current());
+      Locks::heap_bitmap_lock_->AssertExclusiveHeld(Thread::Current());
+    }
+    mark_sweep_->ScanObject(obj);
+  }
+
+ private:
+  MarkSweep* const mark_sweep_;
+};
+
 void MarkSweep::UpdateAndMarkModUnion() {
-  for (const auto& space : heap_->GetContinuousSpaces()) {
-    if (immune_region_.ContainsSpace(space)) {
-      const char* name = space->IsZygoteSpace()
-          ? "UpdateAndMarkZygoteModUnionTable"
-          : "UpdateAndMarkImageModUnionTable";
-      TimingLogger::ScopedTiming t(name, GetTimings());
-      accounting::ModUnionTable* mod_union_table = heap_->FindModUnionTableFromSpace(space);
-      CHECK(mod_union_table != nullptr);
+  for (const auto& space : immune_spaces_.GetSpaces()) {
+    const char* name = space->IsZygoteSpace()
+        ? "UpdateAndMarkZygoteModUnionTable"
+        : "UpdateAndMarkImageModUnionTable";
+    DCHECK(space->IsZygoteSpace() || space->IsImageSpace()) << *space;
+    TimingLogger::ScopedTiming t(name, GetTimings());
+    accounting::ModUnionTable* mod_union_table = heap_->FindModUnionTableFromSpace(space);
+    if (mod_union_table != nullptr) {
       mod_union_table->UpdateAndMarkReferences(this);
+    } else {
+      // No mod-union table, scan all the live bits. This can only occur for app images.
+      space->GetLiveBitmap()->VisitMarkedRange(reinterpret_cast<uintptr_t>(space->Begin()),
+                                               reinterpret_cast<uintptr_t>(space->End()),
+                                               ScanObjectVisitor(this));
     }
   }
 }
@@ -460,7 +485,7 @@
     // Verify all the objects have the correct pointer installed.
     obj->AssertReadBarrierPointer();
   }
-  if (immune_region_.ContainsObject(obj)) {
+  if (immune_spaces_.IsInImmuneRegion(obj)) {
     if (kCountMarkedObjects) {
       ++mark_immune_count_;
     }
@@ -501,7 +526,7 @@
     // Verify all the objects have the correct pointer installed.
     obj->AssertReadBarrierPointer();
   }
-  if (immune_region_.ContainsObject(obj)) {
+  if (immune_spaces_.IsInImmuneRegion(obj)) {
     DCHECK(IsMarked(obj) != nullptr);
     return false;
   }
@@ -606,26 +631,6 @@
       this, static_cast<VisitRootFlags>(flags | kVisitRootFlagNonMoving));
 }
 
-class ScanObjectVisitor {
- public:
-  explicit ScanObjectVisitor(MarkSweep* const mark_sweep) ALWAYS_INLINE
-      : mark_sweep_(mark_sweep) {}
-
-  void operator()(mirror::Object* obj) const
-      ALWAYS_INLINE
-      REQUIRES(Locks::heap_bitmap_lock_)
-      SHARED_REQUIRES(Locks::mutator_lock_) {
-    if (kCheckLocks) {
-      Locks::mutator_lock_->AssertSharedHeld(Thread::Current());
-      Locks::heap_bitmap_lock_->AssertExclusiveHeld(Thread::Current());
-    }
-    mark_sweep_->ScanObject(obj);
-  }
-
- private:
-  MarkSweep* const mark_sweep_;
-};
-
 class DelayReferenceReferentVisitor {
  public:
   explicit DelayReferenceReferentVisitor(MarkSweep* collector) : collector_(collector) {}
@@ -1193,7 +1198,8 @@
   std::vector<space::ContinuousSpace*> sweep_spaces;
   space::ContinuousSpace* non_moving_space = nullptr;
   for (space::ContinuousSpace* space : heap_->GetContinuousSpaces()) {
-    if (space->IsAllocSpace() && !immune_region_.ContainsSpace(space) &&
+    if (space->IsAllocSpace() &&
+        !immune_spaces_.ContainsSpace(space) &&
         space->GetLiveBitmap() != nullptr) {
       if (space == heap_->GetNonMovingSpace()) {
         non_moving_space = space;
@@ -1422,7 +1428,7 @@
 }
 
 inline mirror::Object* MarkSweep::IsMarked(mirror::Object* object) {
-  if (immune_region_.ContainsObject(object)) {
+  if (immune_spaces_.IsInImmuneRegion(object)) {
     return object;
   }
   if (current_space_bitmap_->HasAddress(object)) {
diff --git a/runtime/gc/collector/mark_sweep.h b/runtime/gc/collector/mark_sweep.h
index 8f7df78..245f96b 100644
--- a/runtime/gc/collector/mark_sweep.h
+++ b/runtime/gc/collector/mark_sweep.h
@@ -26,7 +26,7 @@
 #include "garbage_collector.h"
 #include "gc_root.h"
 #include "gc/accounting/heap_bitmap.h"
-#include "immune_region.h"
+#include "immune_spaces.h"
 #include "object_callbacks.h"
 #include "offsets.h"
 
@@ -314,8 +314,9 @@
 
   accounting::ObjectStack* mark_stack_;
 
-  // Immune region, every object inside the immune range is assumed to be marked.
-  ImmuneRegion immune_region_;
+  // Every object inside the immune spaces is assumed to be marked. Immune spaces that aren't in the
+  // immune region are handled by the normal marking logic.
+  ImmuneSpaces immune_spaces_;
 
   // Parallel finger.
   AtomicInteger atomic_finger_;
diff --git a/runtime/gc/collector/partial_mark_sweep.cc b/runtime/gc/collector/partial_mark_sweep.cc
index 15f782a..9847794 100644
--- a/runtime/gc/collector/partial_mark_sweep.cc
+++ b/runtime/gc/collector/partial_mark_sweep.cc
@@ -39,7 +39,7 @@
   for (const auto& space : GetHeap()->GetContinuousSpaces()) {
     if (space->GetGcRetentionPolicy() == space::kGcRetentionPolicyFullCollect) {
       CHECK(space->IsZygoteSpace());
-      CHECK(immune_region_.AddContinuousSpace(space)) << "Failed to add space " << *space;
+      immune_spaces_.AddSpace(space);
     }
   }
 }
diff --git a/runtime/gc/collector/semi_space-inl.h b/runtime/gc/collector/semi_space-inl.h
index 06d20f5..12cf3db 100644
--- a/runtime/gc/collector/semi_space-inl.h
+++ b/runtime/gc/collector/semi_space-inl.h
@@ -74,7 +74,7 @@
       MarkStackPush(forward_address);
     }
     obj_ptr->Assign(forward_address);
-  } else if (!collect_from_space_only_ && !immune_region_.ContainsObject(obj)) {
+  } else if (!collect_from_space_only_ && !immune_spaces_.IsInImmuneRegion(obj)) {
     BitmapSetSlowPathVisitor visitor(this);
     if (!mark_bitmap_->Set(obj, visitor)) {
       // This object was not previously marked.
diff --git a/runtime/gc/collector/semi_space.cc b/runtime/gc/collector/semi_space.cc
index 7f57f30..e9497a2 100644
--- a/runtime/gc/collector/semi_space.cc
+++ b/runtime/gc/collector/semi_space.cc
@@ -66,8 +66,9 @@
   for (const auto& space : GetHeap()->GetContinuousSpaces()) {
     if (space->GetGcRetentionPolicy() == space::kGcRetentionPolicyNeverCollect ||
         space->GetGcRetentionPolicy() == space::kGcRetentionPolicyFullCollect) {
-      CHECK(immune_region_.AddContinuousSpace(space)) << "Failed to add space " << *space;
+      immune_spaces_.AddSpace(space);
     } else if (space->GetLiveBitmap() != nullptr) {
+      // TODO: We can probably also add this space to the immune region.
       if (space == to_space_ || collect_from_space_only_) {
         if (collect_from_space_only_) {
           // Bind the bitmaps of the main free list space and the non-moving space we are doing a
@@ -144,7 +145,7 @@
   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
   mark_stack_ = heap_->GetMarkStack();
   DCHECK(mark_stack_ != nullptr);
-  immune_region_.Reset();
+  immune_spaces_.Reset();
   is_large_object_space_immune_ = false;
   saved_bytes_ = 0;
   bytes_moved_ = 0;
@@ -376,7 +377,13 @@
           << "generational_=" << generational_ << " "
           << "collect_from_space_only_=" << collect_from_space_only_;
       accounting::RememberedSet* rem_set = GetHeap()->FindRememberedSetFromSpace(space);
-      CHECK_EQ(rem_set != nullptr, kUseRememberedSet);
+      if (kUseRememberedSet) {
+        // App images currently do not have remembered sets.
+        DCHECK((space->IsImageSpace() && space != heap_->GetBootImageSpace()) ||
+               rem_set != nullptr);
+      } else {
+        DCHECK(rem_set == nullptr);
+      }
       if (rem_set != nullptr) {
         TimingLogger::ScopedTiming t2("UpdateAndMarkRememberedSet", GetTimings());
         rem_set->UpdateAndMarkReferences(from_space_, this);
@@ -767,7 +774,8 @@
   if (from_space_->HasAddress(obj)) {
     // Returns either the forwarding address or null.
     return GetForwardingAddressInFromSpace(obj);
-  } else if (collect_from_space_only_ || immune_region_.ContainsObject(obj) ||
+  } else if (collect_from_space_only_ ||
+             immune_spaces_.IsInImmuneRegion(obj) ||
              to_space_->HasAddress(obj)) {
     return obj;  // Already forwarded, must be marked.
   }
diff --git a/runtime/gc/collector/semi_space.h b/runtime/gc/collector/semi_space.h
index b9246ca..a905904 100644
--- a/runtime/gc/collector/semi_space.h
+++ b/runtime/gc/collector/semi_space.h
@@ -25,7 +25,7 @@
 #include "garbage_collector.h"
 #include "gc_root.h"
 #include "gc/accounting/heap_bitmap.h"
-#include "immune_region.h"
+#include "immune_spaces.h"
 #include "mirror/object_reference.h"
 #include "object_callbacks.h"
 #include "offsets.h"
@@ -201,8 +201,8 @@
   // object.
   accounting::ObjectStack* mark_stack_;
 
-  // Immune region, every object inside the immune region is assumed to be marked.
-  ImmuneRegion immune_region_;
+  // Every object inside the immune spaces is assumed to be marked.
+  ImmuneSpaces immune_spaces_;
 
   // If true, the large object space is immune.
   bool is_large_object_space_immune_;
diff --git a/runtime/gc/space/dlmalloc_space.cc b/runtime/gc/space/dlmalloc_space.cc
index 77f606d..e754a52 100644
--- a/runtime/gc/space/dlmalloc_space.cc
+++ b/runtime/gc/space/dlmalloc_space.cc
@@ -20,6 +20,8 @@
 #include "gc/accounting/card_table.h"
 #include "gc/accounting/space_bitmap-inl.h"
 #include "gc/heap.h"
+#include "jit/jit.h"
+#include "jit/jit_code_cache.h"
 #include "memory_tool_malloc_space-inl.h"
 #include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
@@ -318,10 +320,17 @@
 
 // Implement the dlmalloc morecore callback.
 void* ArtDlMallocMoreCore(void* mspace, intptr_t increment) {
-  Heap* heap = Runtime::Current()->GetHeap();
+  Runtime* runtime = Runtime::Current();
+  Heap* heap = runtime->GetHeap();
   ::art::gc::space::DlMallocSpace* dlmalloc_space = heap->GetDlMallocSpace();
   // Support for multiple DlMalloc provided by a slow path.
   if (UNLIKELY(dlmalloc_space == nullptr || dlmalloc_space->GetMspace() != mspace)) {
+    if (LIKELY(runtime->GetJit() != nullptr)) {
+      jit::JitCodeCache* code_cache = runtime->GetJit()->GetCodeCache();
+      if (code_cache->OwnsSpace(mspace)) {
+        return code_cache->MoreCore(mspace, increment);
+      }
+    }
     dlmalloc_space = nullptr;
     for (space::ContinuousSpace* space : heap->GetContinuousSpaces()) {
       if (space->IsDlMallocSpace()) {
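
For illustration: the morecore hook above now has to tell apart heap mspaces from the JIT's own mspaces and delegate accordingly. The self-contained C++ sketch below shows only that delegation pattern; FakeCodeCache, MoreCoreCallback and the bump-style bookkeeping are illustrative stand-ins, not ART's JitCodeCache API.

#include <cstddef>
#include <cstdint>
#include <cstdio>

// Illustrative stand-in for a cache that owns one mspace and grows by bumping
// a footprint counter, mirroring the shape of JitCodeCache::MoreCore.
struct FakeCodeCache {
  uint8_t* base;
  size_t end;      // current footprint in bytes
  void* mspace;    // the mspace handle this cache owns

  bool OwnsSpace(const void* space) const { return space == mspace; }

  void* MoreCore(intptr_t increment) {
    size_t old_end = end;
    end += increment;
    return base + old_end;
  }
};

// Shape of the morecore callback: route the request to whichever owner the
// mspace belongs to (the dlmalloc heap-space fallback is elided).
void* MoreCoreCallback(void* mspace, intptr_t increment, FakeCodeCache* cache) {
  if (cache != nullptr && cache->OwnsSpace(mspace)) {
    return cache->MoreCore(increment);
  }
  return nullptr;  // a real implementation would fall back to the heap spaces
}

int main() {
  static uint8_t backing[4096];
  int dummy_mspace = 0;  // stand-in for a real dlmalloc mspace handle
  FakeCodeCache cache{backing, 0, &dummy_mspace};
  void* p = MoreCoreCallback(&dummy_mspace, 1024, &cache);
  std::printf("morecore returned offset %td\n",
              static_cast<uint8_t*>(p) - backing);
  return 0;
}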
diff --git a/runtime/gc/space/image_space.h b/runtime/gc/space/image_space.h
index 9920742..babd672 100644
--- a/runtime/gc/space/image_space.h
+++ b/runtime/gc/space/image_space.h
@@ -119,7 +119,22 @@
                                 bool* has_data,
                                 bool *is_global_cache);
 
- private:
+  // Return the end of the image, which includes non-heap objects such as ArtMethods and ArtFields.
+  uint8_t* GetImageEnd() const {
+    return Begin() + GetImageHeader().GetImageSize();
+  }
+
+  // Return the start of the associated oat file.
+  uint8_t* GetOatFileBegin() const {
+    return GetImageHeader().GetOatFileBegin();
+  }
+
+  // Return the end of the associated oat file.
+  uint8_t* GetOatFileEnd() const {
+    return GetImageHeader().GetOatFileEnd();
+  }
+
+ protected:
   // Tries to initialize an ImageSpace from the given image path,
   // returning null on error.
   //
@@ -157,6 +172,7 @@
 
   const std::string image_location_;
 
+ private:
   DISALLOW_COPY_AND_ASSIGN(ImageSpace);
 };
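
For illustration: the new GetImageEnd / GetOatFileBegin / GetOatFileEnd accessors expose the extent of an image space together with its oat file. As a rough sketch of why contiguous extents matter when computing an immune region, the code below chains touching [begin, end) ranges into the largest contiguous interval. This is only a sketch under that assumption, not the actual ImmuneSpaces implementation, and the addresses are made up.

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <vector>

struct Range { uintptr_t begin; uintptr_t end; };  // half-open [begin, end)

// Returns the largest interval obtainable by chaining ranges that touch or
// overlap, after sorting them by start address.
Range LargestContiguous(std::vector<Range> ranges) {
  std::sort(ranges.begin(), ranges.end(),
            [](const Range& a, const Range& b) { return a.begin < b.begin; });
  Range best{0, 0};
  if (ranges.empty()) {
    return best;
  }
  Range cur = ranges[0];
  for (size_t i = 1; i < ranges.size(); ++i) {
    if (ranges[i].begin <= cur.end) {
      cur.end = std::max(cur.end, ranges[i].end);  // extend the current chain
    } else {
      if (cur.end - cur.begin > best.end - best.begin) best = cur;
      cur = ranges[i];  // start a new chain
    }
  }
  if (cur.end - cur.begin > best.end - best.begin) best = cur;
  return best;
}

int main() {
  // Hypothetical layout: image heap [Begin, GetImageEnd) followed immediately
  // by its oat file [GetOatFileBegin, GetOatFileEnd).
  std::vector<Range> ranges = {{0x70000000u, 0x70400000u},
                               {0x70400000u, 0x70800000u}};
  Range r = LargestContiguous(ranges);
  std::printf("largest contiguous: [0x%zx, 0x%zx)\n",
              static_cast<size_t>(r.begin), static_cast<size_t>(r.end));
  return 0;
}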
 
diff --git a/runtime/image.h b/runtime/image.h
index 20e4159..555cf5d 100644
--- a/runtime/image.h
+++ b/runtime/image.h
@@ -84,7 +84,7 @@
         image_roots_(0U), pointer_size_(0U), compile_pic_(0) {}
 
   ImageHeader(uint32_t image_begin,
-              uint32_t image_size_,
+              uint32_t image_size,
               ImageSection* sections,
               uint32_t image_roots,
               uint32_t oat_checksum,
@@ -93,7 +93,7 @@
               uint32_t oat_data_end,
               uint32_t oat_file_end,
               uint32_t pointer_size,
-              bool compile_pic_);
+              bool compile_pic);
 
   bool IsValid() const;
   const char* GetMagic() const;
diff --git a/runtime/jit/jit.cc b/runtime/jit/jit.cc
index f691151..ecbf13c 100644
--- a/runtime/jit/jit.cc
+++ b/runtime/jit/jit.cc
@@ -34,8 +34,10 @@
 JitOptions* JitOptions::CreateFromRuntimeArguments(const RuntimeArgumentMap& options) {
   auto* jit_options = new JitOptions;
   jit_options->use_jit_ = options.GetOrDefault(RuntimeArgumentMap::UseJIT);
-  jit_options->code_cache_capacity_ =
-      options.GetOrDefault(RuntimeArgumentMap::JITCodeCacheCapacity);
+  jit_options->code_cache_initial_capacity_ =
+      options.GetOrDefault(RuntimeArgumentMap::JITCodeCacheInitialCapacity);
+  jit_options->code_cache_max_capacity_ =
+      options.GetOrDefault(RuntimeArgumentMap::JITCodeCacheMaxCapacity);
   jit_options->compile_threshold_ =
       options.GetOrDefault(RuntimeArgumentMap::JITCompileThreshold);
   jit_options->warmup_threshold_ =
@@ -69,13 +71,15 @@
   if (!jit->LoadCompiler(error_msg)) {
     return nullptr;
   }
-  jit->code_cache_.reset(JitCodeCache::Create(options->GetCodeCacheCapacity(), error_msg));
+  jit->code_cache_.reset(JitCodeCache::Create(
+      options->GetCodeCacheInitialCapacity(), options->GetCodeCacheMaxCapacity(), error_msg));
   if (jit->GetCodeCache() == nullptr) {
     return nullptr;
   }
-  LOG(INFO) << "JIT created with code_cache_capacity="
-      << PrettySize(options->GetCodeCacheCapacity())
-      << " compile_threshold=" << options->GetCompileThreshold();
+  LOG(INFO) << "JIT created with initial_capacity="
+      << PrettySize(options->GetCodeCacheInitialCapacity())
+      << ", max_capacity=" << PrettySize(options->GetCodeCacheMaxCapacity())
+      << ", compile_threshold=" << options->GetCompileThreshold();
   return jit.release();
 }
 
diff --git a/runtime/jit/jit.h b/runtime/jit/jit.h
index 1f89f9b..fc76549 100644
--- a/runtime/jit/jit.h
+++ b/runtime/jit/jit.h
@@ -102,8 +102,11 @@
   size_t GetWarmupThreshold() const {
     return warmup_threshold_;
   }
-  size_t GetCodeCacheCapacity() const {
-    return code_cache_capacity_;
+  size_t GetCodeCacheInitialCapacity() const {
+    return code_cache_initial_capacity_;
+  }
+  size_t GetCodeCacheMaxCapacity() const {
+    return code_cache_max_capacity_;
   }
   bool DumpJitInfoOnShutdown() const {
     return dump_info_on_shutdown_;
@@ -117,13 +120,18 @@
 
  private:
   bool use_jit_;
-  size_t code_cache_capacity_;
+  size_t code_cache_initial_capacity_;
+  size_t code_cache_max_capacity_;
   size_t compile_threshold_;
   size_t warmup_threshold_;
   bool dump_info_on_shutdown_;
 
-  JitOptions() : use_jit_(false), code_cache_capacity_(0), compile_threshold_(0),
-      dump_info_on_shutdown_(false) { }
+  JitOptions()
+      : use_jit_(false),
+        code_cache_initial_capacity_(0),
+        code_cache_max_capacity_(0),
+        compile_threshold_(0),
+        dump_info_on_shutdown_(false) { }
 
   DISALLOW_COPY_AND_ASSIGN(JitOptions);
 };
diff --git a/runtime/jit/jit_code_cache.cc b/runtime/jit/jit_code_cache.cc
index a291a09..da79109 100644
--- a/runtime/jit/jit_code_cache.cc
+++ b/runtime/jit/jit_code_cache.cc
@@ -44,73 +44,89 @@
     }                                                       \
   } while (false)                                           \
 
-JitCodeCache* JitCodeCache::Create(size_t capacity, std::string* error_msg) {
-  CHECK_GT(capacity, 0U);
-  CHECK_LT(capacity, kMaxCapacity);
+JitCodeCache* JitCodeCache::Create(size_t initial_capacity,
+                                   size_t max_capacity,
+                                   std::string* error_msg) {
+  CHECK_GE(max_capacity, initial_capacity);
+  // We need 32-bit offsets from method headers in the code cache to things in the data cache.
+  // If the maps were more than 4 GB apart, having multiple maps wouldn't work.
+  // Cap the maximum capacity at 1 GB to be safe.
+  if (max_capacity > 1 * GB) {
+    std::ostringstream oss;
+    oss << "Maxium code cache capacity is limited to 1 GB, "
+        << PrettySize(max_capacity) << " is too big";
+    *error_msg = oss.str();
+    return nullptr;
+  }
+
   std::string error_str;
   // Map name specific for android_os_Debug.cpp accounting.
   MemMap* data_map = MemMap::MapAnonymous(
-    "data-code-cache", nullptr, capacity, kProtAll, false, false, &error_str);
+    "data-code-cache", nullptr, max_capacity, kProtAll, false, false, &error_str);
   if (data_map == nullptr) {
     std::ostringstream oss;
-    oss << "Failed to create read write execute cache: " << error_str << " size=" << capacity;
+    oss << "Failed to create read write execute cache: " << error_str << " size=" << max_capacity;
     *error_msg = oss.str();
     return nullptr;
   }
 
+  // Round both capacities down to a multiple of two pages, so each page-aligned half can back its mspace.
+  initial_capacity = RoundDown(initial_capacity, 2 * kPageSize);
+  max_capacity = RoundDown(max_capacity, 2 * kPageSize);
+
   // Data cache is 1 / 2 of the map.
   // TODO: Make this variable?
-  size_t data_size = RoundUp(data_map->Size() / 2, kPageSize);
-  size_t code_size = data_map->Size() - data_size;
+  size_t data_size = max_capacity / 2;
+  size_t code_size = max_capacity - data_size;
+  DCHECK_EQ(code_size + data_size, max_capacity);
   uint8_t* divider = data_map->Begin() + data_size;
 
-  // We need to have 32 bit offsets from method headers in code cache which point to things
-  // in the data cache. If the maps are more than 4G apart, having multiple maps wouldn't work.
   MemMap* code_map = data_map->RemapAtEnd(divider, "jit-code-cache", kProtAll, &error_str);
   if (code_map == nullptr) {
     std::ostringstream oss;
-    oss << "Failed to create read write execute cache: " << error_str << " size=" << capacity;
+    oss << "Failed to create read write execute cache: " << error_str << " size=" << max_capacity;
     *error_msg = oss.str();
     return nullptr;
   }
-  DCHECK_EQ(code_map->Size(), code_size);
   DCHECK_EQ(code_map->Begin(), divider);
-  return new JitCodeCache(code_map, data_map);
+  data_size = initial_capacity / 2;
+  code_size = initial_capacity - data_size;
+  DCHECK_EQ(code_size + data_size, initial_capacity);
+  return new JitCodeCache(code_map, data_map, code_size, data_size, max_capacity);
 }
 
-JitCodeCache::JitCodeCache(MemMap* code_map, MemMap* data_map)
+JitCodeCache::JitCodeCache(MemMap* code_map,
+                           MemMap* data_map,
+                           size_t initial_code_capacity,
+                           size_t initial_data_capacity,
+                           size_t max_capacity)
     : lock_("Jit code cache", kJitCodeCacheLock),
       lock_cond_("Jit code cache variable", lock_),
       collection_in_progress_(false),
       code_map_(code_map),
-      data_map_(data_map) {
+      data_map_(data_map),
+      max_capacity_(max_capacity),
+      current_capacity_(initial_code_capacity + initial_data_capacity),
+      code_end_(initial_code_capacity),
+      data_end_(initial_data_capacity),
+      has_done_one_collection_(false) {
 
-  code_mspace_ = create_mspace_with_base(code_map_->Begin(), code_map_->Size(), false /*locked*/);
-  data_mspace_ = create_mspace_with_base(data_map_->Begin(), data_map_->Size(), false /*locked*/);
+  code_mspace_ = create_mspace_with_base(code_map_->Begin(), code_end_, false /*locked*/);
+  data_mspace_ = create_mspace_with_base(data_map_->Begin(), data_end_, false /*locked*/);
 
   if (code_mspace_ == nullptr || data_mspace_ == nullptr) {
     PLOG(FATAL) << "create_mspace_with_base failed";
   }
 
-  // Prevent morecore requests from the mspace.
-  mspace_set_footprint_limit(code_mspace_, code_map_->Size());
-  mspace_set_footprint_limit(data_mspace_, data_map_->Size());
+  SetFootprintLimit(current_capacity_);
 
   CHECKED_MPROTECT(code_map_->Begin(), code_map_->Size(), kProtCode);
   CHECKED_MPROTECT(data_map_->Begin(), data_map_->Size(), kProtData);
 
-  live_bitmap_.reset(CodeCacheBitmap::Create("code-cache-bitmap",
-                                             reinterpret_cast<uintptr_t>(code_map_->Begin()),
-                                             reinterpret_cast<uintptr_t>(code_map_->End())));
-
-  if (live_bitmap_.get() == nullptr) {
-    PLOG(FATAL) << "creating bitmaps for the JIT code cache failed";
-  }
-
-  VLOG(jit) << "Created jit code cache: data size="
-            << PrettySize(data_map_->Size())
-            << ", code size="
-            << PrettySize(code_map_->Size());
+  VLOG(jit) << "Created jit code cache: initial data size="
+            << PrettySize(initial_data_capacity)
+            << ", initial code size="
+            << PrettySize(initial_code_capacity);
 }
 
 bool JitCodeCache::ContainsPc(const void* ptr) const {
@@ -433,13 +449,48 @@
   Barrier* const barrier_;
 };
 
-void JitCodeCache::GarbageCollectCache(Thread* self) {
-  if (!kIsDebugBuild || VLOG_IS_ON(jit)) {
-    LOG(INFO) << "Clearing code cache, code="
-              << PrettySize(CodeCacheSize())
-              << ", data=" << PrettySize(DataCacheSize());
+void JitCodeCache::NotifyCollectionDone(Thread* self) {
+  collection_in_progress_ = false;
+  lock_cond_.Broadcast(self);
+}
+
+void JitCodeCache::SetFootprintLimit(size_t new_footprint) {
+  size_t per_space_footprint = new_footprint / 2;
+  DCHECK(IsAlignedParam(per_space_footprint, kPageSize));
+  DCHECK_EQ(per_space_footprint * 2, new_footprint);
+  mspace_set_footprint_limit(data_mspace_, per_space_footprint);
+  {
+    ScopedCodeCacheWrite scc(code_map_.get());
+    mspace_set_footprint_limit(code_mspace_, per_space_footprint);
+  }
+}
+
+bool JitCodeCache::IncreaseCodeCacheCapacity() {
+  if (current_capacity_ == max_capacity_) {
+    return false;
   }
 
+  // Double the capacity if we're below 1MB, or increase it by 1MB if
+  // we're above.
+  if (current_capacity_ < 1 * MB) {
+    current_capacity_ *= 2;
+  } else {
+    current_capacity_ += 1 * MB;
+  }
+  if (current_capacity_ > max_capacity_) {
+    current_capacity_ = max_capacity_;
+  }
+
+  if (!kIsDebugBuild || VLOG_IS_ON(jit)) {
+    LOG(INFO) << "Increasing code cache capacity to " << PrettySize(current_capacity_);
+  }
+
+  SetFootprintLimit(current_capacity_);
+
+  return true;
+}
+
+void JitCodeCache::GarbageCollectCache(Thread* self) {
   instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
 
   // Wait for an existing collection, or let everyone know we are starting one.
@@ -452,6 +503,28 @@
       collection_in_progress_ = true;
     }
   }
+
+  // Check if we just need to grow the capacity. If we don't, allocate the bitmap while
+  // we hold the lock.
+  {
+    MutexLock mu(self, lock_);
+    if (has_done_one_collection_ && IncreaseCodeCacheCapacity()) {
+      has_done_one_collection_ = false;
+      NotifyCollectionDone(self);
+      return;
+    } else {
+      live_bitmap_.reset(CodeCacheBitmap::Create(
+          "code-cache-bitmap",
+          reinterpret_cast<uintptr_t>(code_map_->Begin()),
+          reinterpret_cast<uintptr_t>(code_map_->Begin() + current_capacity_ / 2)));
+    }
+  }
+
+  if (!kIsDebugBuild || VLOG_IS_ON(jit)) {
+    LOG(INFO) << "Clearing code cache, code="
+              << PrettySize(CodeCacheSize())
+              << ", data=" << PrettySize(DataCacheSize());
+  }
   // Walk over all compiled methods and set the entry points of these
   // methods to interpreter.
   {
@@ -500,7 +573,6 @@
         }
       }
     }
-    GetLiveBitmap()->Bitmap::Clear();
 
     // Free all profiling info.
     for (ProfilingInfo* info : profiling_infos_) {
@@ -509,8 +581,9 @@
     }
     profiling_infos_.clear();
 
-    collection_in_progress_ = false;
-    lock_cond_.Broadcast(self);
+    live_bitmap_.reset(nullptr);
+    has_done_one_collection_ = true;
+    NotifyCollectionDone(self);
   }
 
   if (!kIsDebugBuild || VLOG_IS_ON(jit)) {
@@ -589,5 +662,20 @@
   return info;
 }
 
+// NO_THREAD_SAFETY_ANALYSIS as this is called from mspace code, at which point the lock
+// is already held.
+void* JitCodeCache::MoreCore(const void* mspace, intptr_t increment) NO_THREAD_SAFETY_ANALYSIS {
+  if (code_mspace_ == mspace) {
+    size_t result = code_end_;
+    code_end_ += increment;
+    return reinterpret_cast<void*>(result + code_map_->Begin());
+  } else {
+    DCHECK_EQ(data_mspace_, mspace);
+    size_t result = data_end_;
+    data_end_ += increment;
+    return reinterpret_cast<void*>(result + data_map_->Begin());
+  }
+}
+
 }  // namespace jit
 }  // namespace art
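
For illustration: to make the growth policy in IncreaseCodeCacheCapacity above concrete, the standalone sketch below replays it — double while below 1 MiB, then add 1 MiB per step, clamped to the maximum. The 64 KiB starting point and 64 MiB cap are just the non-debug defaults visible in this change, reused here for illustration.

#include <algorithm>
#include <cstddef>
#include <cstdio>

int main() {
  constexpr size_t kKB = 1024;
  constexpr size_t kMB = kKB * 1024;
  size_t current = 64 * kKB;        // kInitialCapacity (non-debug default)
  const size_t maximum = 64 * kMB;  // kMaxCapacity
  while (current < maximum) {
    // Mirrors IncreaseCodeCacheCapacity: double below 1 MiB, else grow by 1 MiB.
    current = (current < kMB) ? current * 2 : current + kMB;
    current = std::min(current, maximum);
    std::printf("capacity grows to %zu KiB\n", current / kKB);
  }
  return 0;
}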
diff --git a/runtime/jit/jit_code_cache.h b/runtime/jit/jit_code_cache.h
index 131446c..13481e0 100644
--- a/runtime/jit/jit_code_cache.h
+++ b/runtime/jit/jit_code_cache.h
@@ -41,20 +41,20 @@
 
 class JitInstrumentationCache;
 
-// Alignment that will suit all architectures.
+// Alignment in bytes that will suit all architectures.
 static constexpr int kJitCodeAlignment = 16;
 using CodeCacheBitmap = gc::accounting::MemoryRangeBitmap<kJitCodeAlignment>;
 
 class JitCodeCache {
  public:
-  static constexpr size_t kMaxCapacity = 1 * GB;
+  static constexpr size_t kMaxCapacity = 64 * MB;
   // Set the default to a very low amount for debug builds to stress the code cache
   // collection.
-  static constexpr size_t kDefaultCapacity = kIsDebugBuild ? 20 * KB : 2 * MB;
+  static constexpr size_t kInitialCapacity = kIsDebugBuild ? 16 * KB : 64 * KB;
 
   // Create the code cache with an initial code + data capacity of "initial_capacity"; it may grow
   // up to "max_capacity". An error message is passed back in the out arg error_msg.
-  static JitCodeCache* Create(size_t capacity, std::string* error_msg);
+  static JitCodeCache* Create(size_t initial_capacity, size_t max_capacity, std::string* error_msg);
 
   // Number of bytes allocated in the code cache.
   size_t CodeCacheSize() REQUIRES(!lock_);
@@ -133,9 +133,19 @@
       REQUIRES(!lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  bool OwnsSpace(const void* mspace) const NO_THREAD_SAFETY_ANALYSIS {
+    return mspace == code_mspace_ || mspace == data_mspace_;
+  }
+
+  void* MoreCore(const void* mspace, intptr_t increment);
+
  private:
-  // Take ownership of code_mem_map.
-  JitCodeCache(MemMap* code_map, MemMap* data_map);
+  // Take ownership of maps.
+  JitCodeCache(MemMap* code_map,
+               MemMap* data_map,
+               size_t initial_code_capacity,
+               size_t initial_data_capacity,
+               size_t max_capacity);
 
   // Internal version of 'CommitCode' that will not retry if the
   // allocation fails. Return null if the allocation fails.
@@ -172,6 +182,16 @@
   // Number of bytes allocated in the data cache.
   size_t DataCacheSizeLocked() REQUIRES(lock_);
 
+  // Notify all waiting threads that a collection is done.
+  void NotifyCollectionDone(Thread* self) REQUIRES(lock_);
+
+  // Try to increase the current capacity of the code cache. Return whether we
+  // succeeded at doing so.
+  bool IncreaseCodeCacheCapacity() REQUIRES(lock_);
+
+  // Set the footprint limit of the code cache.
+  void SetFootprintLimit(size_t new_footprint) REQUIRES(lock_);
+
   // Lock for guarding allocations, collections, and the method_code_map_.
   Mutex lock_;
   // Condition to wait on during collection.
@@ -193,6 +213,21 @@
   // ProfilingInfo objects we have allocated.
   std::vector<ProfilingInfo*> profiling_infos_ GUARDED_BY(lock_);
 
+  // The maximum capacity in bytes this code cache can go to.
+  size_t max_capacity_ GUARDED_BY(lock_);
+
+  // The current capacity in bytes of the code cache.
+  size_t current_capacity_ GUARDED_BY(lock_);
+
+  // The current footprint in bytes of the code portion of the code cache.
+  size_t code_end_ GUARDED_BY(lock_);
+
+  // The current footprint in bytes of the data portion of the code cache.
+  size_t data_end_ GUARDED_BY(lock_);
+
+  // Whether a collection has already been done on the current capacity.
+  bool has_done_one_collection_ GUARDED_BY(lock_);
+
   DISALLOW_IMPLICIT_CONSTRUCTORS(JitCodeCache);
 };
 
diff --git a/runtime/lambda/box_table.cc b/runtime/lambda/box_table.cc
index 8eef10b..9918bb7 100644
--- a/runtime/lambda/box_table.cc
+++ b/runtime/lambda/box_table.cc
@@ -62,7 +62,7 @@
 
 BoxTable::~BoxTable() {
   // Free all the copies of our closures.
-  for (auto map_iterator = map_.begin(); map_iterator != map_.end(); ++map_iterator) {
+  for (auto map_iterator = map_.begin(); map_iterator != map_.end(); ) {
     std::pair<UnorderedMapKeyType, ValueType>& key_value_pair = *map_iterator;
 
     Closure* closure = key_value_pair.first;
diff --git a/runtime/mirror/class.cc b/runtime/mirror/class.cc
index 3590586..05a9039 100644
--- a/runtime/mirror/class.cc
+++ b/runtime/mirror/class.cc
@@ -802,6 +802,18 @@
   }
 }
 
+mirror::Class* Class::GetCommonSuperClass(Handle<Class> klass) {
+  DCHECK(klass.Get() != nullptr);
+  DCHECK(!klass->IsInterface());
+  DCHECK(!IsInterface());
+  mirror::Class* common_super_class = this;
+  while (!common_super_class->IsAssignableFrom(klass.Get())) {
+    common_super_class = common_super_class->GetSuperClass();
+  }
+  DCHECK(common_super_class != nullptr);
+  return common_super_class;
+}
+
 const char* Class::GetSourceFile() {
   const DexFile& dex_file = GetDexFile();
   const DexFile::ClassDef* dex_class_def = GetClassDef();
diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h
index 19a84f2..0ab5b97 100644
--- a/runtime/mirror/class.h
+++ b/runtime/mirror/class.h
@@ -657,6 +657,10 @@
 
   ALWAYS_INLINE Class* GetSuperClass() SHARED_REQUIRES(Locks::mutator_lock_);
 
+  // Get the first common super class. It will never return null.
+  // Both `this` and `klass` must be classes (not interfaces).
+  Class* GetCommonSuperClass(Handle<Class> klass) SHARED_REQUIRES(Locks::mutator_lock_);
+
   void SetSuperClass(Class *new_super_class) SHARED_REQUIRES(Locks::mutator_lock_) {
     // Super class is assigned once, except during class linker initialization.
     Class* old_super_class = GetFieldObject<Class>(OFFSET_OF_OBJECT_MEMBER(Class, super_class_));
diff --git a/runtime/native/dalvik_system_ZygoteHooks.cc b/runtime/native/dalvik_system_ZygoteHooks.cc
index a27c9ce..ae1a4d7 100644
--- a/runtime/native/dalvik_system_ZygoteHooks.cc
+++ b/runtime/native/dalvik_system_ZygoteHooks.cc
@@ -64,8 +64,7 @@
     DEBUG_ENABLE_ASSERT             = 1 << 2,
     DEBUG_ENABLE_SAFEMODE           = 1 << 3,
     DEBUG_ENABLE_JNI_LOGGING        = 1 << 4,
-    DEBUG_ENABLE_JIT                = 1 << 5,
-    DEBUG_GENERATE_DEBUG_INFO       = 1 << 6,
+    DEBUG_GENERATE_DEBUG_INFO       = 1 << 5,
   };
 
   Runtime* const runtime = Runtime::Current();
@@ -97,21 +96,10 @@
   if (safe_mode) {
     // Ensure that any (secondary) oat files will be interpreted.
     runtime->AddCompilerOption("--compiler-filter=interpret-only");
+    runtime->SetSafeMode(true);
     debug_flags &= ~DEBUG_ENABLE_SAFEMODE;
   }
 
-  bool use_jit = false;
-  if ((debug_flags & DEBUG_ENABLE_JIT) != 0) {
-    if (safe_mode) {
-      LOG(INFO) << "Not enabling JIT due to safe mode";
-    } else {
-      use_jit = true;
-      LOG(INFO) << "Late-enabling JIT";
-    }
-    debug_flags &= ~DEBUG_ENABLE_JIT;
-  }
-  runtime->GetJITOptions()->SetUseJIT(use_jit);
-
   const bool generate_debug_info = (debug_flags & DEBUG_GENERATE_DEBUG_INFO) != 0;
   if (generate_debug_info) {
     runtime->AddCompilerOption("--generate-debug-info");
@@ -193,9 +181,9 @@
     if (isa != kNone && isa != kRuntimeISA) {
       action = Runtime::NativeBridgeAction::kInitialize;
     }
-    Runtime::Current()->DidForkFromZygote(env, action, isa_string.c_str());
+    Runtime::Current()->InitNonZygoteOrPostFork(env, action, isa_string.c_str());
   } else {
-    Runtime::Current()->DidForkFromZygote(env, Runtime::NativeBridgeAction::kUnload, nullptr);
+    Runtime::Current()->InitNonZygoteOrPostFork(env, Runtime::NativeBridgeAction::kUnload, nullptr);
   }
 }
 
diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc
index ae16c7f..dfd783b 100644
--- a/runtime/parsed_options.cc
+++ b/runtime/parsed_options.cc
@@ -152,9 +152,12 @@
           .WithType<bool>()
           .WithValueMap({{"false", false}, {"true", true}})
           .IntoKey(M::UseJIT)
-      .Define("-Xjitcodecachesize:_")
+      .Define("-Xjitinitialsize:_")
           .WithType<MemoryKiB>()
-          .IntoKey(M::JITCodeCacheCapacity)
+          .IntoKey(M::JITCodeCacheInitialCapacity)
+      .Define("-Xjitmaxsize:_")
+          .WithType<MemoryKiB>()
+          .IntoKey(M::JITCodeCacheMaxCapacity)
       .Define("-Xjitthreshold:_")
           .WithType<unsigned int>()
           .IntoKey(M::JITCompileThreshold)
@@ -640,7 +643,6 @@
   UsageMessage(stream, "  -XX:ForegroundHeapGrowthMultiplier=doublevalue\n");
   UsageMessage(stream, "  -XX:LowMemoryMode\n");
   UsageMessage(stream, "  -Xprofile:{threadcpuclock,wallclock,dualclock}\n");
-  UsageMessage(stream, "  -Xjitcodecachesize:N\n");
   UsageMessage(stream, "  -Xjitthreshold:integervalue\n");
   UsageMessage(stream, "\n");
 
@@ -684,6 +686,8 @@
   UsageMessage(stream, "  -Ximage-compiler-option dex2oat-option\n");
   UsageMessage(stream, "  -Xpatchoat:filename\n");
   UsageMessage(stream, "  -Xusejit:booleanvalue\n");
+  UsageMessage(stream, "  -Xjitinitialsize:N\n");
+  UsageMessage(stream, "  -Xjitmaxsize:N\n");
   UsageMessage(stream, "  -X[no]relocate\n");
   UsageMessage(stream, "  -X[no]dex2oat (Whether to invoke dex2oat on the application)\n");
   UsageMessage(stream, "  -X[no]image-dex2oat (Whether to create and use a boot image)\n");
@@ -718,6 +722,7 @@
   UsageMessage(stream, "  -Xjitblocking\n");
   UsageMessage(stream, "  -Xjitmethod:signature[,signature]* (eg Ljava/lang/String\\;replace)\n");
   UsageMessage(stream, "  -Xjitclass:classname[,classname]*\n");
+  UsageMessage(stream, "  -Xjitcodecachesize:N\n");
   UsageMessage(stream, "  -Xjitoffset:offset[,offset]\n");
   UsageMessage(stream, "  -Xjitconfig:filename\n");
   UsageMessage(stream, "  -Xjitcheckcg\n");
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 92a56a9..a210aa8 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -208,7 +208,8 @@
       zygote_max_failed_boots_(0),
       experimental_flags_(ExperimentalFlags::kNone),
       oat_file_manager_(nullptr),
-      is_low_memory_mode_(false) {
+      is_low_memory_mode_(false),
+      safe_mode_(false) {
   CheckAsmSupportOffsetsAndSizes();
   std::fill(callee_save_methods_, callee_save_methods_ + arraysize(callee_save_methods_), 0u);
 }
@@ -573,8 +574,9 @@
     if (is_native_bridge_loaded_) {
       PreInitializeNativeBridge(".");
     }
-    DidForkFromZygote(self->GetJniEnv(), NativeBridgeAction::kInitialize,
-                      GetInstructionSetString(kRuntimeISA));
+    InitNonZygoteOrPostFork(self->GetJniEnv(),
+                            NativeBridgeAction::kInitialize,
+                            GetInstructionSetString(kRuntimeISA));
   }
 
   ATRACE_BEGIN("StartDaemonThreads");
@@ -663,7 +665,7 @@
 #endif
 }
 
-void Runtime::DidForkFromZygote(JNIEnv* env, NativeBridgeAction action, const char* isa) {
+void Runtime::InitNonZygoteOrPostFork(JNIEnv* env, NativeBridgeAction action, const char* isa) {
   is_zygote_ = false;
 
   if (is_native_bridge_loaded_) {
@@ -685,8 +687,9 @@
   // before fork aren't attributed to an app.
   heap_->ResetGcPerformanceInfo();
 
-  if (jit_.get() == nullptr && jit_options_->UseJIT()) {
-    // Create the JIT if the flag is set and we haven't already create it (happens for run-tests).
+  if (!safe_mode_ && jit_options_->UseJIT() && jit_.get() == nullptr) {
+    // Note that when running ART standalone (not zygote, nor zygote fork),
+    // the jit may have already been created.
     CreateJit();
   }
 
@@ -1041,8 +1044,13 @@
   class_linker_ = new ClassLinker(intern_table_);
   if (GetHeap()->HasImageSpace()) {
     ATRACE_BEGIN("InitFromImage");
-    class_linker_->InitFromImage();
+    std::string error_msg;
+    bool result = class_linker_->InitFromImage(&error_msg);
     ATRACE_END();
+    if (!result) {
+      LOG(ERROR) << "Could not initialize from image: " << error_msg;
+      return false;
+    }
     if (kIsDebugBuild) {
       GetHeap()->GetBootImageSpace()->VerifyImageAllocations();
     }
@@ -1074,7 +1082,11 @@
                  runtime_options.GetOrDefault(Opt::Image),
                  &boot_class_path);
     instruction_set_ = runtime_options.GetOrDefault(Opt::ImageInstructionSet);
-    class_linker_->InitWithoutImage(std::move(boot_class_path));
+    std::string error_msg;
+    if (!class_linker_->InitWithoutImage(std::move(boot_class_path), &error_msg)) {
+      LOG(ERROR) << "Could not initialize without image: " << error_msg;
+      return false;
+    }
 
     // TODO: Should we move the following to InitWithoutImage?
     SetInstructionSet(instruction_set_);
diff --git a/runtime/runtime.h b/runtime/runtime.h
index a8ba19b..d61663c 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -447,7 +447,7 @@
 
   void PreZygoteFork();
   bool InitZygote();
-  void DidForkFromZygote(JNIEnv* env, NativeBridgeAction action, const char* isa);
+  void InitNonZygoteOrPostFork(JNIEnv* env, NativeBridgeAction action, const char* isa);
 
   const instrumentation::Instrumentation* GetInstrumentation() const {
     return &instrumentation_;
@@ -588,6 +588,10 @@
   double GetHashTableMinLoadFactor() const;
   double GetHashTableMaxLoadFactor() const;
 
+  void SetSafeMode(bool mode) {
+    safe_mode_ = mode;
+  }
+
  private:
   static void InitPlatformSignalHandlers();
 
@@ -791,6 +795,9 @@
   // Whether or not we are on a low RAM device.
   bool is_low_memory_mode_;
 
+  // Whether the application should run in safe mode, that is, interpreter only.
+  bool safe_mode_;
+
   DISALLOW_COPY_AND_ASSIGN(Runtime);
 };
 std::ostream& operator<<(std::ostream& os, const Runtime::CalleeSaveType& rhs);
diff --git a/runtime/runtime_options.def b/runtime/runtime_options.def
index 3489834..9051eda 100644
--- a/runtime/runtime_options.def
+++ b/runtime/runtime_options.def
@@ -69,7 +69,8 @@
 RUNTIME_OPTIONS_KEY (bool,                UseJIT,                         false)
 RUNTIME_OPTIONS_KEY (unsigned int,        JITCompileThreshold,            jit::Jit::kDefaultCompileThreshold)
 RUNTIME_OPTIONS_KEY (unsigned int,        JITWarmupThreshold,             jit::Jit::kDefaultWarmupThreshold)
-RUNTIME_OPTIONS_KEY (MemoryKiB,           JITCodeCacheCapacity,           jit::JitCodeCache::kDefaultCapacity)
+RUNTIME_OPTIONS_KEY (MemoryKiB,           JITCodeCacheInitialCapacity,    jit::JitCodeCache::kInitialCapacity)
+RUNTIME_OPTIONS_KEY (MemoryKiB,           JITCodeCacheMaxCapacity,        jit::JitCodeCache::kMaxCapacity)
 RUNTIME_OPTIONS_KEY (MillisecondsToNanoseconds, \
                                           HSpaceCompactForOOMMinIntervalsMs,\
                                                                           MsToNs(100 * 1000))  // 100s
diff --git a/runtime/stack.cc b/runtime/stack.cc
index 88978bf..9098d38 100644
--- a/runtime/stack.cc
+++ b/runtime/stack.cc
@@ -265,7 +265,12 @@
     }
   } else {
     DCHECK(cur_shadow_frame_ != nullptr);
-    *val = cur_shadow_frame_->GetVReg(vreg);
+    if (kind == kReferenceVReg) {
+      *val = static_cast<uint32_t>(reinterpret_cast<uintptr_t>(
+          cur_shadow_frame_->GetVRegReference(vreg)));
+    } else {
+      *val = cur_shadow_frame_->GetVReg(vreg);
+    }
     return true;
   }
 }
@@ -481,52 +486,10 @@
   return true;
 }
 
-bool StackVisitor::SetVReg(ArtMethod* m, uint16_t vreg, uint32_t new_value,
+bool StackVisitor::SetVReg(ArtMethod* m,
+                           uint16_t vreg,
+                           uint32_t new_value,
                            VRegKind kind) {
-  if (cur_quick_frame_ != nullptr) {
-    DCHECK(context_ != nullptr);  // You can't reliably write registers without a context.
-    DCHECK(m == GetMethod());
-    if (cur_oat_quick_method_header_->IsOptimized()) {
-      return false;
-    } else {
-      return SetVRegFromQuickCode(m, vreg, new_value, kind);
-    }
-  } else {
-    cur_shadow_frame_->SetVReg(vreg, new_value);
-    return true;
-  }
-}
-
-bool StackVisitor::SetVRegFromQuickCode(ArtMethod* m, uint16_t vreg, uint32_t new_value,
-                                        VRegKind kind) {
-  DCHECK(context_ != nullptr);  // You can't reliably write registers without a context.
-  DCHECK(m == GetMethod());
-  const OatQuickMethodHeader* method_header = GetCurrentOatQuickMethodHeader();
-  QuickMethodFrameInfo frame_info = method_header->GetFrameInfo();
-  const VmapTable vmap_table(method_header->GetVmapTable());
-  uint32_t vmap_offset;
-  // TODO: IsInContext stops before spotting floating point registers.
-  if (vmap_table.IsInContext(vreg, kind, &vmap_offset)) {
-    bool is_float = (kind == kFloatVReg) || (kind == kDoubleLoVReg) || (kind == kDoubleHiVReg);
-    uint32_t spill_mask = is_float ? frame_info.FpSpillMask() : frame_info.CoreSpillMask();
-    uint32_t reg = vmap_table.ComputeRegister(spill_mask, vmap_offset, kind);
-    return SetRegisterIfAccessible(reg, new_value, kind);
-  } else {
-    const DexFile::CodeItem* code_item = m->GetCodeItem();
-    DCHECK(code_item != nullptr) << PrettyMethod(m);  // Can't be null or how would we compile
-                                                      // its instructions?
-    uint32_t* addr = GetVRegAddrFromQuickCode(
-        cur_quick_frame_, code_item, frame_info.CoreSpillMask(),
-        frame_info.FpSpillMask(), frame_info.FrameSizeInBytes(), vreg);
-    *addr = new_value;
-    return true;
-  }
-}
-
-bool StackVisitor::SetVRegFromDebugger(ArtMethod* m,
-                                       uint16_t vreg,
-                                       uint32_t new_value,
-                                       VRegKind kind) {
   const DexFile::CodeItem* code_item = m->GetCodeItem();
   if (code_item == nullptr) {
     return false;
@@ -551,93 +514,11 @@
   return true;
 }
 
-bool StackVisitor::SetRegisterIfAccessible(uint32_t reg, uint32_t new_value, VRegKind kind) {
-  const bool is_float = (kind == kFloatVReg) || (kind == kDoubleLoVReg) || (kind == kDoubleHiVReg);
-  if (!IsAccessibleRegister(reg, is_float)) {
-    return false;
-  }
-  const bool target64 = Is64BitInstructionSet(kRuntimeISA);
-
-  // Create a new value that can hold both low 32 and high 32 bits, in
-  // case we are running 64 bits.
-  uintptr_t full_new_value = new_value;
-  // Deal with 32 or 64-bit wide registers in a way that builds on all targets.
-  if (target64) {
-    bool wide_lo = (kind == kLongLoVReg) || (kind == kDoubleLoVReg);
-    bool wide_hi = (kind == kLongHiVReg) || (kind == kDoubleHiVReg);
-    if (wide_lo || wide_hi) {
-      uintptr_t old_reg_val = GetRegister(reg, is_float);
-      uint64_t new_vreg_portion = static_cast<uint64_t>(new_value);
-      uint64_t old_reg_val_as_wide = static_cast<uint64_t>(old_reg_val);
-      uint64_t mask = 0xffffffff;
-      if (wide_lo) {
-        mask = mask << 32;
-      } else {
-        new_vreg_portion = new_vreg_portion << 32;
-      }
-      full_new_value = static_cast<uintptr_t>((old_reg_val_as_wide & mask) | new_vreg_portion);
-    }
-  }
-  SetRegister(reg, full_new_value, is_float);
-  return true;
-}
-
-bool StackVisitor::SetVRegPair(ArtMethod* m, uint16_t vreg, uint64_t new_value,
-                               VRegKind kind_lo, VRegKind kind_hi) {
-  if (kind_lo == kLongLoVReg) {
-    DCHECK_EQ(kind_hi, kLongHiVReg);
-  } else if (kind_lo == kDoubleLoVReg) {
-    DCHECK_EQ(kind_hi, kDoubleHiVReg);
-  } else {
-    LOG(FATAL) << "Expected long or double: kind_lo=" << kind_lo << ", kind_hi=" << kind_hi;
-  }
-  if (cur_quick_frame_ != nullptr) {
-    DCHECK(context_ != nullptr);  // You can't reliably write registers without a context.
-    DCHECK(m == GetMethod());
-    if (cur_oat_quick_method_header_->IsOptimized()) {
-      return false;
-    } else {
-      return SetVRegPairFromQuickCode(m, vreg, new_value, kind_lo, kind_hi);
-    }
-  } else {
-    DCHECK(cur_shadow_frame_ != nullptr);
-    cur_shadow_frame_->SetVRegLong(vreg, new_value);
-    return true;
-  }
-}
-
-bool StackVisitor::SetVRegPairFromQuickCode(
-    ArtMethod* m, uint16_t vreg, uint64_t new_value, VRegKind kind_lo, VRegKind kind_hi) {
-  DCHECK_EQ(m, GetMethod());
-  const OatQuickMethodHeader* method_header = GetCurrentOatQuickMethodHeader();
-  QuickMethodFrameInfo frame_info = method_header->GetFrameInfo();
-  const VmapTable vmap_table(method_header->GetVmapTable());
-  uint32_t vmap_offset_lo, vmap_offset_hi;
-  // TODO: IsInContext stops before spotting floating point registers.
-  if (vmap_table.IsInContext(vreg, kind_lo, &vmap_offset_lo) &&
-      vmap_table.IsInContext(vreg + 1, kind_hi, &vmap_offset_hi)) {
-    bool is_float = (kind_lo == kDoubleLoVReg);
-    uint32_t spill_mask = is_float ? frame_info.FpSpillMask() : frame_info.CoreSpillMask();
-    uint32_t reg_lo = vmap_table.ComputeRegister(spill_mask, vmap_offset_lo, kind_lo);
-    uint32_t reg_hi = vmap_table.ComputeRegister(spill_mask, vmap_offset_hi, kind_hi);
-    return SetRegisterPairIfAccessible(reg_lo, reg_hi, new_value, is_float);
-  } else {
-    const DexFile::CodeItem* code_item = m->GetCodeItem();
-    DCHECK(code_item != nullptr) << PrettyMethod(m);  // Can't be null or how would we compile
-                                                      // its instructions?
-    uint32_t* addr = GetVRegAddrFromQuickCode(
-        cur_quick_frame_, code_item, frame_info.CoreSpillMask(),
-        frame_info.FpSpillMask(), frame_info.FrameSizeInBytes(), vreg);
-    *reinterpret_cast<uint64_t*>(addr) = new_value;
-    return true;
-  }
-}
-
-bool StackVisitor::SetVRegPairFromDebugger(ArtMethod* m,
-                                           uint16_t vreg,
-                                           uint64_t new_value,
-                                           VRegKind kind_lo,
-                                           VRegKind kind_hi) {
+bool StackVisitor::SetVRegPair(ArtMethod* m,
+                               uint16_t vreg,
+                               uint64_t new_value,
+                               VRegKind kind_lo,
+                               VRegKind kind_hi) {
   if (kind_lo == kLongLoVReg) {
     DCHECK_EQ(kind_hi, kLongHiVReg);
   } else if (kind_lo == kDoubleLoVReg) {
@@ -666,25 +547,6 @@
   return true;
 }
 
-bool StackVisitor::SetRegisterPairIfAccessible(uint32_t reg_lo, uint32_t reg_hi,
-                                               uint64_t new_value, bool is_float) {
-  if (!IsAccessibleRegister(reg_lo, is_float) || !IsAccessibleRegister(reg_hi, is_float)) {
-    return false;
-  }
-  uintptr_t new_value_lo = static_cast<uintptr_t>(new_value & 0xFFFFFFFF);
-  uintptr_t new_value_hi = static_cast<uintptr_t>(new_value >> 32);
-  bool target64 = Is64BitInstructionSet(kRuntimeISA);
-  // Deal with 32 or 64-bit wide registers in a way that builds on all targets.
-  if (target64) {
-    DCHECK_EQ(reg_lo, reg_hi);
-    SetRegister(reg_lo, new_value, is_float);
-  } else {
-    SetRegister(reg_lo, new_value_lo, is_float);
-    SetRegister(reg_hi, new_value_hi, is_float);
-  }
-  return true;
-}
-
 bool StackVisitor::IsAccessibleGPR(uint32_t reg) const {
   DCHECK(context_ != nullptr);
   return context_->IsAccessibleGPR(reg);
@@ -702,12 +564,6 @@
   return context_->GetGPR(reg);
 }
 
-void StackVisitor::SetGPR(uint32_t reg, uintptr_t value) {
-  DCHECK(cur_quick_frame_ != nullptr) << "This is a quick frame routine";
-  DCHECK(context_ != nullptr);
-  context_->SetGPR(reg, value);
-}
-
 bool StackVisitor::IsAccessibleFPR(uint32_t reg) const {
   DCHECK(context_ != nullptr);
   return context_->IsAccessibleFPR(reg);
@@ -719,12 +575,6 @@
   return context_->GetFPR(reg);
 }
 
-void StackVisitor::SetFPR(uint32_t reg, uintptr_t value) {
-  DCHECK(cur_quick_frame_ != nullptr) << "This is a quick frame routine";
-  DCHECK(context_ != nullptr);
-  context_->SetFPR(reg, value);
-}
-
 uintptr_t StackVisitor::GetReturnPc() const {
   uint8_t* sp = reinterpret_cast<uint8_t*>(GetCurrentQuickFrame());
   DCHECK(sp != nullptr);
diff --git a/runtime/stack.h b/runtime/stack.h
index aa7b616..a0c44cb 100644
--- a/runtime/stack.h
+++ b/runtime/stack.h
@@ -600,22 +600,18 @@
                    uint64_t* val) const
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  // Values will be set in debugger shadow frames. Debugger will make sure deoptimization
+  // is triggered to make the values effective.
   bool SetVReg(ArtMethod* m, uint16_t vreg, uint32_t new_value, VRegKind kind)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Values will be set in debugger shadow frames. Debugger will make sure deoptimization
   // is triggered to make the values effective.
-  bool SetVRegFromDebugger(ArtMethod* m, uint16_t vreg, uint32_t new_value, VRegKind kind)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-
-  bool SetVRegPair(ArtMethod* m, uint16_t vreg, uint64_t new_value,
-                   VRegKind kind_lo, VRegKind kind_hi)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-
-  // Values will be set in debugger shadow frames. Debugger will make sure deoptimization
-  // is triggered to make the values effective.
-  bool SetVRegPairFromDebugger(ArtMethod* m, uint16_t vreg, uint64_t new_value,
-                               VRegKind kind_lo, VRegKind kind_hi)
+  bool SetVRegPair(ArtMethod* m,
+                   uint16_t vreg,
+                   uint64_t new_value,
+                   VRegKind kind_lo,
+                   VRegKind kind_hi)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   uintptr_t* GetGPRAddress(uint32_t reg) const;
@@ -749,22 +745,12 @@
     DCHECK(IsAccessibleRegister(reg, is_float));
     return is_float ? GetFPR(reg) : GetGPR(reg);
   }
-  void SetRegister(uint32_t reg, uintptr_t value, bool is_float) {
-    DCHECK(IsAccessibleRegister(reg, is_float));
-    if (is_float) {
-      SetFPR(reg, value);
-    } else {
-      SetGPR(reg, value);
-    }
-  }
 
   bool IsAccessibleGPR(uint32_t reg) const;
   uintptr_t GetGPR(uint32_t reg) const;
-  void SetGPR(uint32_t reg, uintptr_t value);
 
   bool IsAccessibleFPR(uint32_t reg) const;
   uintptr_t GetFPR(uint32_t reg) const;
-  void SetFPR(uint32_t reg, uintptr_t value);
 
   bool GetVRegFromDebuggerShadowFrame(uint16_t vreg, VRegKind kind, uint32_t* val) const
       SHARED_REQUIRES(Locks::mutator_lock_);
@@ -789,19 +775,6 @@
                                    uint64_t* val) const
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  bool SetVRegFromQuickCode(ArtMethod* m, uint16_t vreg, uint32_t new_value,
-                            VRegKind kind)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-  bool SetRegisterIfAccessible(uint32_t reg, uint32_t new_value, VRegKind kind)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-
-  bool SetVRegPairFromQuickCode(ArtMethod* m, uint16_t vreg, uint64_t new_value,
-                                VRegKind kind_lo, VRegKind kind_hi)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-  bool SetRegisterPairIfAccessible(uint32_t reg_lo, uint32_t reg_hi, uint64_t new_value,
-                                   bool is_float)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-
   void SanityCheckFrame() const SHARED_REQUIRES(Locks::mutator_lock_);
 
   InlineInfo GetCurrentInlineInfo() const SHARED_REQUIRES(Locks::mutator_lock_);
diff --git a/test/450-checker-types/src/Main.java b/test/450-checker-types/src/Main.java
index cd5e5a7..ec63057 100644
--- a/test/450-checker-types/src/Main.java
+++ b/test/450-checker-types/src/Main.java
@@ -14,17 +14,22 @@
  * limitations under the License.
  */
 
-interface Interface {
+interface SuperInterface {
+  void superInterfaceMethod();
+}
+
+interface OtherInterface extends SuperInterface {
+}
+
+interface Interface extends SuperInterface {
   void $noinline$f();
 }
 
 class Super implements Interface {
+  public void superInterfaceMethod() {}
   public void $noinline$f() {
     throw new RuntimeException();
   }
-
-  public int instanceField;
-
 }
 
 class SubclassA extends Super {
@@ -551,40 +556,30 @@
   private void argumentCheck(Super s, double d, SubclassA a, Final f) {
   }
 
-  /// CHECK-START: Main Main.getMain(boolean) reference_type_propagation (after)
-  /// CHECK:      <<Phi:l\d+>>       Phi klass:Main
-  /// CHECK:                         Return [<<Phi>>]
-  private Main getMain(boolean cond) {
-    return cond ? null : new Main();
-  }
-
-  /// CHECK-START: Super Main.getSuper(boolean, SubclassA, SubclassB) reference_type_propagation (after)
-  /// CHECK:      <<Phi:l\d+>>       Phi klass:java.lang.Object
-  /// CHECK:                         Return [<<Phi>>]
-  private Super getSuper(boolean cond, SubclassA a, SubclassB b) {
-    return cond ? a : b;
-  }
-
   private Main getNull() {
     return null;
   }
 
   private int mainField = 0;
 
-  /// CHECK-START: void Main.testInlinerWidensReturnType(boolean, SubclassA, SubclassB) inliner (before)
-  /// CHECK:      <<Int:i\d+>>       IntConstant 0
-  /// CHECK:      <<Invoke:l\d+>>    InvokeStaticOrDirect klass:Super
-  /// CHECK:      <<NullCheck:l\d+>> NullCheck [<<Invoke>>] klass:Super exact:false
-  /// CHECK:                         InstanceFieldSet [<<NullCheck>>,<<Int>>]
-
-  /// CHECK-START: void Main.testInlinerWidensReturnType(boolean, SubclassA, SubclassB) inliner (after)
-  /// CHECK:      <<Int:i\d+>>       IntConstant 0
+  /// CHECK-START: SuperInterface Main.getWiderType(boolean, Interface, OtherInterface) reference_type_propagation (after)
   /// CHECK:      <<Phi:l\d+>>       Phi klass:java.lang.Object
-  /// CHECK:      <<NullCheck:l\d+>> NullCheck [<<Phi>>] klass:Super exact:false
-  /// CHECK:                         InstanceFieldSet [<<NullCheck>>,<<Int>>]
-  private void testInlinerWidensReturnType(boolean cond, SubclassA a, SubclassB b) {
-    Super o = getSuper(cond, a, b);
-    o.instanceField = 0;
+  /// CHECK:                         Return [<<Phi>>]
+  private SuperInterface getWiderType(boolean cond, Interface a, OtherInterface b) {
+    return cond ? a : b;
+  }
+
+  /// CHECK-START: void Main.testInlinerWidensReturnType(boolean, Interface, OtherInterface) inliner (before)
+  /// CHECK:      <<Invoke:l\d+>>    InvokeStaticOrDirect klass:SuperInterface
+  /// CHECK:      <<NullCheck:l\d+>> NullCheck [<<Invoke>>] klass:SuperInterface exact:false
+  /// CHECK:                         InvokeInterface [<<NullCheck>>]
+
+  /// CHECK-START: void Main.testInlinerWidensReturnType(boolean, Interface, OtherInterface) inliner (after)
+  /// CHECK:      <<Phi:l\d+>>       Phi klass:java.lang.Object
+  /// CHECK:      <<NullCheck:l\d+>> NullCheck [<<Phi>>] klass:SuperInterface exact:false
+  /// CHECK:                         InvokeInterface [<<NullCheck>>]
+  private void testInlinerWidensReturnType(boolean cond, Interface a, OtherInterface b) {
+    getWiderType(cond, a, b).superInterfaceMethod();
   }
 
   /// CHECK-START: void Main.testInlinerReturnsNull() inliner (before)
diff --git a/test/455-set-vreg/expected.txt b/test/455-set-vreg/expected.txt
deleted file mode 100644
index 6a5618e..0000000
--- a/test/455-set-vreg/expected.txt
+++ /dev/null
@@ -1 +0,0 @@
-JNI_OnLoad called
diff --git a/test/455-set-vreg/info.txt b/test/455-set-vreg/info.txt
deleted file mode 100644
index e8c57b5..0000000
--- a/test/455-set-vreg/info.txt
+++ /dev/null
@@ -1 +0,0 @@
-Tests for setting DEX registers in a Java method.
diff --git a/test/455-set-vreg/set_vreg_jni.cc b/test/455-set-vreg/set_vreg_jni.cc
deleted file mode 100644
index 21149f6..0000000
--- a/test/455-set-vreg/set_vreg_jni.cc
+++ /dev/null
@@ -1,96 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "arch/context.h"
-#include "art_method-inl.h"
-#include "jni.h"
-#include "scoped_thread_state_change.h"
-#include "stack.h"
-#include "thread.h"
-
-namespace art {
-
-namespace {
-
-class TestVisitor : public StackVisitor {
- public:
-  TestVisitor(Thread* thread, Context* context, mirror::Object* this_value)
-      SHARED_REQUIRES(Locks::mutator_lock_)
-      : StackVisitor(thread, context, StackVisitor::StackWalkKind::kIncludeInlinedFrames),
-        this_value_(this_value) {}
-
-  bool VisitFrame() SHARED_REQUIRES(Locks::mutator_lock_) {
-    ArtMethod* m = GetMethod();
-    std::string m_name(m->GetName());
-
-    if (m_name.compare("testIntVReg") == 0) {
-      uint32_t value = 0;
-      CHECK(GetVReg(m, 1, kReferenceVReg, &value));
-      CHECK_EQ(reinterpret_cast<mirror::Object*>(value), this_value_);
-
-      CHECK(SetVReg(m, 2, 5, kIntVReg));
-      CHECK(SetVReg(m, 3, 4, kIntVReg));
-      CHECK(SetVReg(m, 4, 3, kIntVReg));
-      CHECK(SetVReg(m, 5, 2, kIntVReg));
-      CHECK(SetVReg(m, 6, 1, kIntVReg));
-    } else if (m_name.compare("testLongVReg") == 0) {
-      uint32_t value = 0;
-      CHECK(GetVReg(m, 3, kReferenceVReg, &value));
-      CHECK_EQ(reinterpret_cast<mirror::Object*>(value), this_value_);
-
-      CHECK(SetVRegPair(m, 4, std::numeric_limits<int64_t>::max(), kLongLoVReg, kLongHiVReg));
-      CHECK(SetVRegPair(m, 6, 4, kLongLoVReg, kLongHiVReg));
-      CHECK(SetVRegPair(m, 8, 3, kLongLoVReg, kLongHiVReg));
-      CHECK(SetVRegPair(m, 10, 2, kLongLoVReg, kLongHiVReg));
-      CHECK(SetVRegPair(m, 12, 1, kLongLoVReg, kLongHiVReg));
-    } else if (m_name.compare("testFloatVReg") == 0) {
-      uint32_t value = 0;
-      CHECK(GetVReg(m, 1, kReferenceVReg, &value));
-      CHECK_EQ(reinterpret_cast<mirror::Object*>(value), this_value_);
-
-      CHECK(SetVReg(m, 2, bit_cast<uint32_t, float>(5.0f), kFloatVReg));
-      CHECK(SetVReg(m, 3, bit_cast<uint32_t, float>(4.0f), kFloatVReg));
-      CHECK(SetVReg(m, 4, bit_cast<uint32_t, float>(3.0f), kFloatVReg));
-      CHECK(SetVReg(m, 5, bit_cast<uint32_t, float>(2.0f), kFloatVReg));
-      CHECK(SetVReg(m, 6, bit_cast<uint32_t, float>(1.0f), kFloatVReg));
-    } else if (m_name.compare("testDoubleVReg") == 0) {
-      uint32_t value = 0;
-      CHECK(GetVReg(m, 3, kReferenceVReg, &value));
-      CHECK_EQ(reinterpret_cast<mirror::Object*>(value), this_value_);
-
-      CHECK(SetVRegPair(m, 4, bit_cast<uint64_t, double>(5.0), kDoubleLoVReg, kDoubleHiVReg));
-      CHECK(SetVRegPair(m, 6, bit_cast<uint64_t, double>(4.0), kDoubleLoVReg, kDoubleHiVReg));
-      CHECK(SetVRegPair(m, 8, bit_cast<uint64_t, double>(3.0), kDoubleLoVReg, kDoubleHiVReg));
-      CHECK(SetVRegPair(m, 10, bit_cast<uint64_t, double>(2.0), kDoubleLoVReg, kDoubleHiVReg));
-      CHECK(SetVRegPair(m, 12, bit_cast<uint64_t, double>(1.0), kDoubleLoVReg, kDoubleHiVReg));
-    }
-
-    return true;
-  }
-
-  mirror::Object* this_value_;
-};
-
-extern "C" JNIEXPORT void JNICALL Java_Main_doNativeCallSetVReg(JNIEnv*, jobject value) {
-  ScopedObjectAccess soa(Thread::Current());
-  std::unique_ptr<Context> context(Context::Create());
-  TestVisitor visitor(soa.Self(), context.get(), soa.Decode<mirror::Object*>(value));
-  visitor.WalkStack();
-}
-
-}  // namespace
-
-}  // namespace art
diff --git a/test/455-set-vreg/src/Main.java b/test/455-set-vreg/src/Main.java
deleted file mode 100644
index 4db9d66..0000000
--- a/test/455-set-vreg/src/Main.java
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-public class Main {
-  public Main() {
-  }
-
-  int testIntVReg(int a, int b, int c, int d, int e) {
-    doNativeCallSetVReg();
-    return a - b - c - d - e;
-  }
-
-  long testLongVReg(long a, long b, long c, long d, long e) {
-    doNativeCallSetVReg();
-    return a - b - c - d - e;
-  }
-
-  float testFloatVReg(float a, float b, float c, float d, float e) {
-    doNativeCallSetVReg();
-    return a - b - c - d - e;
-  }
-
-  double testDoubleVReg(double a, double b, double c, double d, double e) {
-    doNativeCallSetVReg();
-    return a - b - c - d - e;
-  }
-
-  native void doNativeCallSetVReg();
-
-  public static void main(String[] args) {
-    System.loadLibrary(args[0]);
-    Main rm = new Main();
-    int intExpected = 5 - 4 - 3 - 2 - 1;
-    int intResult = rm.testIntVReg(0, 0, 0, 0, 0);
-    if (intResult != intExpected) {
-      throw new Error("Expected " + intExpected + ", got " + intResult);
-    }
-
-    long longExpected = Long.MAX_VALUE - 4 - 3 - 2 - 1;
-    long longResult = rm.testLongVReg(0, 0, 0, 0, 0);
-    if (longResult != longExpected) {
-      throw new Error("Expected " + longExpected + ", got " + longResult);
-    }
-
-    float floatExpected = 5.0f - 4.0f - 3.0f - 2.0f - 1.0f;
-    float floatResult = rm.testFloatVReg(0.0f, 0.0f, 0.0f, 0.0f, 0.0f);
-    if (floatResult != floatExpected) {
-      throw new Error("Expected " + floatExpected + ", got " + floatResult);
-    }
-
-    double doubleExpected = 5.0 - 4.0 - 3.0 - 2.0 - 1.0;
-    double doubleResult = rm.testDoubleVReg(0.0, 0.0, 0.0, 0.0, 0.0);
-    if (doubleResult != doubleExpected) {
-      throw new Error("Expected " + doubleExpected + ", got " + doubleResult);
-    }
-  }
-}
diff --git a/test/457-regs/regs_jni.cc b/test/457-regs/regs_jni.cc
index 64b2336..79fa8b0 100644
--- a/test/457-regs/regs_jni.cc
+++ b/test/457-regs/regs_jni.cc
@@ -68,7 +68,7 @@
         CHECK(!success);
       }
 
-      CHECK(GetVReg(m, 3, kReferenceVReg, &value));
+      CHECK(GetVReg(m, 3, kIntVReg, &value));
       CHECK_EQ(value, 1u);
 
       CHECK(GetVReg(m, 4, kFloatVReg, &value));
diff --git a/test/478-checker-clinit-check-pruning/expected.txt b/test/478-checker-clinit-check-pruning/expected.txt
index 387e1a7..7de097f 100644
--- a/test/478-checker-clinit-check-pruning/expected.txt
+++ b/test/478-checker-clinit-check-pruning/expected.txt
@@ -4,3 +4,9 @@
 Main$ClassWithClinit4's static initializer
 Main$ClassWithClinit5's static initializer
 Main$ClassWithClinit6's static initializer
+Main$ClassWithClinit7's static initializer
+Main$ClassWithClinit8's static initializer
+Main$ClassWithClinit9's static initializer
+Main$ClassWithClinit10's static initializer
+Main$ClassWithClinit11's static initializer
+Main$ClassWithClinit12's static initializer
diff --git a/test/478-checker-clinit-check-pruning/src/Main.java b/test/478-checker-clinit-check-pruning/src/Main.java
index cff6273..7993513 100644
--- a/test/478-checker-clinit-check-pruning/src/Main.java
+++ b/test/478-checker-clinit-check-pruning/src/Main.java
@@ -83,7 +83,7 @@
   // before the next pass (liveness analysis) instead.
 
   /// CHECK-START: void Main.invokeStaticNotInlined() liveness (before)
-  /// CHECK:                               InvokeStaticOrDirect
+  /// CHECK:                               InvokeStaticOrDirect clinit_check:implicit
 
   /// CHECK-START: void Main.invokeStaticNotInlined() liveness (before)
   /// CHECK-NOT:                           LoadClass
@@ -269,7 +269,7 @@
   /// CHECK-START: void Main.noClinitBecauseOfInvokeStatic() liveness (before)
   /// CHECK-DAG:     <<IntConstant:i\d+>>  IntConstant 0
   /// CHECK-DAG:     <<LoadClass:l\d+>>    LoadClass gen_clinit_check:false
-  /// CHECK-DAG:                           InvokeStaticOrDirect
+  /// CHECK-DAG:                           InvokeStaticOrDirect clinit_check:implicit
   /// CHECK-DAG:                           StaticFieldSet [<<LoadClass>>,<<IntConstant>>]
 
   /// CHECK-START: void Main.noClinitBecauseOfInvokeStatic() liveness (before)
@@ -289,7 +289,7 @@
   /// CHECK-DAG:     <<IntConstant:i\d+>>  IntConstant 0
   /// CHECK-DAG:     <<LoadClass:l\d+>>    LoadClass gen_clinit_check:true
   /// CHECK-DAG:                           StaticFieldSet [<<LoadClass>>,<<IntConstant>>]
-  /// CHECK-DAG:                           InvokeStaticOrDirect
+  /// CHECK-DAG:                           InvokeStaticOrDirect clinit_check:none
 
   /// CHECK-START: void Main.clinitBecauseOfFieldAccess() liveness (before)
   /// CHECK-NOT:                           ClinitCheck
@@ -298,6 +298,206 @@
     ClassWithClinit2.$noinline$staticMethod();
   }
 
+  /*
+   * Verify that LoadClass from const-class is not merged with
+   * later invoke-static (or its ClinitCheck).
+   */
+
+  /// CHECK-START: void Main.constClassAndInvokeStatic(java.lang.Iterable) liveness (before)
+  /// CHECK:                               LoadClass gen_clinit_check:false
+  /// CHECK:                               InvokeStaticOrDirect clinit_check:implicit
+
+  /// CHECK-START: void Main.constClassAndInvokeStatic(java.lang.Iterable) liveness (before)
+  /// CHECK-NOT:                           ClinitCheck
+
+  static void constClassAndInvokeStatic(Iterable it) {
+    $opt$inline$ignoreClass(ClassWithClinit7.class);
+    ClassWithClinit7.someStaticMethod(it);
+  }
+
+  static void $opt$inline$ignoreClass(Class c) {
+  }
+
+  static class ClassWithClinit7 {
+    static {
+      System.out.println("Main$ClassWithClinit7's static initializer");
+    }
+
+    // Note: not inlined from constClassAndInvokeStatic() but fully inlined from main().
+    static void someStaticMethod(Iterable it) {
+      // We're not inlining invoke-interface at the moment.
+      it.iterator();
+    }
+  }
+
+  /*
+   * Verify that LoadClass from sget is not merged with later invoke-static.
+   */
+
+  /// CHECK-START: void Main.sgetAndInvokeStatic(java.lang.Iterable) liveness (before)
+  /// CHECK:                               LoadClass gen_clinit_check:true
+  /// CHECK:                               InvokeStaticOrDirect clinit_check:none
+
+  /// CHECK-START: void Main.sgetAndInvokeStatic(java.lang.Iterable) liveness (before)
+  /// CHECK-NOT:                           ClinitCheck
+
+  static void sgetAndInvokeStatic(Iterable it) {
+    $opt$inline$ignoreInt(ClassWithClinit8.value);
+    ClassWithClinit8.someStaticMethod(it);
+  }
+
+  static void $opt$inline$ignoreInt(int i) {
+  }
+
+  static class ClassWithClinit8 {
+    public static int value = 0;
+    static {
+      System.out.println("Main$ClassWithClinit8's static initializer");
+    }
+
+    // Note: not inlined from sgetAndInvokeStatic() but fully inlined from main().
+    static void someStaticMethod(Iterable it) {
+      // We're not inlining invoke-interface at the moment.
+      it.iterator();
+    }
+  }
+
+  /*
+   * Verify that LoadClass from const-class, ClinitCheck from sget and
+   * InvokeStaticOrDirect from invoke-static are not merged.
+   */
+
+  /// CHECK-START: void Main.constClassSgetAndInvokeStatic(java.lang.Iterable) liveness (before)
+  /// CHECK:                               LoadClass gen_clinit_check:false
+  /// CHECK:                               ClinitCheck
+  /// CHECK:                               InvokeStaticOrDirect clinit_check:none
+
+  static void constClassSgetAndInvokeStatic(Iterable it) {
+    $opt$inline$ignoreClass(ClassWithClinit9.class);
+    $opt$inline$ignoreInt(ClassWithClinit9.value);
+    ClassWithClinit9.someStaticMethod(it);
+  }
+
+  static class ClassWithClinit9 {
+    public static int value = 0;
+    static {
+      System.out.println("Main$ClassWithClinit9's static initializer");
+    }
+
+    // Note: not inlined from constClassSgetAndInvokeStatic() but fully inlined from main().
+    static void someStaticMethod(Iterable it) {
+      // We're not inlining invoke-interface at the moment.
+      it.iterator();
+    }
+  }
+
+  /*
+   * Verify that LoadClass from a fully-inlined invoke-static is not merged
+   * with InvokeStaticOrDirect from a later invoke-static to the same method.
+   */
+
+  /// CHECK-START: void Main.inlinedInvokeStaticViaNonStatic(java.lang.Iterable) liveness (before)
+  /// CHECK:                               LoadClass gen_clinit_check:true
+  /// CHECK:                               InvokeStaticOrDirect clinit_check:none
+
+  /// CHECK-START: void Main.inlinedInvokeStaticViaNonStatic(java.lang.Iterable) liveness (before)
+  /// CHECK-NOT:                           ClinitCheck
+
+  static void inlinedInvokeStaticViaNonStatic(Iterable it) {
+    inlinedInvokeStaticViaNonStaticHelper(null);
+    inlinedInvokeStaticViaNonStaticHelper(it);
+  }
+
+  static void inlinedInvokeStaticViaNonStaticHelper(Iterable it) {
+    ClassWithClinit10.inlinedForNull(it);
+  }
+
+  static class ClassWithClinit10 {
+    public static int value = 0;
+    static {
+      System.out.println("Main$ClassWithClinit10's static initializer");
+    }
+
+    static void inlinedForNull(Iterable it) {
+      if (it != null) {
+        // We're not inlining invoke-interface at the moment.
+        it.iterator();
+      }
+    }
+  }
+
+  /*
+   * Check that the LoadClass from an invoke-static C.foo() doesn't get merged with
+   * an invoke-static inside C.foo(). This would mess up the stack walk in the
+   * resolution trampoline where we would have to load C (if C isn't loaded yet),
+   * which is not permitted there.
+   *
+   * Note: In case of failure, we would get a failed assertion during compilation,
+   * so we wouldn't really get to the checker tests below.
+   */
+
+  /// CHECK-START: void Main.inlinedInvokeStaticViaStatic(java.lang.Iterable) liveness (before)
+  /// CHECK:                               LoadClass gen_clinit_check:true
+  /// CHECK:                               InvokeStaticOrDirect clinit_check:none
+
+  /// CHECK-START: void Main.inlinedInvokeStaticViaStatic(java.lang.Iterable) liveness (before)
+  /// CHECK-NOT:                           ClinitCheck
+
+  static void inlinedInvokeStaticViaStatic(Iterable it) {
+    ClassWithClinit11.callInlinedForNull(it);
+  }
+
+  static class ClassWithClinit11 {
+    public static int value = 0;
+    static {
+      System.out.println("Main$ClassWithClinit11's static initializer");
+    }
+
+    static void callInlinedForNull(Iterable it) {
+      inlinedForNull(it);
+    }
+
+    static void inlinedForNull(Iterable it) {
+      // We're not inlining invoke-interface at the moment.
+      it.iterator();
+    }
+  }
+
+  /*
+   * A test similar to inlinedInvokeStaticViaStatic() but doing the indirect invoke
+   * twice, with the first call expected to be fully inlined.
+   */
+
+  /// CHECK-START: void Main.inlinedInvokeStaticViaStaticTwice(java.lang.Iterable) liveness (before)
+  /// CHECK:                               LoadClass gen_clinit_check:true
+  /// CHECK:                               InvokeStaticOrDirect clinit_check:none
+
+  /// CHECK-START: void Main.inlinedInvokeStaticViaStaticTwice(java.lang.Iterable) liveness (before)
+  /// CHECK-NOT:                           ClinitCheck
+
+  static void inlinedInvokeStaticViaStaticTwice(Iterable it) {
+    ClassWithClinit12.callInlinedForNull(null);
+    ClassWithClinit12.callInlinedForNull(it);
+  }
+
+  static class ClassWithClinit12 {
+    public static int value = 0;
+    static {
+      System.out.println("Main$ClassWithClinit12's static initializer");
+    }
+
+    static void callInlinedForNull(Iterable it) {
+      inlinedForNull(it);
+    }
+
+    static void inlinedForNull(Iterable it) {
+      if (it != null) {
+        // We're not inlining invoke-interface at the moment.
+        it.iterator();
+      }
+    }
+  }
+
   // TODO: Add a test for the case of a static method whose declaring
   // class type index is not available (i.e. when `storage_index`
   // equals `DexFile::kDexNoIndex` in
@@ -310,5 +510,12 @@
     ClassWithClinit4.invokeStaticNotInlined();
     SubClassOfClassWithClinit5.invokeStaticInlined();
     SubClassOfClassWithClinit6.invokeStaticNotInlined();
+    Iterable it = new Iterable() { public java.util.Iterator iterator() { return null; } };
+    constClassAndInvokeStatic(it);
+    sgetAndInvokeStatic(it);
+    constClassSgetAndInvokeStatic(it);
+    inlinedInvokeStaticViaNonStatic(it);
+    inlinedInvokeStaticViaStatic(it);
+    inlinedInvokeStaticViaStaticTwice(it);
   }
 }
diff --git a/test/530-checker-lse/src/Main.java b/test/530-checker-lse/src/Main.java
index c766aaa..13c4722 100644
--- a/test/530-checker-lse/src/Main.java
+++ b/test/530-checker-lse/src/Main.java
@@ -22,7 +22,7 @@
     return radius * radius * Math.PI;
   }
   private double radius;
-};
+}
 
 class TestClass {
   TestClass() {
@@ -35,17 +35,31 @@
   int j;
   volatile int k;
   TestClass next;
+  String str;
   static int si;
-};
+}
 
 class SubTestClass extends TestClass {
   int k;
-};
+}
 
 class TestClass2 {
   int i;
   int j;
-};
+}
+
+class Finalizable {
+  static boolean sVisited = false;
+  static final int VALUE = 0xbeef;
+  int i;
+
+  protected void finalize() {
+    if (i != VALUE) {
+      System.out.println("Where is the beef?");
+    }
+    sVisited = true;
+  }
+}
 
 public class Main {
 
@@ -56,7 +70,7 @@
 
   /// CHECK-START: double Main.calcCircleArea(double) load_store_elimination (after)
   /// CHECK: NewInstance
-  /// CHECK: InstanceFieldSet
+  /// CHECK-NOT: InstanceFieldSet
   /// CHECK-NOT: InstanceFieldGet
 
   static double calcCircleArea(double radius) {
@@ -117,7 +131,7 @@
   /// CHECK: InstanceFieldGet
   /// CHECK: InstanceFieldSet
   /// CHECK: NewInstance
-  /// CHECK: InstanceFieldSet
+  /// CHECK-NOT: InstanceFieldSet
   /// CHECK-NOT: InstanceFieldGet
 
   // A new allocation shouldn't alias with pre-existing values.
@@ -223,7 +237,7 @@
 
   /// CHECK-START: int Main.test8() load_store_elimination (after)
   /// CHECK: NewInstance
-  /// CHECK: InstanceFieldSet
+  /// CHECK-NOT: InstanceFieldSet
   /// CHECK: InvokeVirtual
   /// CHECK-NOT: NullCheck
   /// CHECK-NOT: InstanceFieldGet
@@ -381,8 +395,8 @@
 
   /// CHECK-START: int Main.test16() load_store_elimination (after)
   /// CHECK: NewInstance
-  /// CHECK-NOT: StaticFieldSet
-  /// CHECK-NOT: StaticFieldGet
+  /// CHECK-NOT: InstanceFieldSet
+  /// CHECK-NOT: InstanceFieldGet
 
   // Test inlined constructor.
   static int test16() {
@@ -398,8 +412,8 @@
   /// CHECK-START: int Main.test17() load_store_elimination (after)
   /// CHECK: <<Const0:i\d+>> IntConstant 0
   /// CHECK: NewInstance
-  /// CHECK-NOT: StaticFieldSet
-  /// CHECK-NOT: StaticFieldGet
+  /// CHECK-NOT: InstanceFieldSet
+  /// CHECK-NOT: InstanceFieldGet
   /// CHECK: Return [<<Const0>>]
 
   // Test getting default value.
@@ -455,6 +469,148 @@
     return obj;
   }
 
+  /// CHECK-START: void Main.test21() load_store_elimination (before)
+  /// CHECK: NewInstance
+  /// CHECK: InstanceFieldSet
+  /// CHECK: StaticFieldSet
+  /// CHECK: StaticFieldGet
+
+  /// CHECK-START: void Main.test21() load_store_elimination (after)
+  /// CHECK: NewInstance
+  /// CHECK: InstanceFieldSet
+  /// CHECK: StaticFieldSet
+  /// CHECK: InstanceFieldGet
+
+  // Loop side effects can kill heap values; stores need to be kept in that case.
+  static void test21() {
+    TestClass obj = new TestClass();
+    obj.str = "abc";
+    for (int i = 0; i < 2; i++) {
+      // Generate a loop side effect that does a write.
+      obj.si = 1;
+    }
+    System.out.print(obj.str.substring(0, 0));
+  }
+
+  /// CHECK-START: int Main.test22() load_store_elimination (before)
+  /// CHECK: NewInstance
+  /// CHECK: InstanceFieldSet
+  /// CHECK: NewInstance
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldGet
+  /// CHECK: NewInstance
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldGet
+  /// CHECK: InstanceFieldGet
+
+  /// CHECK-START: int Main.test22() load_store_elimination (after)
+  /// CHECK: NewInstance
+  /// CHECK: InstanceFieldSet
+  /// CHECK: NewInstance
+  /// CHECK-NOT: InstanceFieldSet
+  /// CHECK-NOT: InstanceFieldGet
+  /// CHECK: NewInstance
+  /// CHECK-NOT: InstanceFieldSet
+  /// CHECK: InstanceFieldGet
+  /// CHECK-NOT: InstanceFieldGet
+
+  // Loop side effects only affect stores into singletons that dominate the loop header.
+  static int test22() {
+    int sum = 0;
+    TestClass obj1 = new TestClass();
+    obj1.i = 2;       // This store can't be eliminated since it can be killed by loop side effects.
+    for (int i = 0; i < 2; i++) {
+      TestClass obj2 = new TestClass();
+      obj2.i = 3;    // This store can be eliminated since the singleton is inside the loop.
+      sum += obj2.i;
+    }
+    TestClass obj3 = new TestClass();
+    obj3.i = 5;      // This store can be eliminated since the singleton is created after the loop.
+    sum += obj1.i + obj3.i;
+    return sum;
+  }
+
+  /// CHECK-START: int Main.test23(boolean) load_store_elimination (before)
+  /// CHECK: NewInstance
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldGet
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldGet
+  /// CHECK: Return
+  /// CHECK: InstanceFieldGet
+  /// CHECK: InstanceFieldSet
+
+  /// CHECK-START: int Main.test23(boolean) load_store_elimination (after)
+  /// CHECK: NewInstance
+  /// CHECK-NOT: InstanceFieldSet
+  /// CHECK-NOT: InstanceFieldGet
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldGet
+  /// CHECK: Return
+  /// CHECK-NOT: InstanceFieldGet
+  /// CHECK: InstanceFieldSet
+
+  // Test store elimination on merging.
+  static int test23(boolean b) {
+    TestClass obj = new TestClass();
+    obj.i = 3;      // This store can be eliminated since the value flows into each branch.
+    if (b) {
+      obj.i += 1;   // This store cannot be eliminated due to the merge later.
+    } else {
+      obj.i += 2;   // This store cannot be eliminated due to the merge later.
+    }
+    return obj.i;
+  }
+
+  /// CHECK-START: void Main.testFinalizable() load_store_elimination (before)
+  /// CHECK: NewInstance
+  /// CHECK: InstanceFieldSet
+
+  /// CHECK-START: void Main.testFinalizable() load_store_elimination (after)
+  /// CHECK: NewInstance
+  /// CHECK: InstanceFieldSet
+
+  // Allocations and stores into finalizable objects cannot be eliminated.
+  static void testFinalizable() {
+    Finalizable finalizable = new Finalizable();
+    finalizable.i = Finalizable.VALUE;
+  }
+
+  static java.lang.ref.WeakReference<Object> getWeakReference() {
+    return new java.lang.ref.WeakReference<>(new Object());
+  }
+
+  static void testFinalizableByForcingGc() {
+    testFinalizable();
+    java.lang.ref.WeakReference<Object> reference = getWeakReference();
+
+    Runtime runtime = Runtime.getRuntime();
+    for (int i = 0; i < 20; ++i) {
+      runtime.gc();
+      System.runFinalization();
+      try {
+        Thread.sleep(1);
+      } catch (InterruptedException e) {
+        throw new AssertionError(e);
+      }
+
+      // Check to see if the weak reference has been garbage collected.
+      if (reference.get() == null) {
+        // A little bit more sleep time to make sure.
+        try {
+          Thread.sleep(100);
+        } catch (InterruptedException e) {
+          throw new AssertionError(e);
+        }
+        if (!Finalizable.sVisited) {
+          System.out.println("finalize() not called.");
+        }
+        return;
+      }
+    }
+    System.out.println("testFinalizableByForcingGc() failed to force gc.");
+  }
+
   public static void assertIntEquals(int expected, int result) {
     if (expected != result) {
       throw new Error("Expected: " + expected + ", found: " + result);
@@ -508,5 +664,10 @@
     float[] fa2 = { 1.8f };
     assertFloatEquals(test19(fa1, fa2), 1.8f);
     assertFloatEquals(test20().i, 0);
+    test21();
+    assertIntEquals(test22(), 13);
+    assertIntEquals(test23(true), 4);
+    assertIntEquals(test23(false), 5);
+    testFinalizableByForcingGc();
   }
 }
diff --git a/test/549-checker-types-merge/expected.txt b/test/549-checker-types-merge/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/549-checker-types-merge/expected.txt
diff --git a/test/549-checker-types-merge/info.txt b/test/549-checker-types-merge/info.txt
new file mode 100644
index 0000000..f174e20
--- /dev/null
+++ b/test/549-checker-types-merge/info.txt
@@ -0,0 +1 @@
+Checker test for testing the type merge during reference type propagation.
diff --git a/test/549-checker-types-merge/src/Main.java b/test/549-checker-types-merge/src/Main.java
new file mode 100644
index 0000000..dc27f10
--- /dev/null
+++ b/test/549-checker-types-merge/src/Main.java
@@ -0,0 +1,130 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// To make it easier to follow the tests:
+//  - all interfaces defined in this file extend InterfaceSuper (except InterfaceOtherSuper)
+//  - all classes defined in this file extend ClassSuper (except ClassOtherSuper)
+
+interface InterfaceSuper {}
+interface InterfaceOtherSuper {}
+
+interface InterfaceA extends InterfaceSuper {}
+interface InterfaceB extends InterfaceSuper {}
+interface InterfaceExtendsA extends InterfaceA {}
+interface InterfaceExtendsB extends InterfaceB {}
+
+class ClassSuper {}
+class ClassOtherSuper {}
+
+class ClassA extends ClassSuper {}
+class ClassB extends ClassSuper {}
+class ClassExtendsA extends ClassA {}
+class ClassExtendsB extends ClassB {}
+
+class ClassImplementsInterfaceA extends ClassSuper implements InterfaceA {}
+
+public class Main {
+
+  /// CHECK-START: java.lang.Object Main.testMergeNullContant(boolean) reference_type_propagation (after)
+  /// CHECK:      <<Phi:l\d+>>       Phi klass:Main
+  /// CHECK:                         Return [<<Phi>>]
+  private Object testMergeNullContant(boolean cond) {
+    return cond ? null : new Main();
+  }
+
+  /// CHECK-START: java.lang.Object Main.testMergeClasses(boolean, ClassExtendsA, ClassExtendsB) reference_type_propagation (after)
+  /// CHECK:      <<Phi:l\d+>>       Phi klass:ClassSuper
+  /// CHECK:                         Return [<<Phi>>]
+  private Object testMergeClasses(boolean cond, ClassExtendsA a, ClassExtendsB b) {
+    // Different classes that have a common super type.
+    return cond ? a : b;
+  }
+
+  /// CHECK-START: java.lang.Object Main.testMergeClasses(boolean, ClassExtendsA, ClassSuper) reference_type_propagation (after)
+  /// CHECK:      <<Phi:l\d+>>       Phi klass:ClassSuper
+  /// CHECK:                         Return [<<Phi>>]
+  private Object testMergeClasses(boolean cond, ClassExtendsA a, ClassSuper b) {
+    // Different classes, one is the super type of the other.
+    return cond ? a : b;
+  }
+
+  /// CHECK-START: java.lang.Object Main.testMergeClasses(boolean, ClassSuper, ClassSuper) reference_type_propagation (after)
+  /// CHECK:      <<Phi:l\d+>>       Phi klass:ClassSuper
+  /// CHECK:                         Return [<<Phi>>]
+  private Object testMergeClasses(boolean cond, ClassSuper a, ClassSuper b) {
+    // Same classes.
+    return cond ? a : b;
+  }
+
+  /// CHECK-START: java.lang.Object Main.testMergeClasses(boolean, ClassOtherSuper, ClassSuper) reference_type_propagation (after)
+  /// CHECK:      <<Phi:l\d+>>       Phi klass:java.lang.Object
+  /// CHECK:                         Return [<<Phi>>]
+  private Object testMergeClasses(boolean cond, ClassOtherSuper a, ClassSuper b) {
+    // Different classes that have Object as the common super type.
+    return cond ? a : b;
+  }
+
+  /// CHECK-START: java.lang.Object Main.testMergeClassWithInterface(boolean, ClassImplementsInterfaceA, InterfaceSuper) reference_type_propagation (after)
+  /// CHECK:      <<Phi:l\d+>>       Phi klass:InterfaceSuper
+  /// CHECK:                         Return [<<Phi>>]
+  private Object testMergeClassWithInterface(boolean cond, ClassImplementsInterfaceA a, InterfaceSuper b) {
+    // Class implements interface.
+    return cond ? a : b;
+  }
+
+  /// CHECK-START: java.lang.Object Main.testMergeClassWithInterface(boolean, ClassSuper, InterfaceSuper) reference_type_propagation (after)
+  /// CHECK:      <<Phi:l\d+>>       Phi klass:java.lang.Object
+  /// CHECK:                         Return [<<Phi>>]
+  private Object testMergeClassWithInterface(boolean cond, ClassSuper a, InterfaceSuper b) {
+    // Class doesn't implement interface.
+    return cond ? a : b;
+  }
+
+  /// CHECK-START: java.lang.Object Main.testMergeInterfaces(boolean, InterfaceExtendsA, InterfaceSuper) reference_type_propagation (after)
+  /// CHECK:      <<Phi:l\d+>>       Phi klass:InterfaceSuper
+  /// CHECK:                         Return [<<Phi>>]
+  private Object testMergeInterfaces(boolean cond, InterfaceExtendsA a, InterfaceSuper b) {
+    // Different interfaces, one extends the other.
+    return cond ? a : b;
+  }
+
+  /// CHECK-START: java.lang.Object Main.testMergeInterfaces(boolean, InterfaceSuper, InterfaceSuper) reference_type_propagation (after)
+  /// CHECK:      <<Phi:l\d+>>       Phi klass:InterfaceSuper
+  /// CHECK:                         Return [<<Phi>>]
+  private Object testMergeInterfaces(boolean cond, InterfaceSuper a, InterfaceSuper b) {
+    // Same interfaces.
+    return cond ? a : b;
+  }
+
+  /// CHECK-START: java.lang.Object Main.testMergeInterfaces(boolean, InterfaceExtendsA, InterfaceExtendsB) reference_type_propagation (after)
+  /// CHECK:      <<Phi:l\d+>>       Phi klass:java.lang.Object
+  /// CHECK:                         Return [<<Phi>>]
+  private Object testMergeInterfaces(boolean cond, InterfaceExtendsA a, InterfaceExtendsB b) {
+    // Different interfaces that have a common super type.
+    return cond ? a : b;
+  }
+
+  /// CHECK-START: java.lang.Object Main.testMergeInterfaces(boolean, InterfaceSuper, InterfaceOtherSuper) reference_type_propagation (after)
+  /// CHECK:      <<Phi:l\d+>>       Phi klass:java.lang.Object
+  /// CHECK:                         Return [<<Phi>>]
+  private Object testMergeInterfaces(boolean cond, InterfaceSuper a, InterfaceOtherSuper b) {
+    // Different interfaces.
+    return cond ? a : b;
+  }
+
+  public static void main(String[] args) {
+  }
+}
diff --git a/test/Android.libarttest.mk b/test/Android.libarttest.mk
index bffd0e0..7a22e1b 100644
--- a/test/Android.libarttest.mk
+++ b/test/Android.libarttest.mk
@@ -34,7 +34,6 @@
   139-register-natives/regnative.cc \
   141-class-unload/jni_unload.cc \
   454-get-vreg/get_vreg_jni.cc \
-  455-set-vreg/set_vreg_jni.cc \
   457-regs/regs_jni.cc \
   461-get-reference-vreg/get_reference_vreg_jni.cc \
   466-get-live-vreg/get_live_vreg_jni.cc \
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index 3e97b86..c830ad4 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -464,18 +464,6 @@
 
 TEST_ART_BROKEN_OPTIMIZING_MIPS_RUN_TESTS :=
 
-# Known broken tests for the optimizing compiler.
-TEST_ART_BROKEN_OPTIMIZING_RUN_TESTS := \
-  455-set-vreg \
-
-ifneq (,$(filter optimizing,$(COMPILER_TYPES)))
-  ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
-      optimizing,$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
-      $(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES),$(TEST_ART_BROKEN_OPTIMIZING_RUN_TESTS),$(ALL_ADDRESS_SIZES))
-endif
-
-TEST_ART_BROKEN_OPTIMIZING_RUN_TESTS :=
-
 # Tests that should fail when the optimizing compiler compiles them non-debuggable.
 TEST_ART_BROKEN_OPTIMIZING_NONDEBUGGABLE_RUN_TESTS := \
   454-get-vreg \
diff --git a/test/run-test b/test/run-test
index 10ec310..d0da34e 100755
--- a/test/run-test
+++ b/test/run-test
@@ -669,9 +669,9 @@
 # -------------------------------
 # Return whether the Optimizing compiler has read barrier support for ARCH.
 function arch_supports_read_barrier() {
-  # Optimizing has read barrier support for x86 and x86-64 at the
+  # Optimizing has read barrier support for ARM, x86 and x86-64 at the
   # moment.
-  [ "x$1" = xx86 ] || [ "x$1" = xx86_64 ]
+  [ "x$1" = xarm ] || [ "x$1" = xx86 ] || [ "x$1" = xx86_64 ]
 }
 
 # Tests named '<number>-checker-*' will also have their CFGs verified with
diff --git a/tools/buildbot-build.sh b/tools/buildbot-build.sh
index 047c24f..02787fb 100755
--- a/tools/buildbot-build.sh
+++ b/tools/buildbot-build.sh
@@ -21,7 +21,7 @@
 
 out_dir=${OUT_DIR-out}
 java_libraries_dir=${out_dir}/target/common/obj/JAVA_LIBRARIES
-common_targets="vogar ${java_libraries_dir}/core-tests_intermediates/javalib.jar apache-harmony-jdwp-tests-hostdex ${java_libraries_dir}/jsr166-tests_intermediates/javalib.jar"
+common_targets="vogar ${java_libraries_dir}/core-tests_intermediates/javalib.jar apache-harmony-jdwp-tests-hostdex ${java_libraries_dir}/jsr166-tests_intermediates/javalib.jar ${out_dir}/host/linux-x86/bin/jack"
 mode="target"
 j_arg="-j$(nproc)"
 showcommands=