Merge "Revert "Inline across dex files.""
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index 6b6a9e0..948c756 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -159,6 +159,7 @@
   runtime/intern_table_test.cc \
   runtime/interpreter/safe_math_test.cc \
   runtime/java_vm_ext_test.cc \
+  runtime/jit/jit_code_cache_test.cc \
   runtime/leb128_test.cc \
   runtime/mem_map_test.cc \
   runtime/memory_region_test.cc \
diff --git a/compiler/dex/quick/arm/assemble_arm.cc b/compiler/dex/quick/arm/assemble_arm.cc
index 8833da3..3e69878 100644
--- a/compiler/dex/quick/arm/assemble_arm.cc
+++ b/compiler/dex/quick/arm/assemble_arm.cc
@@ -848,7 +848,7 @@
     ENCODING_MAP(kThumb2LdrPcRel12,       0xf8df0000,
                  kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 0, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
-                 IS_TERTIARY_OP | REG_DEF0 | REG_USE_PC | IS_LOAD_OFF | NEEDS_FIXUP,
+                 IS_BINARY_OP | REG_DEF0 | REG_USE_PC | IS_LOAD_OFF | NEEDS_FIXUP,
                  "ldr", "!0C, [r15pc, #!1d]", 4, kFixupLoad),
     ENCODING_MAP(kThumb2BCond,        0xf0008000,
                  kFmtBrOffset, -1, -1, kFmtBitBlt, 25, 22, kFmtUnused, -1, -1,
@@ -1502,7 +1502,7 @@
           break;
         }
         case kFixupAdr: {
-          EmbeddedData *tab_rec = reinterpret_cast<EmbeddedData*>(UnwrapPointer(lir->operands[2]));
+          const EmbeddedData* tab_rec = UnwrapPointer<EmbeddedData>(lir->operands[2]);
           LIR* target = lir->target;
           int32_t target_disp = (tab_rec != NULL) ?  tab_rec->offset + offset_adjustment
               : target->offset + ((target->flags.generation == lir->flags.generation) ? 0 :
@@ -1555,8 +1555,8 @@
         }
         case kFixupMovImmLST: {
           // operands[1] should hold disp, [2] has add, [3] has tab_rec
-          LIR *addPCInst = reinterpret_cast<LIR*>(UnwrapPointer(lir->operands[2]));
-          EmbeddedData *tab_rec = reinterpret_cast<EmbeddedData*>(UnwrapPointer(lir->operands[3]));
+          const LIR* addPCInst = UnwrapPointer<LIR>(lir->operands[2]);
+          const EmbeddedData* tab_rec = UnwrapPointer<EmbeddedData>(lir->operands[3]);
           // If tab_rec is null, this is a literal load. Use target
           LIR* target = lir->target;
           int32_t target_disp = tab_rec ? tab_rec->offset : target->offset;
@@ -1565,8 +1565,8 @@
         }
         case kFixupMovImmHST: {
           // operands[1] should hold disp, [2] has add, [3] has tab_rec
-          LIR *addPCInst = reinterpret_cast<LIR*>(UnwrapPointer(lir->operands[2]));
-          EmbeddedData *tab_rec = reinterpret_cast<EmbeddedData*>(UnwrapPointer(lir->operands[3]));
+          const LIR* addPCInst = UnwrapPointer<LIR>(lir->operands[2]);
+          const EmbeddedData* tab_rec = UnwrapPointer<EmbeddedData>(lir->operands[3]);
           // If tab_rec is null, this is a literal load. Use target
           LIR* target = lir->target;
           int32_t target_disp = tab_rec ? tab_rec->offset : target->offset;
diff --git a/compiler/dex/quick/arm/call_arm.cc b/compiler/dex/quick/arm/call_arm.cc
index 3182920..d46c25a 100644
--- a/compiler/dex/quick/arm/call_arm.cc
+++ b/compiler/dex/quick/arm/call_arm.cc
@@ -658,7 +658,7 @@
   // NOTE: Method deduplication takes linker patches into account, so we can just pass 0
   // as a placeholder for the offset.
   LIR* call = RawLIR(current_dalvik_offset_, kThumb2Bl, 0,
-                     target_method_idx, WrapPointer(const_cast<DexFile*>(target_dex_file)), type);
+                     target_method_idx, WrapPointer(target_dex_file), type);
   AppendLIR(call);
   call_method_insns_.push_back(call);
   return call;
diff --git a/compiler/dex/quick/arm/codegen_arm.h b/compiler/dex/quick/arm/codegen_arm.h
index 67fabbd..4141bcf 100644
--- a/compiler/dex/quick/arm/codegen_arm.h
+++ b/compiler/dex/quick/arm/codegen_arm.h
@@ -200,7 +200,7 @@
     void UpdateIT(LIR* it, const char* new_guide);
     void OpEndIT(LIR* it);
     LIR* OpMem(OpKind op, RegStorage r_base, int disp);
-    LIR* OpPcRelLoad(RegStorage reg, LIR* target);
+    void OpPcRelLoad(RegStorage reg, LIR* target);
     LIR* OpReg(OpKind op, RegStorage r_dest_src);
     void OpRegCopy(RegStorage r_dest, RegStorage r_src);
     LIR* OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src);
diff --git a/compiler/dex/quick/arm/int_arm.cc b/compiler/dex/quick/arm/int_arm.cc
index ef26323..9193e1b 100644
--- a/compiler/dex/quick/arm/int_arm.cc
+++ b/compiler/dex/quick/arm/int_arm.cc
@@ -1081,9 +1081,10 @@
   return true;
 }
 
-LIR* ArmMir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) {
+void ArmMir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) {
   ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
-  return RawLIR(current_dalvik_offset_, kThumb2LdrPcRel12, reg.GetReg(), 0, 0, 0, 0, target);
+  LIR* lir = NewLIR2(kThumb2LdrPcRel12, reg.GetReg(), 0);
+  lir->target = target;
 }
 
 LIR* ArmMir2Lir::OpVldm(RegStorage r_base, int count) {
diff --git a/compiler/dex/quick/arm/target_arm.cc b/compiler/dex/quick/arm/target_arm.cc
index 13f9072..9812d9f 100644
--- a/compiler/dex/quick/arm/target_arm.cc
+++ b/compiler/dex/quick/arm/target_arm.cc
@@ -455,7 +455,7 @@
            case 'T':
              snprintf(tbuf, arraysize(tbuf), "%s", PrettyMethod(
                  static_cast<uint32_t>(lir->operands[1]),
-                 *reinterpret_cast<const DexFile*>(UnwrapPointer(lir->operands[2]))).c_str());
+                 *UnwrapPointer<DexFile>(lir->operands[2])).c_str());
              break;
            case 'u': {
              int offset_1 = lir->operands[0];
@@ -906,9 +906,7 @@
   for (LIR* p : call_method_insns_) {
       DCHECK_EQ(p->opcode, kThumb2Bl);
       uint32_t target_method_idx = p->operands[1];
-      const DexFile* target_dex_file =
-          reinterpret_cast<const DexFile*>(UnwrapPointer(p->operands[2]));
-
+      const DexFile* target_dex_file = UnwrapPointer<DexFile>(p->operands[2]);
       patches_.push_back(LinkerPatch::RelativeCodePatch(p->offset,
                                                         target_dex_file, target_method_idx));
   }
diff --git a/compiler/dex/quick/arm64/assemble_arm64.cc b/compiler/dex/quick/arm64/assemble_arm64.cc
index aa5e5b4..329bb1e 100644
--- a/compiler/dex/quick/arm64/assemble_arm64.cc
+++ b/compiler/dex/quick/arm64/assemble_arm64.cc
@@ -1003,7 +1003,7 @@
                                       0 : offset_adjustment) + target_lir->offset;
             delta = target_offs - lir->offset;
           } else if (lir->operands[2] >= 0) {
-            EmbeddedData* tab = reinterpret_cast<EmbeddedData*>(UnwrapPointer(lir->operands[2]));
+            const EmbeddedData* tab = UnwrapPointer<EmbeddedData>(lir->operands[2]);
             delta = tab->offset + offset_adjustment - lir->offset;
           } else {
             // No fixup: this usage allows to retrieve the current PC.
diff --git a/compiler/dex/quick/arm64/call_arm64.cc b/compiler/dex/quick/arm64/call_arm64.cc
index 1dcbe60..823cb60 100644
--- a/compiler/dex/quick/arm64/call_arm64.cc
+++ b/compiler/dex/quick/arm64/call_arm64.cc
@@ -525,7 +525,7 @@
   // NOTE: Method deduplication takes linker patches into account, so we can just pass 0
   // as a placeholder for the offset.
   LIR* call = RawLIR(current_dalvik_offset_, kA64Bl1t, 0,
-                     target_method_idx, WrapPointer(const_cast<DexFile*>(target_dex_file)), type);
+                     target_method_idx, WrapPointer(target_dex_file), type);
   AppendLIR(call);
   call_method_insns_.push_back(call);
   return call;
diff --git a/compiler/dex/quick/arm64/codegen_arm64.h b/compiler/dex/quick/arm64/codegen_arm64.h
index d5f0536..54fd46d 100644
--- a/compiler/dex/quick/arm64/codegen_arm64.h
+++ b/compiler/dex/quick/arm64/codegen_arm64.h
@@ -203,7 +203,7 @@
   LIR* OpIT(ConditionCode cond, const char* guide) OVERRIDE;
   void OpEndIT(LIR* it) OVERRIDE;
   LIR* OpMem(OpKind op, RegStorage r_base, int disp) OVERRIDE;
-  LIR* OpPcRelLoad(RegStorage reg, LIR* target) OVERRIDE;
+  void OpPcRelLoad(RegStorage reg, LIR* target) OVERRIDE;
   LIR* OpReg(OpKind op, RegStorage r_dest_src) OVERRIDE;
   void OpRegCopy(RegStorage r_dest, RegStorage r_src) OVERRIDE;
   LIR* OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src) OVERRIDE;
diff --git a/compiler/dex/quick/arm64/int_arm64.cc b/compiler/dex/quick/arm64/int_arm64.cc
index 92675f3..2372ccc 100644
--- a/compiler/dex/quick/arm64/int_arm64.cc
+++ b/compiler/dex/quick/arm64/int_arm64.cc
@@ -937,9 +937,10 @@
   return true;
 }
 
-LIR* Arm64Mir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) {
+void Arm64Mir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) {
   ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
-  return RawLIR(current_dalvik_offset_, kA64Ldr2rp, As32BitReg(reg).GetReg(), 0, 0, 0, 0, target);
+  LIR* lir = NewLIR2(kA64Ldr2rp, As32BitReg(reg).GetReg(), 0);
+  lir->target = target;
 }
 
 LIR* Arm64Mir2Lir::OpVldm(RegStorage r_base, int count) {
diff --git a/compiler/dex/quick/arm64/target_arm64.cc b/compiler/dex/quick/arm64/target_arm64.cc
index 136be94..09a34bf 100644
--- a/compiler/dex/quick/arm64/target_arm64.cc
+++ b/compiler/dex/quick/arm64/target_arm64.cc
@@ -851,9 +851,7 @@
   for (LIR* p : call_method_insns_) {
       DCHECK_EQ(p->opcode, kA64Bl1t);
       uint32_t target_method_idx = p->operands[1];
-      const DexFile* target_dex_file =
-          reinterpret_cast<const DexFile*>(UnwrapPointer(p->operands[2]));
-
+      const DexFile* target_dex_file = UnwrapPointer<DexFile>(p->operands[2]);
       patches_.push_back(LinkerPatch::RelativeCodePatch(p->offset,
                                                         target_dex_file, target_method_idx));
   }
diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc
index 029c0ca..4e7919b 100644
--- a/compiler/dex/quick/codegen_util.cc
+++ b/compiler/dex/quick/codegen_util.cc
@@ -16,6 +16,7 @@
 
 #include "mir_to_lir-inl.h"
 
+#include "base/bit_vector-inl.h"
 #include "dex/mir_graph.h"
 #include "driver/compiler_driver.h"
 #include "driver/compiler_options.h"
@@ -88,6 +89,8 @@
   inst->u.m.def_mask = &kEncodeAll;
   LIR* safepoint_pc = NewLIR0(kPseudoSafepointPC);
   DCHECK(safepoint_pc->u.m.def_mask->Equals(kEncodeAll));
+  DCHECK(current_mir_ != nullptr || (current_dalvik_offset_ == 0 && safepoints_.empty()));
+  safepoints_.emplace_back(safepoint_pc, current_mir_);
 }
 
 void Mir2Lir::MarkSafepointPCAfter(LIR* after) {
@@ -102,6 +105,8 @@
     InsertLIRAfter(after, safepoint_pc);
   }
   DCHECK(safepoint_pc->u.m.def_mask->Equals(kEncodeAll));
+  DCHECK(current_mir_ != nullptr || (current_dalvik_offset_ == 0 && safepoints_.empty()));
+  safepoints_.emplace_back(safepoint_pc, current_mir_);
 }
 
 /* Remove a LIR from the list. */
@@ -217,7 +222,7 @@
       }
       LOG(INFO) << "-------- dalvik offset: 0x" << std::hex
                 << lir->dalvik_offset << " @ "
-                << reinterpret_cast<char*>(UnwrapPointer(lir->operands[0]));
+                << UnwrapPointer<char>(lir->operands[0]);
       break;
     case kPseudoExitBlock:
       LOG(INFO) << "-------- exit offset: 0x" << std::hex << dest;
@@ -411,7 +416,7 @@
 LIR* Mir2Lir::ScanLiteralPoolMethod(LIR* data_target, const MethodReference& method) {
   while (data_target) {
     if (static_cast<uint32_t>(data_target->operands[0]) == method.dex_method_index &&
-        UnwrapPointer(data_target->operands[1]) == method.dex_file) {
+        UnwrapPointer<DexFile>(data_target->operands[1]) == method.dex_file) {
       return data_target;
     }
     data_target = data_target->next;
@@ -423,7 +428,7 @@
 LIR* Mir2Lir::ScanLiteralPoolClass(LIR* data_target, const DexFile& dex_file, uint32_t type_idx) {
   while (data_target) {
     if (static_cast<uint32_t>(data_target->operands[0]) == type_idx &&
-        UnwrapPointer(data_target->operands[1]) == &dex_file) {
+        UnwrapPointer<DexFile>(data_target->operands[1]) == &dex_file) {
       return data_target;
     }
     data_target = data_target->next;
@@ -486,8 +491,7 @@
   data_lir = code_literal_list_;
   while (data_lir != nullptr) {
     uint32_t target_method_idx = data_lir->operands[0];
-    const DexFile* target_dex_file =
-        reinterpret_cast<const DexFile*>(UnwrapPointer(data_lir->operands[1]));
+    const DexFile* target_dex_file = UnwrapPointer<DexFile>(data_lir->operands[1]);
     patches_.push_back(LinkerPatch::CodePatch(code_buffer_.size(),
                                               target_dex_file, target_method_idx));
     PushUnpatchedReference(&code_buffer_);
@@ -496,8 +500,7 @@
   data_lir = method_literal_list_;
   while (data_lir != nullptr) {
     uint32_t target_method_idx = data_lir->operands[0];
-    const DexFile* target_dex_file =
-        reinterpret_cast<const DexFile*>(UnwrapPointer(data_lir->operands[1]));
+    const DexFile* target_dex_file = UnwrapPointer<DexFile>(data_lir->operands[1]);
     patches_.push_back(LinkerPatch::MethodPatch(code_buffer_.size(),
                                                 target_dex_file, target_method_idx));
     PushUnpatchedReference(&code_buffer_);
@@ -507,8 +510,7 @@
   data_lir = class_literal_list_;
   while (data_lir != nullptr) {
     uint32_t target_type_idx = data_lir->operands[0];
-    const DexFile* class_dex_file =
-      reinterpret_cast<const DexFile*>(UnwrapPointer(data_lir->operands[1]));
+    const DexFile* class_dex_file = UnwrapPointer<DexFile>(data_lir->operands[1]);
     patches_.push_back(LinkerPatch::TypePatch(code_buffer_.size(),
                                               class_dex_file, target_type_idx));
     PushUnpatchedReference(&code_buffer_);
@@ -767,6 +769,61 @@
 }
 
 void Mir2Lir::CreateNativeGcMap() {
+  if (UNLIKELY((cu_->disable_opt & (1u << kPromoteRegs)) != 0u)) {
+    // If we're not promoting to physical registers, it's safe to use the verifier's notion of
+    // references. (We disable register promotion when type inference finds a type conflict and
+    // in that case we defer to the verifier to avoid using the compiler's conflicting info.)
+    CreateNativeGcMapWithoutRegisterPromotion();
+    return;
+  }
+
+  ArenaBitVector* references = new (arena_) ArenaBitVector(arena_, mir_graph_->GetNumSSARegs(),
+                                                           false);
+
+  // Calculate max native offset and max reference vreg.
+  MIR* prev_mir = nullptr;
+  int max_ref_vreg = -1;
+  CodeOffset max_native_offset = 0u;
+  for (const auto& entry : safepoints_) {
+    uint32_t native_offset = entry.first->offset;
+    max_native_offset = std::max(max_native_offset, native_offset);
+    MIR* mir = entry.second;
+    UpdateReferenceVRegs(mir, prev_mir, references);
+    max_ref_vreg = std::max(max_ref_vreg, references->GetHighestBitSet());
+    prev_mir = mir;
+  }
+
+  // Build the GC map.
+  uint32_t reg_width = static_cast<uint32_t>((max_ref_vreg + 8) / 8);
+  GcMapBuilder native_gc_map_builder(&native_gc_map_,
+                                     safepoints_.size(),
+                                     max_native_offset, reg_width);
+#if !defined(BYTE_ORDER) || (BYTE_ORDER != LITTLE_ENDIAN)
+  ArenaVector<uint8_t> references_buffer(arena_->Adapter());
+  references_buffer.resize(reg_width);
+#endif
+  for (const auto& entry : safepoints_) {
+    uint32_t native_offset = entry.first->offset;
+    MIR* mir = entry.second;
+    UpdateReferenceVRegs(mir, prev_mir, references);
+#if !defined(BYTE_ORDER) || (BYTE_ORDER != LITTLE_ENDIAN)
+    // Big-endian or unknown endianness, manually translate the bit vector data.
+    const auto* raw_storage = references->GetRawStorage();
+    for (size_t i = 0; i != reg_width; ++i) {
+      references_buffer[i] = static_cast<uint8_t>(
+          raw_storage[i / sizeof(raw_storage[0])] >> (8u * (i % sizeof(raw_storage[0]))));
+    }
+    native_gc_map_builder.AddEntry(native_offset, &references_buffer[0]);
+#else
+    // For little-endian, the bytes comprising the bit vector's raw storage are what we need.
+    native_gc_map_builder.AddEntry(native_offset,
+                                   reinterpret_cast<const uint8_t*>(references->GetRawStorage()));
+#endif
+    prev_mir = mir;
+  }
+}
+
+void Mir2Lir::CreateNativeGcMapWithoutRegisterPromotion() {
   DCHECK(!encoded_mapping_table_.empty());
   MappingTable mapping_table(&encoded_mapping_table_[0]);
   uint32_t max_native_offset = 0;
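
A note on the endianness handling in CreateNativeGcMap() above: on little-endian hosts the bit vector's raw words already have the byte layout the GC map expects, while the big-endian path copies bytes out by hand. Below is a minimal standalone sketch of that byte extraction with made-up values; it does not use ART's BitVector or GcMapBuilder.

// Standalone sketch: extract GC map bytes from word-based bit vector storage
// in little-endian byte order, independent of the host byte order.
#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
  // Hypothetical raw storage: vregs 0, 9 and 17 hold references.
  std::vector<uint32_t> raw_storage = {(1u << 0) | (1u << 9) | (1u << 17)};
  const size_t reg_width = 3;  // Bytes needed to cover the highest set bit.

  std::vector<uint8_t> references_buffer(reg_width);
  for (size_t i = 0; i != reg_width; ++i) {
    // Shifting picks byte i as it would appear in little-endian memory,
    // regardless of how the host lays out the 32-bit words.
    references_buffer[i] = static_cast<uint8_t>(
        raw_storage[i / sizeof(raw_storage[0])] >> (8u * (i % sizeof(raw_storage[0]))));
  }
  for (uint8_t b : references_buffer) {
    std::printf("%02x ", static_cast<unsigned>(b));  // Prints: 01 02 02
  }
  std::printf("\n");
  return 0;
}
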
@@ -965,6 +1022,7 @@
       block_label_list_(nullptr),
       promotion_map_(nullptr),
       current_dalvik_offset_(0),
+      current_mir_(nullptr),
       estimated_native_code_size_(0),
       reg_pool_(nullptr),
       live_sreg_(0),
@@ -984,6 +1042,7 @@
       slow_paths_(arena->Adapter(kArenaAllocSlowPaths)),
       mem_ref_type_(ResourceMask::kHeapRef),
       mask_cache_(arena),
+      safepoints_(arena->Adapter()),
       in_to_reg_storage_mapping_(arena) {
   switch_tables_.reserve(4);
   fill_array_data_.reserve(4);
@@ -992,7 +1051,7 @@
   pointer_storage_.reserve(128);
   slow_paths_.reserve(32);
   // Reserve pointer id 0 for nullptr.
-  size_t null_idx = WrapPointer(nullptr);
+  size_t null_idx = WrapPointer<void>(nullptr);
   DCHECK_EQ(null_idx, 0U);
 }
 
@@ -1201,8 +1260,7 @@
     data_target->operands[2] = type;
   }
   // Loads a code pointer. Code from oat file can be mapped anywhere.
-  LIR* load_pc_rel = OpPcRelLoad(TargetPtrReg(symbolic_reg), data_target);
-  AppendLIR(load_pc_rel);
+  OpPcRelLoad(TargetPtrReg(symbolic_reg), data_target);
   DCHECK_NE(cu_->instruction_set, kMips) << reinterpret_cast<void*>(data_target);
   DCHECK_NE(cu_->instruction_set, kMips64) << reinterpret_cast<void*>(data_target);
 }
@@ -1219,8 +1277,7 @@
     data_target->operands[2] = type;
   }
   // Loads an ArtMethod pointer, which is a reference as it lives in the heap.
-  LIR* load_pc_rel = OpPcRelLoad(TargetReg(symbolic_reg, kRef), data_target);
-  AppendLIR(load_pc_rel);
+  OpPcRelLoad(TargetReg(symbolic_reg, kRef), data_target);
   DCHECK_NE(cu_->instruction_set, kMips) << reinterpret_cast<void*>(data_target);
   DCHECK_NE(cu_->instruction_set, kMips64) << reinterpret_cast<void*>(data_target);
 }
@@ -1234,8 +1291,7 @@
     data_target->operands[1] = WrapPointer(const_cast<DexFile*>(&dex_file));
   }
   // Loads a Class pointer, which is a reference as it lives in the heap.
-  LIR* load_pc_rel = OpPcRelLoad(TargetReg(symbolic_reg, kRef), data_target);
-  AppendLIR(load_pc_rel);
+  OpPcRelLoad(TargetReg(symbolic_reg, kRef), data_target);
 }
 
 std::vector<uint8_t>* Mir2Lir::ReturnFrameDescriptionEntry() {
@@ -1274,4 +1330,97 @@
   UNREACHABLE();
 }
 
+void Mir2Lir::InitReferenceVRegs(BasicBlock* bb, BitVector* references) {
+  // Mark the references coming from the first predecessor.
+  DCHECK(bb != nullptr);
+  DCHECK(bb->block_type == kEntryBlock || !bb->predecessors.empty());
+  BasicBlock* first_bb =
+      (bb->block_type == kEntryBlock) ? bb : mir_graph_->GetBasicBlock(bb->predecessors[0]);
+  DCHECK(first_bb != nullptr);
+  DCHECK(first_bb->data_flow_info != nullptr);
+  DCHECK(first_bb->data_flow_info->vreg_to_ssa_map_exit != nullptr);
+  const int32_t* first_vreg_to_ssa_map = first_bb->data_flow_info->vreg_to_ssa_map_exit;
+  references->ClearAllBits();
+  for (uint32_t vreg = 0, num_vregs = mir_graph_->GetNumOfCodeVRs(); vreg != num_vregs; ++vreg) {
+    int32_t sreg = first_vreg_to_ssa_map[vreg];
+    if (sreg != INVALID_SREG && mir_graph_->reg_location_[sreg].ref &&
+        !mir_graph_->IsConstantNullRef(mir_graph_->reg_location_[sreg])) {
+      references->SetBit(vreg);
+    }
+  }
+  // Unmark the references that are merging with a different value.
+  for (size_t i = 1u, num_pred = bb->predecessors.size(); i < num_pred; ++i) {
+    BasicBlock* pred_bb = mir_graph_->GetBasicBlock(bb->predecessors[i]);
+    DCHECK(pred_bb != nullptr);
+    DCHECK(pred_bb->data_flow_info != nullptr);
+    DCHECK(pred_bb->data_flow_info->vreg_to_ssa_map_exit != nullptr);
+    const int32_t* pred_vreg_to_ssa_map = pred_bb->data_flow_info->vreg_to_ssa_map_exit;
+    for (uint32_t vreg : references->Indexes()) {
+      if (first_vreg_to_ssa_map[vreg] != pred_vreg_to_ssa_map[vreg]) {
+        // NOTE: The BitVectorSet::IndexIterator will not check the pointed-to bit again,
+        // so clearing the bit has no effect on the iterator.
+        references->ClearBit(vreg);
+      }
+    }
+  }
+  if (bb->block_type != kEntryBlock && bb->first_mir_insn != nullptr &&
+      static_cast<int>(bb->first_mir_insn->dalvikInsn.opcode) == kMirOpCheckPart2) {
+    // In Mir2Lir::MethodBlockCodeGen() we have artificially moved the throwing
+    // instruction to the previous block. However, the MIRGraph data used above
+    // doesn't reflect that, so we still need to process that MIR insn here.
+    DCHECK_EQ(bb->predecessors.size(), 1u);
+    BasicBlock* pred_bb = mir_graph_->GetBasicBlock(bb->predecessors[0]);
+    DCHECK(pred_bb != nullptr);
+    DCHECK(pred_bb->last_mir_insn != nullptr);
+    UpdateReferenceVRegsLocal(nullptr, pred_bb->last_mir_insn, references);
+  }
+}
+
+bool Mir2Lir::UpdateReferenceVRegsLocal(MIR* mir, MIR* prev_mir, BitVector* references) {
+  DCHECK(mir == nullptr || mir->bb == prev_mir->bb);
+  DCHECK(prev_mir != nullptr);
+  while (prev_mir != nullptr) {
+    if (prev_mir == mir) {
+      return true;
+    }
+    const size_t num_defs = prev_mir->ssa_rep->num_defs;
+    const int32_t* defs = prev_mir->ssa_rep->defs;
+    if (num_defs == 1u && mir_graph_->reg_location_[defs[0]].ref &&
+        !mir_graph_->IsConstantNullRef(mir_graph_->reg_location_[defs[0]])) {
+      references->SetBit(mir_graph_->SRegToVReg(defs[0]));
+    } else {
+      for (size_t i = 0u; i != num_defs; ++i) {
+        references->ClearBit(mir_graph_->SRegToVReg(defs[i]));
+      }
+    }
+    prev_mir = prev_mir->next;
+  }
+  return false;
+}
+
+void Mir2Lir::UpdateReferenceVRegs(MIR* mir, MIR* prev_mir, BitVector* references) {
+  if (mir == nullptr) {
+    // Safepoint in entry sequence.
+    InitReferenceVRegs(mir_graph_->GetEntryBlock(), references);
+    return;
+  }
+  if (IsInstructionReturn(mir->dalvikInsn.opcode) ||
+      mir->dalvikInsn.opcode == Instruction::RETURN_VOID_NO_BARRIER) {
+    references->ClearAllBits();
+    if (mir->dalvikInsn.opcode == Instruction::RETURN_OBJECT) {
+      references->SetBit(mir_graph_->SRegToVReg(mir->ssa_rep->uses[0]));
+    }
+    return;
+  }
+  if (prev_mir != nullptr && mir->bb == prev_mir->bb &&
+      UpdateReferenceVRegsLocal(mir, prev_mir, references)) {
+    return;
+  }
+  BasicBlock* bb = mir_graph_->GetBasicBlock(mir->bb);
+  DCHECK(bb != nullptr);
+  InitReferenceVRegs(bb, references);
+  bool success = UpdateReferenceVRegsLocal(mir, bb->first_mir_insn, references);
+  DCHECK(success) << "MIR @0x" << std::hex << mir->offset << " not in BB#" << std::dec << mir->bb;
+}
+
 }  // namespace art
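
The reference tracking added above (InitReferenceVRegs / UpdateReferenceVRegsLocal) boils down to: trust the first predecessor's vreg-to-SSA map, unmark any vreg whose SSA value differs in another predecessor, then walk the block's instructions and let each definition set or clear its vreg's bit. The following is a conceptual sketch with hypothetical data, not ART's MIRGraph or BitVector API.

// Conceptual sketch of the per-block reference tracking (hypothetical data).
#include <bitset>
#include <cstdio>

constexpr size_t kNumVRegs = 4;

int main() {
  // Hypothetical vreg -> SSA name maps at the exits of two predecessors, plus
  // whether each vreg holds a reference at the first predecessor's exit.
  int first_ssa[kNumVRegs] = {10, 11, 12, 13};
  int other_ssa[kNumVRegs] = {10, 21, 12, 13};
  bool first_is_ref[kNumVRegs] = {true, true, false, true};

  // InitReferenceVRegs: trust the first predecessor, then drop vregs that
  // merge with a different SSA value in another predecessor.
  std::bitset<kNumVRegs> references;
  for (size_t vreg = 0; vreg != kNumVRegs; ++vreg) {
    references[vreg] = first_is_ref[vreg] && first_ssa[vreg] == other_ssa[vreg];
  }

  // UpdateReferenceVRegsLocal: a single reference-typed def sets its vreg's
  // bit, any other def clears it.
  struct { int vreg; bool def_is_ref; } insns[] = {{2, true}, {0, false}};
  for (const auto& insn : insns) {
    references[insn.vreg] = insn.def_is_ref;
  }

  // Prints "1100": v2 and v3 hold references (std::bitset prints v3..v0).
  std::printf("%s\n", references.to_string().c_str());
  return 0;
}
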
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index d613cca..b80fd74 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -2151,9 +2151,6 @@
   RegLocation rl_result = EvalLoc(rl_dest, kAnyReg, true);
   LoadConstantNoClobber(rl_result.reg, value);
   StoreValue(rl_dest, rl_result);
-  if (value == 0) {
-    Workaround7250540(rl_dest, rl_result.reg);
-  }
 }
 
 void Mir2Lir::GenConversionCall(QuickEntrypointEnum trampoline, RegLocation rl_dest,
diff --git a/compiler/dex/quick/gen_loadstore.cc b/compiler/dex/quick/gen_loadstore.cc
index db844bc..b71691f 100644
--- a/compiler/dex/quick/gen_loadstore.cc
+++ b/compiler/dex/quick/gen_loadstore.cc
@@ -37,48 +37,6 @@
 }
 
 /*
- * Temporary workaround for Issue 7250540.  If we're loading a constant zero into a
- * promoted floating point register, also copy a zero into the int/ref identity of
- * that sreg.
- */
-void Mir2Lir::Workaround7250540(RegLocation rl_dest, RegStorage zero_reg) {
-  if (rl_dest.fp) {
-    int pmap_index = SRegToPMap(rl_dest.s_reg_low);
-    const bool is_fp_promoted = promotion_map_[pmap_index].fp_location == kLocPhysReg;
-    const bool is_core_promoted = promotion_map_[pmap_index].core_location == kLocPhysReg;
-    if (is_fp_promoted || is_core_promoted) {
-      // Now, determine if this vreg is ever used as a reference.  If not, we're done.
-      bool used_as_reference = false;
-      int base_vreg = mir_graph_->SRegToVReg(rl_dest.s_reg_low);
-      for (int i = 0; !used_as_reference && (i < mir_graph_->GetNumSSARegs()); i++) {
-        if (mir_graph_->SRegToVReg(mir_graph_->reg_location_[i].s_reg_low) == base_vreg) {
-          used_as_reference |= mir_graph_->reg_location_[i].ref;
-        }
-      }
-      if (!used_as_reference) {
-        return;
-      }
-      RegStorage temp_reg = zero_reg;
-      if (!temp_reg.Valid()) {
-        temp_reg = AllocTemp();
-        LoadConstant(temp_reg, 0);
-      }
-      if (is_core_promoted) {
-        // Promoted - just copy in a zero
-        OpRegCopy(RegStorage::Solo32(promotion_map_[pmap_index].core_reg), temp_reg);
-      } else {
-        // Lives in the frame, need to store.
-        ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-        StoreBaseDisp(TargetPtrReg(kSp), SRegOffset(rl_dest.s_reg_low), temp_reg, k32, kNotVolatile);
-      }
-      if (!zero_reg.Valid()) {
-        FreeTemp(temp_reg);
-      }
-    }
-  }
-}
-
-/*
  * Load a Dalvik register into a physical register.  Take care when
  * using this routine, as it doesn't perform any bookkeeping regarding
  * register liveness.  That is the responsibility of the caller.
diff --git a/compiler/dex/quick/mips/assemble_mips.cc b/compiler/dex/quick/mips/assemble_mips.cc
index 0218dcd..ed72d67 100644
--- a/compiler/dex/quick/mips/assemble_mips.cc
+++ b/compiler/dex/quick/mips/assemble_mips.cc
@@ -562,8 +562,8 @@
          * and is found in lir->target.  If operands[3] is non-NULL,
          * then it is a Switch/Data table.
          */
-        int offset1 = (reinterpret_cast<LIR*>(UnwrapPointer(lir->operands[2])))->offset;
-        EmbeddedData *tab_rec = reinterpret_cast<EmbeddedData*>(UnwrapPointer(lir->operands[3]));
+        int offset1 = UnwrapPointer<LIR>(lir->operands[2])->offset;
+        const EmbeddedData* tab_rec = UnwrapPointer<EmbeddedData>(lir->operands[3]);
         int offset2 = tab_rec ? tab_rec->offset : lir->target->offset;
         int delta = offset2 - offset1;
         if ((delta & 0xffff) == delta && ((delta & 0x8000) == 0)) {
@@ -589,14 +589,14 @@
           res = kRetryAll;
         }
       } else if (lir->opcode == kMipsDeltaLo) {
-        int offset1 = (reinterpret_cast<LIR*>(UnwrapPointer(lir->operands[2])))->offset;
-        EmbeddedData *tab_rec = reinterpret_cast<EmbeddedData*>(UnwrapPointer(lir->operands[3]));
+        int offset1 = UnwrapPointer<LIR>(lir->operands[2])->offset;
+        const EmbeddedData* tab_rec = UnwrapPointer<EmbeddedData>(lir->operands[3]);
         int offset2 = tab_rec ? tab_rec->offset : lir->target->offset;
         int delta = offset2 - offset1;
         lir->operands[1] = delta & 0xffff;
       } else if (lir->opcode == kMipsDeltaHi) {
-        int offset1 = (reinterpret_cast<LIR*>(UnwrapPointer(lir->operands[2])))->offset;
-        EmbeddedData *tab_rec = reinterpret_cast<EmbeddedData*>(UnwrapPointer(lir->operands[3]));
+        int offset1 = UnwrapPointer<LIR>(lir->operands[2])->offset;
+        const EmbeddedData* tab_rec = UnwrapPointer<EmbeddedData>(lir->operands[3]);
         int offset2 = tab_rec ? tab_rec->offset : lir->target->offset;
         int delta = offset2 - offset1;
         lir->operands[1] = (delta >> 16) & 0xffff;
diff --git a/compiler/dex/quick/mips/codegen_mips.h b/compiler/dex/quick/mips/codegen_mips.h
index 47837a6..649b6c9 100644
--- a/compiler/dex/quick/mips/codegen_mips.h
+++ b/compiler/dex/quick/mips/codegen_mips.h
@@ -172,7 +172,7 @@
     LIR* OpIT(ConditionCode cond, const char* guide);
     void OpEndIT(LIR* it);
     LIR* OpMem(OpKind op, RegStorage r_base, int disp);
-    LIR* OpPcRelLoad(RegStorage reg, LIR* target);
+    void OpPcRelLoad(RegStorage reg, LIR* target);
     LIR* OpReg(OpKind op, RegStorage r_dest_src);
     void OpRegCopy(RegStorage r_dest, RegStorage r_src);
     LIR* OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src);
diff --git a/compiler/dex/quick/mips/int_mips.cc b/compiler/dex/quick/mips/int_mips.cc
index 8c9acf6..8093c97 100644
--- a/compiler/dex/quick/mips/int_mips.cc
+++ b/compiler/dex/quick/mips/int_mips.cc
@@ -348,7 +348,7 @@
   return true;
 }
 
-LIR* MipsMir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) {
+void MipsMir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) {
   UNUSED(reg, target);
   LOG(FATAL) << "Unexpected use of OpPcRelLoad for Mips";
   UNREACHABLE();
diff --git a/compiler/dex/quick/mips64/assemble_mips64.cc b/compiler/dex/quick/mips64/assemble_mips64.cc
index 17a0ef1..d96561b 100644
--- a/compiler/dex/quick/mips64/assemble_mips64.cc
+++ b/compiler/dex/quick/mips64/assemble_mips64.cc
@@ -629,8 +629,8 @@
          * and is found in lir->target.  If operands[3] is non-NULL,
          * then it is a Switch/Data table.
          */
-        int offset1 = (reinterpret_cast<LIR*>(UnwrapPointer(lir->operands[2])))->offset;
-        EmbeddedData *tab_rec = reinterpret_cast<EmbeddedData*>(UnwrapPointer(lir->operands[3]));
+        int offset1 = UnwrapPointer<LIR>(lir->operands[2])->offset;
+        const EmbeddedData* tab_rec = UnwrapPointer<EmbeddedData>(lir->operands[3]);
         int offset2 = tab_rec ? tab_rec->offset : lir->target->offset;
         int delta = offset2 - offset1;
         if ((delta & 0xffff) == delta && ((delta & 0x8000) == 0)) {
@@ -651,14 +651,14 @@
           res = kRetryAll;
         }
       } else if (lir->opcode == kMips64DeltaLo) {
-        int offset1 = (reinterpret_cast<LIR*>(UnwrapPointer(lir->operands[2])))->offset;
-        EmbeddedData *tab_rec = reinterpret_cast<EmbeddedData*>(UnwrapPointer(lir->operands[3]));
+        int offset1 = UnwrapPointer<LIR>(lir->operands[2])->offset;
+        const EmbeddedData* tab_rec = UnwrapPointer<EmbeddedData>(lir->operands[3]);
         int offset2 = tab_rec ? tab_rec->offset : lir->target->offset;
         int delta = offset2 - offset1;
         lir->operands[1] = delta & 0xffff;
       } else if (lir->opcode == kMips64DeltaHi) {
-        int offset1 = (reinterpret_cast<LIR*>(UnwrapPointer(lir->operands[2])))->offset;
-        EmbeddedData *tab_rec = reinterpret_cast<EmbeddedData*>(UnwrapPointer(lir->operands[3]));
+        int offset1 = UnwrapPointer<LIR>(lir->operands[2])->offset;
+        const EmbeddedData* tab_rec = UnwrapPointer<EmbeddedData>(lir->operands[3]);
         int offset2 = tab_rec ? tab_rec->offset : lir->target->offset;
         int delta = offset2 - offset1;
         lir->operands[1] = (delta >> 16) & 0xffff;
diff --git a/compiler/dex/quick/mips64/codegen_mips64.h b/compiler/dex/quick/mips64/codegen_mips64.h
index 57c30d8..c9fd62f 100644
--- a/compiler/dex/quick/mips64/codegen_mips64.h
+++ b/compiler/dex/quick/mips64/codegen_mips64.h
@@ -182,7 +182,7 @@
   LIR* OpIT(ConditionCode cond, const char* guide);
   void OpEndIT(LIR* it);
   LIR* OpMem(OpKind op, RegStorage r_base, int disp);
-  LIR* OpPcRelLoad(RegStorage reg, LIR* target);
+  void OpPcRelLoad(RegStorage reg, LIR* target);
   LIR* OpReg(OpKind op, RegStorage r_dest_src);
   void OpRegCopy(RegStorage r_dest, RegStorage r_src);
   LIR* OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src);
diff --git a/compiler/dex/quick/mips64/int_mips64.cc b/compiler/dex/quick/mips64/int_mips64.cc
index 8a57c82..5c545bb 100644
--- a/compiler/dex/quick/mips64/int_mips64.cc
+++ b/compiler/dex/quick/mips64/int_mips64.cc
@@ -283,7 +283,7 @@
   return true;
 }
 
-LIR* Mips64Mir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) {
+void Mips64Mir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) {
   UNUSED(reg, target);
   LOG(FATAL) << "Unexpected use of OpPcRelLoad for Mips64";
   UNREACHABLE();
diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc
index afacee0..0b480a0 100644
--- a/compiler/dex/quick/mir_to_lir.cc
+++ b/compiler/dex/quick/mir_to_lir.cc
@@ -406,6 +406,7 @@
 bool Mir2Lir::GenSpecialCase(BasicBlock* bb, MIR* mir, const InlineMethod& special) {
   DCHECK(special.flags & kInlineSpecial);
   current_dalvik_offset_ = mir->offset;
+  DCHECK(current_mir_ == nullptr);  // Safepoints attributed to prologue.
   MIR* return_mir = nullptr;
   bool successful = false;
   EnsureInitializedArgMappingToPhysicalReg();
@@ -587,9 +588,6 @@
     case Instruction::MOVE_FROM16:
     case Instruction::MOVE_OBJECT_FROM16:
       StoreValue(rl_dest, rl_src[0]);
-      if (rl_src[0].is_const && (mir_graph_->ConstantValue(rl_src[0]) == 0)) {
-        Workaround7250540(rl_dest, RegStorage::InvalidReg());
-      }
       break;
 
     case Instruction::MOVE_WIDE:
@@ -1276,6 +1274,7 @@
     }
 
     current_dalvik_offset_ = mir->offset;
+    current_mir_ = mir;
     int opcode = mir->dalvikInsn.opcode;
 
     GenPrintLabel(mir);
@@ -1376,6 +1375,7 @@
 
 LIR* Mir2Lir::LIRSlowPath::GenerateTargetLabel(int opcode) {
   m2l_->SetCurrentDexPc(current_dex_pc_);
+  m2l_->current_mir_ = current_mir_;
   LIR* target = m2l_->NewLIR0(opcode);
   fromfast_->target = target;
   return target;
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index 70785dc..236bad7 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -131,6 +131,7 @@
 #define MAX_ASSEMBLER_RETRIES 50
 
 class BasicBlock;
+class BitVector;
 struct CallInfo;
 struct CompilationUnit;
 struct InlineMethod;
@@ -491,7 +492,8 @@
     class LIRSlowPath : public ArenaObject<kArenaAllocSlowPaths> {
      public:
       LIRSlowPath(Mir2Lir* m2l, LIR* fromfast, LIR* cont = nullptr)
-          : m2l_(m2l), cu_(m2l->cu_), current_dex_pc_(m2l->current_dalvik_offset_),
+          : m2l_(m2l), cu_(m2l->cu_),
+            current_dex_pc_(m2l->current_dalvik_offset_), current_mir_(m2l->current_mir_),
             fromfast_(fromfast), cont_(cont) {
       }
       virtual ~LIRSlowPath() {}
@@ -511,6 +513,7 @@
       Mir2Lir* const m2l_;
       CompilationUnit* const cu_;
       const DexOffset current_dex_pc_;
+      MIR* current_mir_;
       LIR* const fromfast_;
       LIR* const cont_;
     };
@@ -582,14 +585,16 @@
      * TUNING: If use of these utilities becomes more common on 32-bit builds, it
      * may be worth conditionally-compiling a set of identity functions here.
      */
-    uint32_t WrapPointer(void* pointer) {
+    template <typename T>
+    uint32_t WrapPointer(const T* pointer) {
       uint32_t res = pointer_storage_.size();
       pointer_storage_.push_back(pointer);
       return res;
     }
 
-    void* UnwrapPointer(size_t index) {
-      return pointer_storage_[index];
+    template <typename T>
+    const T* UnwrapPointer(size_t index) {
+      return reinterpret_cast<const T*>(pointer_storage_[index]);
     }
 
     // strdup(), but allocates from the arena.
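
For context on the WrapPointer/UnwrapPointer change above: pointers are stashed in pointer_storage_ and referred to by a 32-bit index so they fit into LIR's int32 operand slots; templating the accessors moves the cast to the unwrap side and lets the table hold const pointers. A minimal sketch of the scheme follows, using a hypothetical PointerTable class rather than Mir2Lir.

// Sketch of index-based pointer wrapping (hypothetical PointerTable class).
#include <cassert>
#include <cstdint>
#include <vector>

class PointerTable {
 public:
  template <typename T>
  uint32_t WrapPointer(const T* pointer) {
    uint32_t index = static_cast<uint32_t>(storage_.size());
    storage_.push_back(pointer);
    return index;
  }

  template <typename T>
  const T* UnwrapPointer(size_t index) const {
    return reinterpret_cast<const T*>(storage_[index]);
  }

 private:
  std::vector<const void*> storage_;
};

int main() {
  PointerTable table;
  // Index 0 reserved for nullptr, mirroring the constructor change above.
  assert(table.WrapPointer<void>(nullptr) == 0u);
  const int value = 42;
  uint32_t idx = table.WrapPointer(&value);  // Deduced as WrapPointer<int>.
  assert(table.UnwrapPointer<int>(idx) == &value);
  return 0;
}
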
@@ -670,6 +675,7 @@
     bool VerifyCatchEntries();
     void CreateMappingTables();
     void CreateNativeGcMap();
+    void CreateNativeGcMapWithoutRegisterPromotion();
     int AssignLiteralOffset(CodeOffset offset);
     int AssignSwitchTablesOffset(CodeOffset offset);
     int AssignFillArrayDataOffset(CodeOffset offset);
@@ -1379,7 +1385,7 @@
     virtual LIR* OpIT(ConditionCode cond, const char* guide) = 0;
     virtual void OpEndIT(LIR* it) = 0;
     virtual LIR* OpMem(OpKind op, RegStorage r_base, int disp) = 0;
-    virtual LIR* OpPcRelLoad(RegStorage reg, LIR* target) = 0;
+    virtual void OpPcRelLoad(RegStorage reg, LIR* target) = 0;
     virtual LIR* OpReg(OpKind op, RegStorage r_dest_src) = 0;
     virtual void OpRegCopy(RegStorage r_dest, RegStorage r_src) = 0;
     virtual LIR* OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src) = 0;
@@ -1729,6 +1735,16 @@
     // See CheckRegLocationImpl.
     void CheckRegLocation(RegLocation rl) const;
 
+    // Find the references at the beginning of a basic block (for generating GC maps).
+    void InitReferenceVRegs(BasicBlock* bb, BitVector* references);
+
+    // Update references from prev_mir to mir in the same BB. If mir is null or before
+    // prev_mir, report failure (return false) and update references to the end of the BB.
+    bool UpdateReferenceVRegsLocal(MIR* mir, MIR* prev_mir, BitVector* references);
+
+    // Update references from prev_mir to mir.
+    void UpdateReferenceVRegs(MIR* mir, MIR* prev_mir, BitVector* references);
+
   public:
     // TODO: add accessors for these.
     LIR* literal_list_;                        // Constants.
@@ -1745,8 +1761,7 @@
     ArenaVector<FillArrayData*> fill_array_data_;
     ArenaVector<RegisterInfo*> tempreg_info_;
     ArenaVector<RegisterInfo*> reginfo_map_;
-    ArenaVector<void*> pointer_storage_;
-    CodeOffset current_code_offset_;    // Working byte offset of machine instructons.
+    ArenaVector<const void*> pointer_storage_;
     CodeOffset data_offset_;            // starting offset of literal pool.
     size_t total_size_;                   // header + code size.
     LIR* block_label_list_;
@@ -1761,6 +1776,7 @@
      * The low-level LIR creation utilites will pull it from here.  Rework this.
      */
     DexOffset current_dalvik_offset_;
+    MIR* current_mir_;
     size_t estimated_native_code_size_;     // Just an estimate; used to reserve code_buffer_ size.
     std::unique_ptr<RegisterPool> reg_pool_;
     /*
@@ -1799,6 +1815,9 @@
     // to deduplicate the masks.
     ResourceMaskCache mask_cache_;
 
+    // Record the MIR that generated a given safepoint (nullptr for prologue safepoints).
+    ArenaVector<std::pair<LIR*, MIR*>> safepoints_;
+
   protected:
     // ABI support
     class ShortyArg {
diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc
index 6f26b78..118ab1d 100644
--- a/compiler/dex/quick/x86/assemble_x86.cc
+++ b/compiler/dex/quick/x86/assemble_x86.cc
@@ -1586,13 +1586,11 @@
                            int32_t raw_index, int scale, int32_t table_or_disp) {
   int disp;
   if (entry->opcode == kX86PcRelLoadRA) {
-    Mir2Lir::EmbeddedData *tab_rec =
-        reinterpret_cast<Mir2Lir::EmbeddedData*>(UnwrapPointer(table_or_disp));
+    const EmbeddedData* tab_rec = UnwrapPointer<EmbeddedData>(table_or_disp);
     disp = tab_rec->offset;
   } else {
     DCHECK(entry->opcode == kX86PcRelAdr);
-    Mir2Lir::EmbeddedData *tab_rec =
-        reinterpret_cast<Mir2Lir::EmbeddedData*>(UnwrapPointer(raw_base_or_table));
+    const EmbeddedData* tab_rec = UnwrapPointer<EmbeddedData>(raw_base_or_table);
     disp = tab_rec->offset;
   }
   if (entry->opcode == kX86PcRelLoadRA) {
@@ -1794,8 +1792,7 @@
             DCHECK_EQ(lir->opcode, kX86Lea64RM) << "Unknown instruction: " << X86Mir2Lir::EncodingMap[lir->opcode].name;
             DCHECK_EQ(lir->operands[1], static_cast<int>(kRIPReg));
             // Grab the target offset from the saved data.
-            Mir2Lir::EmbeddedData* tab_rec =
-                reinterpret_cast<Mir2Lir::EmbeddedData*>(UnwrapPointer(lir->operands[4]));
+            const EmbeddedData* tab_rec = UnwrapPointer<Mir2Lir::EmbeddedData>(lir->operands[4]);
             CodeOffset target = tab_rec->offset;
             // Handle 64 bit RIP addressing.
             // Offset is relative to next instruction.
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index 20163b4..040a8c4 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -296,7 +296,7 @@
   LIR* OpIT(ConditionCode cond, const char* guide) OVERRIDE;
   void OpEndIT(LIR* it) OVERRIDE;
   LIR* OpMem(OpKind op, RegStorage r_base, int disp) OVERRIDE;
-  LIR* OpPcRelLoad(RegStorage reg, LIR* target) OVERRIDE;
+  void OpPcRelLoad(RegStorage reg, LIR* target) OVERRIDE;
   LIR* OpReg(OpKind op, RegStorage r_dest_src) OVERRIDE;
   void OpRegCopy(RegStorage r_dest, RegStorage r_src) OVERRIDE;
   LIR* OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src) OVERRIDE;
diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc
index 25e34e3..4eb626c 100755
--- a/compiler/dex/quick/x86/int_x86.cc
+++ b/compiler/dex/quick/x86/int_x86.cc
@@ -1324,7 +1324,7 @@
   return true;
 }
 
-LIR* X86Mir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) {
+void X86Mir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) {
   if (cu_->target64) {
     // We can do this directly using RIP addressing.
     // We don't know the proper offset for the value, so pick one that will force
@@ -1334,7 +1334,7 @@
     LIR* res = NewLIR3(kX86Mov32RM, reg.GetReg(), kRIPReg, 256);
     res->target = target;
     res->flags.fixup = kFixupLoad;
-    return res;
+    return;
   }
 
   CHECK(base_of_code_ != nullptr);
@@ -1353,11 +1353,9 @@
   // 4 byte offset.  We will fix this up in the assembler later to have the right
   // value.
   ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
-  LIR *res = RawLIR(current_dalvik_offset_, kX86Mov32RM, reg.GetReg(), reg.GetReg(), 256,
-                    0, 0, target);
+  LIR* res = NewLIR3(kX86Mov32RM, reg.GetReg(), reg.GetReg(), 256);
   res->target = target;
   res->flags.fixup = kFixupLoad;
-  return res;
 }
 
 LIR* X86Mir2Lir::OpVldm(RegStorage r_base, int count) {
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index dbe4848..f128eb7 100755
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -390,7 +390,7 @@
              break;
           }
           case 'p': {
-            EmbeddedData *tab_rec = reinterpret_cast<EmbeddedData*>(UnwrapPointer(operand));
+            const EmbeddedData* tab_rec = UnwrapPointer<EmbeddedData>(operand);
             buf += StringPrintf("0x%08x", tab_rec->offset);
             break;
           }
@@ -1062,8 +1062,7 @@
   for (LIR* p : method_address_insns_) {
       DCHECK_EQ(p->opcode, kX86Mov32RI);
       uint32_t target_method_idx = p->operands[2];
-      const DexFile* target_dex_file =
-          reinterpret_cast<const DexFile*>(UnwrapPointer(p->operands[3]));
+      const DexFile* target_dex_file = UnwrapPointer<DexFile>(p->operands[3]);
 
       // The offset to patch is the last 4 bytes of the instruction.
       int patch_offset = p->offset + p->flags.size - 4;
@@ -1075,8 +1074,7 @@
   for (LIR* p : class_type_address_insns_) {
       DCHECK_EQ(p->opcode, kX86Mov32RI);
 
-      const DexFile* class_dex_file =
-        reinterpret_cast<const DexFile*>(UnwrapPointer(p->operands[3]));
+      const DexFile* class_dex_file = UnwrapPointer<DexFile>(p->operands[3]);
       uint32_t target_type_idx = p->operands[2];
 
       // The offset to patch is the last 4 bytes of the instruction.
@@ -1090,8 +1088,7 @@
   for (LIR* p : call_method_insns_) {
       DCHECK_EQ(p->opcode, kX86CallI);
       uint32_t target_method_idx = p->operands[1];
-      const DexFile* target_dex_file =
-          reinterpret_cast<const DexFile*>(UnwrapPointer(p->operands[2]));
+      const DexFile* target_dex_file = UnwrapPointer<DexFile>(p->operands[2]);
 
       // The offset to patch is the last 4 bytes of the instruction.
       int patch_offset = p->offset + p->flags.size - 4;
diff --git a/compiler/jit/jit_compiler.cc b/compiler/jit/jit_compiler.cc
index beb5755..8b31154 100644
--- a/compiler/jit/jit_compiler.cc
+++ b/compiler/jit/jit_compiler.cc
@@ -217,20 +217,21 @@
   auto* const mapping_table = compiled_method->GetMappingTable();
   auto* const vmap_table = compiled_method->GetVmapTable();
   auto* const gc_map = compiled_method->GetGcMap();
+  CHECK(gc_map != nullptr) << PrettyMethod(method);
   // Write out pre-header stuff.
   uint8_t* const mapping_table_ptr = code_cache->AddDataArray(
       self, mapping_table->data(), mapping_table->data() + mapping_table->size());
-  if (mapping_table == nullptr) {
+  if (mapping_table_ptr == nullptr) {
     return false;  // Out of data cache.
   }
   uint8_t* const vmap_table_ptr = code_cache->AddDataArray(
       self, vmap_table->data(), vmap_table->data() + vmap_table->size());
-  if (vmap_table == nullptr) {
+  if (vmap_table_ptr == nullptr) {
     return false;  // Out of data cache.
   }
   uint8_t* const gc_map_ptr = code_cache->AddDataArray(
       self, gc_map->data(), gc_map->data() + gc_map->size());
-  if (gc_map == nullptr) {
+  if (gc_map_ptr == nullptr) {
     return false;  // Out of data cache.
   }
   // Don't touch this until you protect / unprotect the code.
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index aeec5dd..99283a0 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -63,6 +63,7 @@
 using helpers::VIXLRegCodeFromART;
 using helpers::WRegisterFrom;
 using helpers::XRegisterFrom;
+using helpers::ARM64EncodableConstantOrRegister;
 
 static constexpr size_t kHeapRefSize = sizeof(mirror::HeapReference<mirror::Object>);
 static constexpr int kCurrentMethodStackOffset = 0;
@@ -1106,7 +1107,7 @@
     case Primitive::kPrimInt:
     case Primitive::kPrimLong:
       locations->SetInAt(0, Location::RequiresRegister());
-      locations->SetInAt(1, Location::RegisterOrConstant(instr->InputAt(1)));
+      locations->SetInAt(1, ARM64EncodableConstantOrRegister(instr->InputAt(1), instr));
       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
       break;
 
@@ -1398,7 +1399,7 @@
   switch (in_type) {
     case Primitive::kPrimLong: {
       locations->SetInAt(0, Location::RequiresRegister());
-      locations->SetInAt(1, Location::RegisterOrConstant(compare->InputAt(1)));
+      locations->SetInAt(1, ARM64EncodableConstantOrRegister(compare->InputAt(1), compare));
       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
       break;
     }
@@ -1468,7 +1469,7 @@
 void LocationsBuilderARM64::VisitCondition(HCondition* instruction) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
   locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+  locations->SetInAt(1, ARM64EncodableConstantOrRegister(instruction->InputAt(1), instruction));
   if (instruction->NeedsMaterialization()) {
     locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
   }
@@ -2116,7 +2117,7 @@
   switch (neg->GetResultType()) {
     case Primitive::kPrimInt:
     case Primitive::kPrimLong:
-      locations->SetInAt(0, Location::RegisterOrConstant(neg->InputAt(0)));
+      locations->SetInAt(0, ARM64EncodableConstantOrRegister(neg->InputAt(0), neg));
       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
       break;
 
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 754dd10..02b9b32 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -2730,26 +2730,45 @@
   Label less, greater, done;
   switch (compare->InputAt(0)->GetType()) {
     case Primitive::kPrimLong: {
+      Register left_low = left.AsRegisterPairLow<Register>();
+      Register left_high = left.AsRegisterPairHigh<Register>();
+      int32_t val_low = 0;
+      int32_t val_high = 0;
+      bool right_is_const = false;
+
+      if (right.IsConstant()) {
+        DCHECK(right.GetConstant()->IsLongConstant());
+        right_is_const = true;
+        int64_t val = right.GetConstant()->AsLongConstant()->GetValue();
+        val_low = Low32Bits(val);
+        val_high = High32Bits(val);
+      }
+
       if (right.IsRegisterPair()) {
-        __ cmpl(left.AsRegisterPairHigh<Register>(), right.AsRegisterPairHigh<Register>());
+        __ cmpl(left_high, right.AsRegisterPairHigh<Register>());
       } else if (right.IsDoubleStackSlot()) {
-        __ cmpl(left.AsRegisterPairHigh<Register>(),
-                Address(ESP, right.GetHighStackIndex(kX86WordSize)));
+        __ cmpl(left_high, Address(ESP, right.GetHighStackIndex(kX86WordSize)));
       } else {
-        DCHECK(right.IsConstant()) << right;
-        __ cmpl(left.AsRegisterPairHigh<Register>(),
-                Immediate(High32Bits(right.GetConstant()->AsLongConstant()->GetValue())));
+        DCHECK(right_is_const) << right;
+        if (val_high == 0) {
+          __ testl(left_high, left_high);
+        } else {
+          __ cmpl(left_high, Immediate(val_high));
+        }
       }
       __ j(kLess, &less);  // Signed compare.
       __ j(kGreater, &greater);  // Signed compare.
       if (right.IsRegisterPair()) {
-        __ cmpl(left.AsRegisterPairLow<Register>(), right.AsRegisterPairLow<Register>());
+        __ cmpl(left_low, right.AsRegisterPairLow<Register>());
       } else if (right.IsDoubleStackSlot()) {
-        __ cmpl(left.AsRegisterPairLow<Register>(), Address(ESP, right.GetStackIndex()));
+        __ cmpl(left_low, Address(ESP, right.GetStackIndex()));
       } else {
-        DCHECK(right.IsConstant()) << right;
-        __ cmpl(left.AsRegisterPairLow<Register>(),
-                Immediate(Low32Bits(right.GetConstant()->AsLongConstant()->GetValue())));
+        DCHECK(right_is_const) << right;
+        if (val_low == 0) {
+          __ testl(left_low, left_low);
+        } else {
+          __ cmpl(left_low, Immediate(val_low));
+        }
       }
       break;
     }
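
The val_high == 0 / val_low == 0 special cases above rely on testl reg, reg producing the same flag results as cmpl reg, 0 while needing no immediate bytes in the encoding. A tiny sketch of that selection follows; the helper is hypothetical and the printed operand order just mirrors the ART assembler calls, it is not AT&T syntax.

// Sketch of choosing testl over cmpl for a zero comparison (hypothetical helper).
#include <cstdint>
#include <cstdio>
#include <string>

std::string CompareWithConstant(const std::string& reg, int32_t value) {
  if (value == 0) {
    return "testl " + reg + ", " + reg;  // Shorter encoding, equivalent flags.
  }
  return "cmpl " + reg + ", " + std::to_string(value);
}

int main() {
  std::printf("%s\n", CompareWithConstant("eax", 0).c_str());     // testl eax, eax
  std::printf("%s\n", CompareWithConstant("eax", 1234).c_str());  // cmpl eax, 1234
  return 0;
}
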
@@ -3645,14 +3664,21 @@
         __ movl(Address(ESP, destination.GetStackIndex()), Immediate(value));
       }
     } else if (constant->IsFloatConstant()) {
-      float value = constant->AsFloatConstant()->GetValue();
-      Immediate imm(bit_cast<float, int32_t>(value));
+      float fp_value = constant->AsFloatConstant()->GetValue();
+      int32_t value = bit_cast<float, int32_t>(fp_value);
+      Immediate imm(value);
       if (destination.IsFpuRegister()) {
-        ScratchRegisterScope ensure_scratch(
-            this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
-        Register temp = static_cast<Register>(ensure_scratch.GetRegister());
-        __ movl(temp, imm);
-        __ movd(destination.AsFpuRegister<XmmRegister>(), temp);
+        XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
+        if (value == 0) {
+          // Easy handling of 0.0.
+          __ xorps(dest, dest);
+        } else {
+          ScratchRegisterScope ensure_scratch(
+              this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
+          Register temp = static_cast<Register>(ensure_scratch.GetRegister());
+          __ movl(temp, Immediate(value));
+          __ movd(dest, temp);
+        }
       } else {
         DCHECK(destination.IsStackSlot()) << destination;
         __ movl(Address(ESP, destination.GetStackIndex()), imm);
@@ -4107,18 +4133,38 @@
     } else {
       DCHECK(second.IsConstant()) << second;
       int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
-      Immediate low(Low32Bits(value));
-      Immediate high(High32Bits(value));
+      int32_t low_value = Low32Bits(value);
+      int32_t high_value = High32Bits(value);
+      Immediate low(low_value);
+      Immediate high(high_value);
+      Register first_low = first.AsRegisterPairLow<Register>();
+      Register first_high = first.AsRegisterPairHigh<Register>();
       if (instruction->IsAnd()) {
-        __ andl(first.AsRegisterPairLow<Register>(), low);
-        __ andl(first.AsRegisterPairHigh<Register>(), high);
+        if (low_value == 0) {
+          __ xorl(first_low, first_low);
+        } else if (low_value != -1) {
+          __ andl(first_low, low);
+        }
+        if (high_value == 0) {
+          __ xorl(first_high, first_high);
+        } else if (high_value != -1) {
+          __ andl(first_high, high);
+        }
       } else if (instruction->IsOr()) {
-        __ orl(first.AsRegisterPairLow<Register>(), low);
-        __ orl(first.AsRegisterPairHigh<Register>(), high);
+        if (low_value != 0) {
+          __ orl(first_low, low);
+        }
+        if (high_value != 0) {
+          __ orl(first_high, high);
+        }
       } else {
         DCHECK(instruction->IsXor());
-        __ xorl(first.AsRegisterPairLow<Register>(), low);
-        __ xorl(first.AsRegisterPairHigh<Register>(), high);
+        if (low_value != 0) {
+          __ xorl(first_low, low);
+        }
+        if (high_value != 0) {
+          __ xorl(first_high, high);
+        }
       }
     }
   }
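
The and/or/xor constant paths above apply simple per-half identities: x & 0 is always 0 (emitted as xorl reg, reg), while x & -1, x | 0 and x ^ 0 leave the register unchanged and need no instruction at all. The sketch below restates those identities with hypothetical helpers; it computes values rather than emitting code.

// Sketch of the per-32-bit-half identities used above (hypothetical helpers).
#include <cstdint>
#include <cstdio>

uint32_t AndHalf(uint32_t reg, uint32_t imm) {
  if (imm == 0u) return 0u;            // andl with 0: result is always 0 (xorl reg, reg).
  if (imm == 0xffffffffu) return reg;  // andl with -1: no instruction needed.
  return reg & imm;
}

uint32_t OrXorHalf(uint32_t reg, uint32_t imm, bool is_or) {
  if (imm == 0u) return reg;           // orl/xorl with 0: no instruction needed.
  return is_or ? (reg | imm) : (reg ^ imm);
}

int main() {
  uint64_t first = UINT64_C(0x1234567890abcdef);
  uint64_t mask = UINT64_C(0xffffffff00000000);  // High half -1, low half 0.
  uint32_t high = AndHalf(static_cast<uint32_t>(first >> 32), static_cast<uint32_t>(mask >> 32));
  uint32_t low = AndHalf(static_cast<uint32_t>(first), static_cast<uint32_t>(mask));
  std::printf("%08x%08x\n", high, low);  // Prints 1234567800000000.
  return 0;
}
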
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index dbd7c9e..d09c8f8 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -956,7 +956,7 @@
   switch (compare->InputAt(0)->GetType()) {
     case Primitive::kPrimLong: {
       locations->SetInAt(0, Location::RequiresRegister());
-      locations->SetInAt(1, Location::RequiresRegister());
+      locations->SetInAt(1, Location::RegisterOrInt32LongConstant(compare->InputAt(1)));
       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
       break;
     }
@@ -982,7 +982,18 @@
   Primitive::Type type = compare->InputAt(0)->GetType();
   switch (type) {
     case Primitive::kPrimLong: {
-      __ cmpq(left.AsRegister<CpuRegister>(), right.AsRegister<CpuRegister>());
+      CpuRegister left_reg = left.AsRegister<CpuRegister>();
+      if (right.IsConstant()) {
+        int64_t value = right.GetConstant()->AsLongConstant()->GetValue();
+        DCHECK(IsInt<32>(value));
+        if (value == 0) {
+          __ testq(left_reg, left_reg);
+        } else {
+          __ cmpq(left_reg, Immediate(static_cast<int32_t>(value)));
+        }
+      } else {
+        __ cmpq(left_reg, right.AsRegister<CpuRegister>());
+      }
       break;
     }
     case Primitive::kPrimFloat: {
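
The RegisterOrInt32LongConstant locations used above hinge on the x86-64 encoding rule that ALU instructions take at most a sign-extended 32-bit immediate, so a 64-bit constant is only usable directly if it fits in int32_t (hence the DCHECK(IsInt<32>(value)) before emitting). A sketch of that check, with a hypothetical helper name rather than ART's IsInt utility:

// Sketch of the 32-bit immediate encodability check (hypothetical helper).
#include <cstdint>
#include <cstdio>

bool IsInt32(int64_t value) {
  return value >= INT32_MIN && value <= INT32_MAX;
}

int main() {
  std::printf("%d\n", IsInt32(-1));                   // 1: encodable immediate.
  std::printf("%d\n", IsInt32(INT64_C(0x7fffffff)));  // 1: encodable immediate.
  std::printf("%d\n", IsInt32(INT64_C(0x80000000)));  // 0: needs a register.
  return 0;
}
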
@@ -1865,17 +1876,7 @@
     case Primitive::kPrimLong: {
       locations->SetInAt(0, Location::RequiresRegister());
       // We can use a leaq or addq if the constant can fit in an immediate.
-      HInstruction* rhs = add->InputAt(1);
-      bool is_int32_constant = false;
-      if (rhs->IsLongConstant()) {
-        int64_t value = rhs->AsLongConstant()->GetValue();
-        if (static_cast<int32_t>(value) == value) {
-          is_int32_constant = true;
-        }
-      }
-      locations->SetInAt(1,
-          is_int32_constant ? Location::RegisterOrConstant(rhs) :
-                              Location::RequiresRegister());
+      locations->SetInAt(1, Location::RegisterOrInt32LongConstant(add->InputAt(1)));
       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
       break;
     }
@@ -1973,7 +1974,7 @@
     }
     case Primitive::kPrimLong: {
       locations->SetInAt(0, Location::RequiresRegister());
-      locations->SetInAt(1, Location::RequiresRegister());
+      locations->SetInAt(1, Location::RegisterOrInt32LongConstant(sub->InputAt(1)));
       locations->SetOut(Location::SameAsFirstInput());
       break;
     }
@@ -2007,7 +2008,13 @@
       break;
     }
     case Primitive::kPrimLong: {
-      __ subq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
+      if (second.IsConstant()) {
+        int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
+        DCHECK(IsInt<32>(value));
+        __ subq(first.AsRegister<CpuRegister>(), Immediate(static_cast<int32_t>(value)));
+      } else {
+        __ subq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
+      }
       break;
     }
 
@@ -2038,8 +2045,13 @@
     }
     case Primitive::kPrimLong: {
       locations->SetInAt(0, Location::RequiresRegister());
-      locations->SetInAt(1, Location::RequiresRegister());
-      locations->SetOut(Location::SameAsFirstInput());
+      locations->SetInAt(1, Location::RegisterOrInt32LongConstant(mul->InputAt(1)));
+      if (locations->InAt(1).IsConstant()) {
+        // Can use 3 operand multiply.
+        locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+      } else {
+        locations->SetOut(Location::SameAsFirstInput());
+      }
       break;
     }
     case Primitive::kPrimFloat:
@@ -2059,9 +2071,9 @@
   LocationSummary* locations = mul->GetLocations();
   Location first = locations->InAt(0);
   Location second = locations->InAt(1);
-  DCHECK(first.Equals(locations->Out()));
   switch (mul->GetResultType()) {
     case Primitive::kPrimInt: {
+      DCHECK(first.Equals(locations->Out()));
       if (second.IsRegister()) {
         __ imull(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
       } else if (second.IsConstant()) {
@@ -2075,16 +2087,27 @@
       break;
     }
     case Primitive::kPrimLong: {
-      __ imulq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
+      if (second.IsConstant()) {
+        int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
+        DCHECK(IsInt<32>(value));
+        __ imulq(locations->Out().AsRegister<CpuRegister>(),
+                 first.AsRegister<CpuRegister>(),
+                 Immediate(static_cast<int32_t>(value)));
+      } else {
+        DCHECK(first.Equals(locations->Out()));
+        __ imulq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
+      }
       break;
     }
 
     case Primitive::kPrimFloat: {
+      DCHECK(first.Equals(locations->Out()));
       __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
       break;
     }
 
     case Primitive::kPrimDouble: {
+      DCHECK(first.Equals(locations->Out()));
       __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
       break;
     }
@@ -3320,20 +3343,35 @@
         __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
       }
     } else if (constant->IsFloatConstant()) {
-      Immediate imm(bit_cast<float, int32_t>(constant->AsFloatConstant()->GetValue()));
+      float fp_value = constant->AsFloatConstant()->GetValue();
+      int32_t value = bit_cast<float, int32_t>(fp_value);
+      Immediate imm(value);
       if (destination.IsFpuRegister()) {
-        __ movl(CpuRegister(TMP), imm);
-        __ movd(destination.AsFpuRegister<XmmRegister>(), CpuRegister(TMP));
+        XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
+        if (value == 0) {
+          // Easy FP 0.0: all bits zero, so xor the register with itself.
+          __ xorps(dest, dest);
+        } else {
+          __ movl(CpuRegister(TMP), imm);
+          __ movd(dest, CpuRegister(TMP));
+        }
       } else {
         DCHECK(destination.IsStackSlot()) << destination;
         __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), imm);
       }
     } else {
       DCHECK(constant->IsDoubleConstant()) << constant->DebugName();
-      Immediate imm(bit_cast<double, int64_t>(constant->AsDoubleConstant()->GetValue()));
+      double fp_value = constant->AsDoubleConstant()->GetValue();
+      int64_t value = bit_cast<double, int64_t>(fp_value);
+      Immediate imm(value);
       if (destination.IsFpuRegister()) {
-        __ movq(CpuRegister(TMP), imm);
-        __ movd(destination.AsFpuRegister<XmmRegister>(), CpuRegister(TMP));
+        XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
+        if (value == 0) {
+          __ xorpd(dest, dest);
+        } else {
+          __ movq(CpuRegister(TMP), imm);
+          __ movd(dest, CpuRegister(TMP));
+        }
       } else {
         DCHECK(destination.IsDoubleStackSlot()) << destination;
         __ movq(CpuRegister(TMP), imm);
@@ -3673,8 +3711,9 @@
   if (instruction->GetType() == Primitive::kPrimInt) {
     locations->SetInAt(1, Location::Any());
   } else {
-    // Request a register to avoid loading a 64bits constant.
+    // We can handle 32 bit constants.
-    locations->SetInAt(1, Location::RequiresRegister());
+    locations->SetInAt(1, Location::RegisterOrInt32LongConstant(instruction->InputAt(1)));
   }
   locations->SetOut(Location::SameAsFirstInput());
 }
@@ -3730,13 +3769,34 @@
     }
   } else {
     DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong);
+    CpuRegister first_reg = first.AsRegister<CpuRegister>();
+    bool second_is_constant = false;
+    int64_t value = 0;
+    if (second.IsConstant()) {
+      second_is_constant = true;
+      value = second.GetConstant()->AsLongConstant()->GetValue();
+      DCHECK(IsInt<32>(value));
+    }
+
     if (instruction->IsAnd()) {
-      __ andq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
+      if (second_is_constant) {
+        __ andq(first_reg, Immediate(static_cast<int32_t>(value)));
+      } else {
+        __ andq(first_reg, second.AsRegister<CpuRegister>());
+      }
     } else if (instruction->IsOr()) {
-      __ orq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
+      if (second_is_constant) {
+        __ orq(first_reg, Immediate(static_cast<int32_t>(value)));
+      } else {
+        __ orq(first_reg, second.AsRegister<CpuRegister>());
+      }
     } else {
       DCHECK(instruction->IsXor());
-      __ xorq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
+      if (second_is_constant) {
+        __ xorq(first_reg, Immediate(static_cast<int32_t>(value)));
+      } else {
+        __ xorq(first_reg, second.AsRegister<CpuRegister>());
+      }
     }
   }
 }
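
The x86-64 hunks above all hinge on the same question: can a 64-bit constant be encoded as a
sign-extended 32-bit immediate? A minimal standalone sketch of that check (illustrative only;
the hunks themselves rely on ART's IsInt<32> helper):

    #include <cstdint>

    // True if the value survives a round trip through int32_t, i.e. it can be
    // used as a sign-extended 32-bit immediate on x86-64.
    static bool FitsInInt32(int64_t value) {
      return static_cast<int64_t>(static_cast<int32_t>(value)) == value;
    }

When the check fails, the location builders fall back to Location::RequiresRegister() and the
generators emit the register-register forms.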
diff --git a/compiler/optimizing/common_arm64.h b/compiler/optimizing/common_arm64.h
index 9447d3b..fd8c0c6 100644
--- a/compiler/optimizing/common_arm64.h
+++ b/compiler/optimizing/common_arm64.h
@@ -183,6 +183,40 @@
   }
 }
 
+static bool CanEncodeConstantAsImmediate(HConstant* constant, HInstruction* instr) {
+  DCHECK(constant->IsIntConstant() || constant->IsLongConstant() || constant->IsNullConstant());
+
+  // For single uses we let VIXL handle the constant generation since it will
+  // use registers that are not managed by the register allocator (wip0, wip1).
+  if (constant->GetUses().HasOnlyOneUse()) {
+    return true;
+  }
+
+  int64_t value = CodeGenerator::GetInt64ValueOf(constant);
+
+  if (instr->IsAdd() || instr->IsSub() || instr->IsCondition() || instr->IsCompare()) {
+    // Uses aliases of ADD/SUB instructions.
+    return vixl::Assembler::IsImmAddSub(value);
+  } else if (instr->IsAnd() || instr->IsOr() || instr->IsXor()) {
+    // Uses logical operations.
+    return vixl::Assembler::IsImmLogical(value, vixl::kXRegSize);
+  } else {
+    DCHECK(instr->IsNeg());
+    // Uses mov -immediate.
+    return vixl::Assembler::IsImmMovn(value, vixl::kXRegSize);
+  }
+}
+
+static inline Location ARM64EncodableConstantOrRegister(HInstruction* constant,
+                                                        HInstruction* instr) {
+  if (constant->IsConstant()
+      && CanEncodeConstantAsImmediate(constant->AsConstant(), instr)) {
+    return Location::ConstantLocation(constant->AsConstant());
+  }
+
+  return Location::RequiresRegister();
+}
+
 }  // namespace helpers
 }  // namespace arm64
 }  // namespace art
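
On arm64 the equivalent filtering is delegated to VIXL's encodability predicates above. A
hypothetical call site in an arm64 LocationsBuilder (not part of this diff, shown only to
illustrate the intended use):

    // Sketch: take a constant location when VIXL can encode the value as an
    // immediate for this instruction, otherwise require a register.
    locations->SetInAt(1, ARM64EncodableConstantOrRegister(instruction->InputAt(1), instruction));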
diff --git a/compiler/optimizing/locations.cc b/compiler/optimizing/locations.cc
index 4ac1fe8..a1ae670 100644
--- a/compiler/optimizing/locations.cc
+++ b/compiler/optimizing/locations.cc
@@ -56,6 +56,19 @@
       : Location::RequiresRegister();
 }
 
+Location Location::RegisterOrInt32LongConstant(HInstruction* instruction) {
+  if (!instruction->IsConstant() || !instruction->AsConstant()->IsLongConstant()) {
+    return Location::RequiresRegister();
+  }
+
+  // Does the long constant fit in a 32 bit int?
+  int64_t value = instruction->AsConstant()->AsLongConstant()->GetValue();
+
+  return IsInt<32>(value)
+      ? Location::ConstantLocation(instruction->AsConstant())
+      : Location::RequiresRegister();
+}
+
 Location Location::ByteRegisterOrConstant(int reg, HInstruction* instruction) {
   return instruction->IsConstant()
       ? Location::ConstantLocation(instruction->AsConstant())
diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h
index 566c0da..de876be 100644
--- a/compiler/optimizing/locations.h
+++ b/compiler/optimizing/locations.h
@@ -345,6 +345,7 @@
   }
 
   static Location RegisterOrConstant(HInstruction* instruction);
+  static Location RegisterOrInt32LongConstant(HInstruction* instruction);
   static Location ByteRegisterOrConstant(int reg, HInstruction* instruction);
 
   // The location of the first input to the instruction will be
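
To illustrate the new helper's behavior with made-up values: a long constant such as -1 or
0x7fffffff yields a constant Location, while (1LL << 40) does not fit in a sign-extended 32-bit
immediate and yields Location::RequiresRegister(). A typical request from a backend looks like:

    // Sketch: register or int32-encodable long constant for the second input.
    locations->SetInAt(1, Location::RegisterOrInt32LongConstant(instruction->InputAt(1)));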
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index b70f925..933a8a0 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -462,6 +462,16 @@
     return nullptr;
   }
 
+  // Implementation of the space filter: do not compile a code item whose size in
+  // code units is bigger than 256.
+  static constexpr size_t kSpaceFilterOptimizingThreshold = 256;
+  const CompilerOptions& compiler_options = compiler_driver->GetCompilerOptions();
+  if ((compiler_options.GetCompilerFilter() == CompilerOptions::kSpace)
+      && (code_item->insns_size_in_code_units_ > kSpaceFilterOptimizingThreshold)) {
+    compilation_stats_.RecordStat(MethodCompilationStat::kNotCompiledSpaceFilter);
+    return nullptr;
+  }
+
   DexCompilationUnit dex_compilation_unit(
     nullptr, class_loader, art::Runtime::Current()->GetClassLinker(), dex_file, code_item,
     class_def_idx, method_idx, access_flags,
diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h
index 3ebf0f8..22ec2a5 100644
--- a/compiler/optimizing/optimizing_compiler_stats.h
+++ b/compiler/optimizing/optimizing_compiler_stats.h
@@ -38,6 +38,7 @@
   kNotCompiledUnresolvedMethod,
   kNotCompiledUnresolvedField,
   kNotCompiledNonSequentialRegPair,
+  kNotCompiledSpaceFilter,
   kNotOptimizedTryCatch,
   kNotOptimizedDisabled,
   kNotCompiledCantAccesType,
@@ -96,6 +97,7 @@
       case kNotOptimizedDisabled : return "kNotOptimizedDisabled";
       case kNotOptimizedTryCatch : return "kNotOptimizedTryCatch";
       case kNotCompiledCantAccesType : return "kNotCompiledCantAccesType";
+      case kNotCompiledSpaceFilter : return "kNotCompiledSpaceFilter";
       case kNotOptimizedRegisterAllocator : return "kNotOptimizedRegisterAllocator";
       case kNotCompiledUnhandledInstruction : return "kNotCompiledUnhandledInstruction";
       case kRemovedCheckedCast: return "kRemovedCheckedCast";
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index f2704b7..bd155ed 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -1277,6 +1277,14 @@
 }
 
 
+void X86_64Assembler::orq(CpuRegister dst, const Immediate& imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  CHECK(imm.is_int32());  // orq only supports 32b immediate.
+  EmitRex64(dst);
+  EmitComplex(1, Operand(dst), imm);
+}
+
+
 void X86_64Assembler::orq(CpuRegister dst, CpuRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitRex64(dst, src);
@@ -1548,27 +1556,30 @@
 
 
 void X86_64Assembler::imulq(CpuRegister reg, const Immediate& imm) {
+  imulq(reg, reg, imm);
+}
+
+void X86_64Assembler::imulq(CpuRegister dst, CpuRegister reg, const Immediate& imm) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   CHECK(imm.is_int32());  // imulq only supports 32b immediate.
 
-  EmitRex64(reg, reg);
+  EmitRex64(dst, reg);
 
   // See whether imm can be represented as a sign-extended 8bit value.
   int64_t v64 = imm.value();
   if (IsInt<8>(v64)) {
     // Sign-extension works.
     EmitUint8(0x6B);
-    EmitOperand(reg.LowBits(), Operand(reg));
+    EmitOperand(dst.LowBits(), Operand(reg));
     EmitUint8(static_cast<uint8_t>(v64 & 0xFF));
   } else {
     // Not representable, use full immediate.
     EmitUint8(0x69);
-    EmitOperand(reg.LowBits(), Operand(reg));
+    EmitOperand(dst.LowBits(), Operand(reg));
     EmitImmediate(imm);
   }
 }
 
-
 void X86_64Assembler::imulq(CpuRegister reg, const Address& address) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitRex64(reg, address);
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index 5dfcf45..495f74f 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -429,6 +429,7 @@
   void orl(CpuRegister dst, CpuRegister src);
   void orl(CpuRegister reg, const Address& address);
   void orq(CpuRegister dst, CpuRegister src);
+  void orq(CpuRegister dst, const Immediate& imm);
 
   void xorl(CpuRegister dst, CpuRegister src);
   void xorl(CpuRegister dst, const Immediate& imm);
@@ -467,6 +468,7 @@
   void imulq(CpuRegister dst, CpuRegister src);
   void imulq(CpuRegister reg, const Immediate& imm);
   void imulq(CpuRegister reg, const Address& address);
+  void imulq(CpuRegister dst, CpuRegister reg, const Immediate& imm);
 
   void imull(CpuRegister reg);
   void imull(const Address& address);
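
The new three-operand imulq overload lets the long-by-constant multiply write straight into the
output register instead of clobbering its input. A hedged usage sketch (register names are
illustrative; the immediate must fit in a sign-extended 32-bit value):

    // dst = src * imm without requiring dst == src.
    __ imulq(out_reg, in_reg, Immediate(static_cast<int32_t>(value)));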
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index 44b07e2..dfea783 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -109,9 +109,14 @@
 
   UsageError("Usage: dex2oat [options]...");
   UsageError("");
-  UsageError("  --dex-file=<dex-file>: specifies a .dex file to compile.");
+  UsageError("  --dex-file=<dex-file>: specifies a .dex, .jar, or .apk file to compile.");
   UsageError("      Example: --dex-file=/system/framework/core.jar");
   UsageError("");
+  UsageError("  --dex-location=<dex-location>: specifies an alternative dex location to");
+  UsageError("      encode in the oat file for the corresponding --dex-file argument.");
+  UsageError("      Example: --dex-file=/home/build/out/system/framework/core.jar");
+  UsageError("               --dex-location=/system/framework/core.jar");
+  UsageError("");
   UsageError("  --zip-fd=<file-descriptor>: specifies a file descriptor of a zip file");
   UsageError("      containing a classes.dex file to compile.");
   UsageError("      Example: --zip-fd=5");
@@ -614,7 +619,6 @@
           Usage("Unknown compiler backend: %s", backend_str.data());
         }
       } else if (option.starts_with("--compiler-filter=")) {
-        requested_specific_compiler = true;
         compiler_filter_string = option.substr(strlen("--compiler-filter=")).data();
       } else if (option == "--compile-pic") {
         compile_pic = true;
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index 6296cf5..7144577 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -307,7 +307,6 @@
 // Runtime JDWP state.
 static JDWP::JdwpState* gJdwpState = nullptr;
 static bool gDebuggerConnected;  // debugger or DDMS is connected.
-static bool gDebuggerActive;     // debugger is making requests.
 static bool gDisposed;           // debugger called VirtualMachine.Dispose, so we should drop the connection.
 
 static bool gDdmThreadNotification = false;
@@ -319,6 +318,7 @@
 static Dbg::HpsgWhen gDdmNhsgWhen = Dbg::HPSG_WHEN_NEVER;
 static Dbg::HpsgWhat gDdmNhsgWhat;
 
+bool Dbg::gDebuggerActive = false;
 ObjectRegistry* Dbg::gRegistry = nullptr;
 
 // Recent allocation tracking.
@@ -331,7 +331,6 @@
 // Deoptimization support.
 std::vector<DeoptimizationRequest> Dbg::deoptimization_requests_;
 size_t Dbg::full_deoptimization_event_count_ = 0;
-size_t Dbg::delayed_full_undeoptimization_count_ = 0;
 
 // Instrumentation event reference counters.
 size_t Dbg::dex_pc_change_event_ref_count_ = 0;
@@ -620,7 +619,7 @@
   // Enable all debugging features, including scans for breakpoints.
   // This is a no-op if we're already active.
   // Only called from the JDWP handler thread.
-  if (gDebuggerActive) {
+  if (IsDebuggerActive()) {
     return;
   }
 
@@ -634,7 +633,6 @@
     MutexLock mu(Thread::Current(), *Locks::deoptimization_lock_);
     CHECK_EQ(deoptimization_requests_.size(), 0U);
     CHECK_EQ(full_deoptimization_event_count_, 0U);
-    CHECK_EQ(delayed_full_undeoptimization_count_, 0U);
     CHECK_EQ(dex_pc_change_event_ref_count_, 0U);
     CHECK_EQ(method_enter_event_ref_count_, 0U);
     CHECK_EQ(method_exit_event_ref_count_, 0U);
@@ -673,7 +671,7 @@
   ThreadState old_state = self->SetStateUnsafe(kRunnable);
 
   // Debugger may not be active at this point.
-  if (gDebuggerActive) {
+  if (IsDebuggerActive()) {
     {
       // Since we're going to disable deoptimization, we clear the deoptimization requests queue.
       // This prevents us from having any pending deoptimization request when the debugger attaches
@@ -681,7 +679,6 @@
       MutexLock mu(Thread::Current(), *Locks::deoptimization_lock_);
       deoptimization_requests_.clear();
       full_deoptimization_event_count_ = 0U;
-      delayed_full_undeoptimization_count_ = 0U;
     }
     if (instrumentation_events_ != 0) {
       runtime->GetInstrumentation()->RemoveListener(&gDebugInstrumentationListener,
@@ -704,10 +701,6 @@
   gDebuggerConnected = false;
 }
 
-bool Dbg::IsDebuggerActive() {
-  return gDebuggerActive;
-}
-
 void Dbg::ConfigureJdwp(const JDWP::JdwpOptions& jdwp_options) {
   CHECK_NE(jdwp_options.transport, JDWP::kJdwpTransportUnknown);
   gJdwpOptions = jdwp_options;
@@ -3020,29 +3013,6 @@
   }
 }
 
-void Dbg::DelayFullUndeoptimization() {
-  if (RequiresDeoptimization()) {
-    MutexLock mu(Thread::Current(), *Locks::deoptimization_lock_);
-    ++delayed_full_undeoptimization_count_;
-    DCHECK_LE(delayed_full_undeoptimization_count_, full_deoptimization_event_count_);
-  }
-}
-
-void Dbg::ProcessDelayedFullUndeoptimizations() {
-  // TODO: avoid taking the lock twice (once here and once in ManageDeoptimization).
-  {
-    MutexLock mu(Thread::Current(), *Locks::deoptimization_lock_);
-    while (delayed_full_undeoptimization_count_ > 0) {
-      DeoptimizationRequest req;
-      req.SetKind(DeoptimizationRequest::kFullUndeoptimization);
-      req.SetMethod(nullptr);
-      RequestDeoptimizationLocked(req);
-      --delayed_full_undeoptimization_count_;
-    }
-  }
-  ManageDeoptimization();
-}
-
 void Dbg::RequestDeoptimization(const DeoptimizationRequest& req) {
   if (req.GetKind() == DeoptimizationRequest::kNothing) {
     // Nothing to do.
@@ -3352,6 +3322,125 @@
   }
 }
 
+bool Dbg::IsForcedInterpreterNeededForCallingImpl(Thread* thread, mirror::ArtMethod* m) {
+  const SingleStepControl* const ssc = thread->GetSingleStepControl();
+  if (ssc == nullptr) {
+    // If we are not single-stepping, then we don't have to force interpreter.
+    return false;
+  }
+  if (Runtime::Current()->GetInstrumentation()->InterpretOnly()) {
+    // If we are in interpreter only mode, then we don't have to force interpreter.
+    return false;
+  }
+
+  if (!m->IsNative() && !m->IsProxyMethod()) {
+    // If we want to step into a method, then we have to force interpreter on that call.
+    if (ssc->GetStepDepth() == JDWP::SD_INTO) {
+      return true;
+    }
+  }
+  return false;
+}
+
+bool Dbg::IsForcedInterpreterNeededForResolutionImpl(Thread* thread, mirror::ArtMethod* m) {
+  instrumentation::Instrumentation* const instrumentation =
+      Runtime::Current()->GetInstrumentation();
+  // If we are in interpreter only mode, then we don't have to force interpreter.
+  if (instrumentation->InterpretOnly()) {
+    return false;
+  }
+  // We can only interpret pure Java methods.
+  if (m->IsNative() || m->IsProxyMethod()) {
+    return false;
+  }
+  const SingleStepControl* const ssc = thread->GetSingleStepControl();
+  if (ssc != nullptr) {
+    // If we want to step into a method, then we have to force interpreter on that call.
+    if (ssc->GetStepDepth() == JDWP::SD_INTO) {
+      return true;
+    }
+    // If we are stepping out of a static initializer (by issuing a step
+    // in or step over) that was implicitly invoked by calling a static
+    // method, then we need to step into that method. Having a lower stack
+    // depth than the one recorded by the single-step control indicates that
+    // the step originates from the static initializer.
+    if (ssc->GetStepDepth() != JDWP::SD_OUT &&
+        ssc->GetStackDepth() > GetStackDepth(thread)) {
+      return true;
+    }
+  }
+  // There are cases where we have to force interpreter on deoptimized methods,
+  // because in some cases the call will not be performed by invoking an entry
+  // point that has been replaced by the deoptimization, but instead by directly
+  // invoking the compiled code of the method, for example.
+  return instrumentation->IsDeoptimized(m);
+}
+
+bool Dbg::IsForcedInstrumentationNeededForResolutionImpl(Thread* thread, mirror::ArtMethod* m) {
+  // The upcall can be nullptr and in that case we don't need to do anything.
+  if (m == nullptr) {
+    return false;
+  }
+  instrumentation::Instrumentation* const instrumentation =
+      Runtime::Current()->GetInstrumentation();
+  // If we are in interpreter only mode, then we don't have to force interpreter.
+  if (instrumentation->InterpretOnly()) {
+    return false;
+  }
+  // We can only interpret pure Java methods.
+  if (m->IsNative() || m->IsProxyMethod()) {
+    return false;
+  }
+  const SingleStepControl* const ssc = thread->GetSingleStepControl();
+  if (ssc != nullptr) {
+    // If we are stepping out of a static initializer (by issuing a step
+    // out) that was implicitly invoked by calling a static method, then we
+    // need to step into the caller of that method. Having a lower stack
+    // depth than the one recorded by the single-step control indicates that
+    // the step originates from the static initializer.
+    if (ssc->GetStepDepth() == JDWP::SD_OUT &&
+        ssc->GetStackDepth() > GetStackDepth(thread)) {
+      return true;
+    }
+  }
+  // If we are returning from a static initializer that was implicitly
+  // invoked by calling a static method, and the caller is deoptimized,
+  // then we have to deoptimize the stack without forcing the interpreter
+  // on the static method that was called originally. This problem can
+  // be solved easily by forcing instrumentation on the called method,
+  // because the instrumentation exit hook will recognize the need for
+  // stack deoptimization by calling IsForcedInterpreterNeededForUpcall.
+  return instrumentation->IsDeoptimized(m);
+}
+
+bool Dbg::IsForcedInterpreterNeededForUpcallImpl(Thread* thread, mirror::ArtMethod* m) {
+  // The upcall can be nullptr and in that case we don't need to do anything.
+  if (m == nullptr) {
+    return false;
+  }
+  instrumentation::Instrumentation* const instrumentation =
+      Runtime::Current()->GetInstrumentation();
+  // If we are in interpreter only mode, then we don't have to force interpreter.
+  if (instrumentation->InterpretOnly()) {
+    return false;
+  }
+  // We can only interpret pure Java methods.
+  if (m->IsNative() || m->IsProxyMethod()) {
+    return false;
+  }
+  const SingleStepControl* const ssc = thread->GetSingleStepControl();
+  if (ssc != nullptr) {
+    // The debugger is not interested in what is happening below the level
+    // of the step, so we only force the interpreter when we are not below
+    // the step.
+    if (ssc->GetStackDepth() >= GetStackDepth(thread)) {
+      return true;
+    }
+  }
+  // We have to require stack deoptimization if the upcall is deoptimized.
+  return instrumentation->IsDeoptimized(m);
+}
+
 // Scoped utility class to suspend a thread so that we may do tasks such as walk its stack. Doesn't
 // cause suspension if the thread is the current thread.
 class ScopedThreadSuspension {
diff --git a/runtime/debugger.h b/runtime/debugger.h
index 01c9d5d..d015294 100644
--- a/runtime/debugger.h
+++ b/runtime/debugger.h
@@ -243,7 +243,9 @@
 
   // Returns true if we're actually debugging with a real debugger, false if it's
   // just DDMS (or nothing at all).
-  static bool IsDebuggerActive();
+  static bool IsDebuggerActive() {
+    return gDebuggerActive;
+  }
 
   // Configures JDWP with parsed command-line options.
   static void ConfigureJdwp(const JDWP::JdwpOptions& jdwp_options);
@@ -543,13 +545,6 @@
       LOCKS_EXCLUDED(Locks::deoptimization_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  // Support delayed full undeoptimization requests. This is currently only used for single-step
-  // events.
-  static void DelayFullUndeoptimization() LOCKS_EXCLUDED(Locks::deoptimization_lock_);
-  static void ProcessDelayedFullUndeoptimizations()
-      LOCKS_EXCLUDED(Locks::deoptimization_lock_)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
   // Manage deoptimization after updating JDWP events list. Suspends all threads, processes each
   // request and finally resumes all threads.
   static void ManageDeoptimization()
@@ -564,6 +559,53 @@
       LOCKS_EXCLUDED(Locks::breakpoint_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  /*
+   * Forced interpreter checkers for single-step and continue support.
+   */
+
+  // Indicates whether we need to force the use of the interpreter to invoke a method.
+  // This allows single-stepping or continuing into the called method.
+  static bool IsForcedInterpreterNeededForCalling(Thread* thread, mirror::ArtMethod* m)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    if (!IsDebuggerActive()) {
+      return false;
+    }
+    return IsForcedInterpreterNeededForCallingImpl(thread, m);
+  }
+
+  // Indicates whether we need to force the use of the interpreter entrypoint when calling a
+  // method through the resolution trampoline. This allows single-stepping or continuing into
+  // the called method.
+  static bool IsForcedInterpreterNeededForResolution(Thread* thread, mirror::ArtMethod* m)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    if (!IsDebuggerActive()) {
+      return false;
+    }
+    return IsForcedInterpreterNeededForResolutionImpl(thread, m);
+  }
+
+  // Indicates whether we need to force the use of the instrumentation entrypoint when calling
+  // a method through the resolution trampoline. This allows deoptimizing the stack for
+  // debugging when we return from the called method.
+  static bool IsForcedInstrumentationNeededForResolution(Thread* thread, mirror::ArtMethod* m)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    if (!IsDebuggerActive()) {
+      return false;
+    }
+    return IsForcedInstrumentationNeededForResolutionImpl(thread, m);
+  }
+
+  // Indicates whether we need to force the use of the interpreter when returning from the
+  // interpreter into the runtime. This allows deoptimizing the stack and continuing
+  // execution with the interpreter for debugging.
+  static bool IsForcedInterpreterNeededForUpcall(Thread* thread, mirror::ArtMethod* m)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    if (!IsDebuggerActive()) {
+      return false;
+    }
+    return IsForcedInterpreterNeededForUpcallImpl(thread, m);
+  }
+
   // Single-stepping.
   static JDWP::JdwpError ConfigureStep(JDWP::ObjectId thread_id, JDWP::JdwpStepSize size,
                                        JDWP::JdwpStepDepth depth)
@@ -690,11 +732,27 @@
       EXCLUSIVE_LOCKS_REQUIRED(Locks::deoptimization_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  static bool IsForcedInterpreterNeededForCallingImpl(Thread* thread, mirror::ArtMethod* m)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  static bool IsForcedInterpreterNeededForResolutionImpl(Thread* thread, mirror::ArtMethod* m)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  static bool IsForcedInstrumentationNeededForResolutionImpl(Thread* thread, mirror::ArtMethod* m)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  static bool IsForcedInterpreterNeededForUpcallImpl(Thread* thread, mirror::ArtMethod* m)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   static AllocRecord* recent_allocation_records_ PT_GUARDED_BY(Locks::alloc_tracker_lock_);
   static size_t alloc_record_max_ GUARDED_BY(Locks::alloc_tracker_lock_);
   static size_t alloc_record_head_ GUARDED_BY(Locks::alloc_tracker_lock_);
   static size_t alloc_record_count_ GUARDED_BY(Locks::alloc_tracker_lock_);
 
+  // Indicates whether the debugger is making requests.
+  static bool gDebuggerActive;
+
+  // The registry mapping objects to JDWP ids.
   static ObjectRegistry* gRegistry;
 
   // Deoptimization requests to be processed each time the event list is updated. This is used when
@@ -709,10 +767,6 @@
   // undeoptimize when the last event is unregistered (when the counter is set to 0).
   static size_t full_deoptimization_event_count_ GUARDED_BY(Locks::deoptimization_lock_);
 
-  // Count the number of full undeoptimization requests delayed to next resume or end of debug
-  // session.
-  static size_t delayed_full_undeoptimization_count_ GUARDED_BY(Locks::deoptimization_lock_);
-
   static size_t* GetReferenceCounterForEvent(uint32_t instrumentation_event);
 
   // Weak global type cache, TODO improve this.
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index 70ee042..5eb97d8 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -30,6 +30,7 @@
 #include "mirror/object_array-inl.h"
 #include "runtime.h"
 #include "scoped_thread_state_change.h"
+#include "debugger.h"
 
 namespace art {
 
@@ -639,6 +640,14 @@
     JValue result = interpreter::EnterInterpreterFromEntryPoint(self, code_item, shadow_frame);
     // Pop transition.
     self->PopManagedStackFragment(fragment);
+
+    // Request a stack deoptimization if needed.
+    mirror::ArtMethod* caller = QuickArgumentVisitor::GetCallingMethod(sp);
+    if (UNLIKELY(Dbg::IsForcedInterpreterNeededForUpcall(self, caller))) {
+      self->SetException(Thread::GetDeoptimizationException());
+      self->SetDeoptimizationReturnValue(result);
+    }
+
     // No need to restore the args since the method has already been run by the interpreter.
     return result.GetJ();
   }
@@ -950,14 +959,37 @@
         called->GetDexCache()->SetResolvedMethod(called_dex_method_idx, called);
       }
     }
+
     // Ensure that the called method's class is initialized.
     StackHandleScope<1> hs(soa.Self());
     Handle<mirror::Class> called_class(hs.NewHandle(called->GetDeclaringClass()));
     linker->EnsureInitialized(soa.Self(), called_class, true, true);
     if (LIKELY(called_class->IsInitialized())) {
-      code = called->GetEntryPointFromQuickCompiledCode();
+      if (UNLIKELY(Dbg::IsForcedInterpreterNeededForResolution(self, called))) {
+        // If we are single-stepping or the called method is deoptimized (by a
+        // breakpoint, for example), then we have to execute the called method
+        // with the interpreter.
+        code = GetQuickToInterpreterBridge();
+      } else if (UNLIKELY(Dbg::IsForcedInstrumentationNeededForResolution(self, caller))) {
+        // If the caller is deoptimized (by a breakpoint, for example), we have to
+        // continue its execution with the interpreter when returning from the called
+        // method. Because we do not want to execute the called method with the
+        // interpreter, we wrap its execution into the instrumentation stubs.
+        // When the called method returns, it will execute the instrumentation
+        // exit hook, which will determine the need for the interpreter with a call
+        // to Dbg::IsForcedInterpreterNeededForUpcall and deoptimize the stack if
+        // it is needed.
+        code = GetQuickInstrumentationEntryPoint();
+      } else {
+        code = called->GetEntryPointFromQuickCompiledCode();
+      }
     } else if (called_class->IsInitializing()) {
-      if (invoke_type == kStatic) {
+      if (UNLIKELY(Dbg::IsForcedInterpreterNeededForResolution(self, called))) {
+        // If we are single-stepping or the called method is deoptimized (by a
+        // breakpoint, for example), then we have to execute the called method
+        // with the interpreter.
+        code = GetQuickToInterpreterBridge();
+      } else if (invoke_type == kStatic) {
         // Class is still initializing, go to oat and grab code (trampoline must be left in place
         // until class is initialized to stop races between threads).
         code = linker->GetQuickOatCodeFor(called);
diff --git a/runtime/instrumentation.cc b/runtime/instrumentation.cc
index b53b8cd..9adb4ac 100644
--- a/runtime/instrumentation.cc
+++ b/runtime/instrumentation.cc
@@ -1030,7 +1030,8 @@
   NthCallerVisitor visitor(self, 1, true);
   visitor.WalkStack(true);
   bool deoptimize = (visitor.caller != nullptr) &&
-                    (interpreter_stubs_installed_ || IsDeoptimized(visitor.caller));
+                    (interpreter_stubs_installed_ || IsDeoptimized(visitor.caller) ||
+                    Dbg::IsForcedInterpreterNeededForUpcall(self, visitor.caller));
   if (deoptimize) {
     if (kVerboseInstrumentation) {
       LOG(INFO) << StringPrintf("Deoptimizing %s by returning from %s with result %#" PRIx64 " in ",
diff --git a/runtime/interpreter/interpreter_common.cc b/runtime/interpreter/interpreter_common.cc
index 26ab602..a3ab026 100644
--- a/runtime/interpreter/interpreter_common.cc
+++ b/runtime/interpreter/interpreter_common.cc
@@ -18,6 +18,7 @@
 
 #include <cmath>
 
+#include "debugger.h"
 #include "mirror/array-inl.h"
 #include "unstarted_runtime.h"
 
@@ -616,8 +617,14 @@
           << PrettyMethod(new_shadow_frame->GetMethod());
       UNREACHABLE();
     }
-    (new_shadow_frame->GetMethod()->GetEntryPointFromInterpreter())(self, code_item,
-                                                                    new_shadow_frame, result);
+    // Force the use of the interpreter when it is required by the debugger.
+    mirror::EntryPointFromInterpreter* entry;
+    if (UNLIKELY(Dbg::IsForcedInterpreterNeededForCalling(self, new_shadow_frame->GetMethod()))) {
+      entry = &art::artInterpreterToInterpreterBridge;
+    } else {
+      entry = new_shadow_frame->GetMethod()->GetEntryPointFromInterpreter();
+    }
+    entry(self, code_item, new_shadow_frame, result);
   } else {
     UnstartedRuntimeInvoke(self, code_item, new_shadow_frame, result, first_dest_reg);
   }
diff --git a/runtime/java_vm_ext.cc b/runtime/java_vm_ext.cc
index e68616f..09bfbf3 100644
--- a/runtime/java_vm_ext.cc
+++ b/runtime/java_vm_ext.cc
@@ -631,20 +631,20 @@
 
   Locks::mutator_lock_->AssertNotHeld(self);
   const char* path_str = path.empty() ? nullptr : path.c_str();
-  void* handle = dlopen(path_str, RTLD_LAZY);
+  void* handle = dlopen(path_str, RTLD_NOW);
   bool needs_native_bridge = false;
   if (handle == nullptr) {
     if (android::NativeBridgeIsSupported(path_str)) {
-      handle = android::NativeBridgeLoadLibrary(path_str, RTLD_LAZY);
+      handle = android::NativeBridgeLoadLibrary(path_str, RTLD_NOW);
       needs_native_bridge = true;
     }
   }
 
-  VLOG(jni) << "[Call to dlopen(\"" << path << "\", RTLD_LAZY) returned " << handle << "]";
+  VLOG(jni) << "[Call to dlopen(\"" << path << "\", RTLD_NOW) returned " << handle << "]";
 
   if (handle == nullptr) {
     *error_msg = dlerror();
-    LOG(ERROR) << "dlopen(\"" << path << "\", RTLD_LAZY) failed: " << *error_msg;
+    VLOG(jni) << "dlopen(\"" << path << "\", RTLD_NOW) failed: " << *error_msg;
     return false;
   }
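
Switching dlopen() from RTLD_LAZY to RTLD_NOW resolves every undefined symbol at load time, so a
library whose symbols cannot all be resolved fails immediately with a useful dlerror() instead of
crashing later when a missing symbol is first called. A standalone sketch of that behavior (the
library path is illustrative, not ART code):

    #include <dlfcn.h>
    #include <cstdio>

    bool TryLoad(const char* path) {
      void* handle = dlopen(path, RTLD_NOW);  // Resolve all symbols now.
      if (handle == nullptr) {
        // With RTLD_LAZY this failure could surface much later, at first use.
        std::fprintf(stderr, "dlopen(%s) failed: %s\n", path, dlerror());
        return false;
      }
      return true;
    }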
 
diff --git a/runtime/jdwp/jdwp_event.cc b/runtime/jdwp/jdwp_event.cc
index 4bf7142..c9a4483 100644
--- a/runtime/jdwp/jdwp_event.cc
+++ b/runtime/jdwp/jdwp_event.cc
@@ -133,7 +133,6 @@
       case EK_METHOD_ENTRY:
       case EK_METHOD_EXIT:
       case EK_METHOD_EXIT_WITH_RETURN_VALUE:
-      case EK_SINGLE_STEP:
       case EK_FIELD_ACCESS:
       case EK_FIELD_MODIFICATION:
         return true;
@@ -278,16 +277,7 @@
         Dbg::UnconfigureStep(pMod->step.threadId);
       }
     }
-    if (pEvent->eventKind == EK_SINGLE_STEP) {
-      // Special case for single-steps where we want to avoid the slow pattern deoptimize/undeoptimize
-      // loop between each single-step. In a IDE, this would happens each time the user click on the
-      // "single-step" button. Here we delay the full undeoptimization to the next resume
-      // (VM.Resume or ThreadReference.Resume) or the end of the debugging session (VM.Dispose or
-      // runtime shutdown).
-      // Therefore, in a singles-stepping sequence, only the first single-step will trigger a full
-      // deoptimization and only the last single-step will trigger a full undeoptimization.
-      Dbg::DelayFullUndeoptimization();
-    } else if (NeedsFullDeoptimization(pEvent->eventKind)) {
+    if (NeedsFullDeoptimization(pEvent->eventKind)) {
       CHECK_EQ(req.GetKind(), DeoptimizationRequest::kNothing);
       CHECK(req.Method() == nullptr);
       req.SetKind(DeoptimizationRequest::kFullUndeoptimization);
diff --git a/runtime/jdwp/jdwp_handler.cc b/runtime/jdwp/jdwp_handler.cc
index c7083dc..add1394 100644
--- a/runtime/jdwp/jdwp_handler.cc
+++ b/runtime/jdwp/jdwp_handler.cc
@@ -295,7 +295,6 @@
  */
 static JdwpError VM_Resume(JdwpState*, Request*, ExpandBuf*)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  Dbg::ProcessDelayedFullUndeoptimizations();
   Dbg::ResumeVM();
   return ERR_NONE;
 }
@@ -989,8 +988,6 @@
     return ERR_NONE;
   }
 
-  Dbg::ProcessDelayedFullUndeoptimizations();
-
   Dbg::ResumeThread(thread_id);
   return ERR_NONE;
 }
diff --git a/runtime/jdwp/jdwp_main.cc b/runtime/jdwp/jdwp_main.cc
index 3d69796..e2b88a5 100644
--- a/runtime/jdwp/jdwp_main.cc
+++ b/runtime/jdwp/jdwp_main.cc
@@ -322,8 +322,6 @@
     CHECK(event_list_ == nullptr);
   }
 
-  Dbg::ProcessDelayedFullUndeoptimizations();
-
   /*
    * Should not have one of these in progress.  If the debugger went away
    * mid-request, though, we could see this.
diff --git a/runtime/jit/jit_code_cache.h b/runtime/jit/jit_code_cache.h
index aa8c717..8a20e39 100644
--- a/runtime/jit/jit_code_cache.h
+++ b/runtime/jit/jit_code_cache.h
@@ -47,33 +47,50 @@
   static constexpr size_t kMaxCapacity = 1 * GB;
   static constexpr size_t kDefaultCapacity = 2 * MB;
 
+  // Create the code cache with a code + data capacity equal to "capacity". On failure, an error
+  // message is passed back in the out arg error_msg.
   static JitCodeCache* Create(size_t capacity, std::string* error_msg);
 
   const uint8_t* CodeCachePtr() const {
     return code_cache_ptr_;
   }
+
   size_t CodeCacheSize() const {
     return code_cache_ptr_ - code_cache_begin_;
   }
+
   size_t CodeCacheRemain() const {
     return code_cache_end_ - code_cache_ptr_;
   }
+
+  const uint8_t* DataCachePtr() const {
+    return data_cache_ptr_;
+  }
+
   size_t DataCacheSize() const {
     return data_cache_ptr_ - data_cache_begin_;
   }
+
   size_t DataCacheRemain() const {
     return data_cache_end_ - data_cache_ptr_;
   }
+
   size_t NumMethods() const {
     return num_methods_;
   }
 
+  // Return true if the code cache contains the code pointer which is the entrypoint of the method.
   bool ContainsMethod(mirror::ArtMethod* method) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  // Return true if the code cache contains the given code pointer.
   bool ContainsCodePtr(const void* ptr) const;
 
+  // Reserve a region of code of size at least "size". Returns nullptr if there is no more room.
   uint8_t* ReserveCode(Thread* self, size_t size) LOCKS_EXCLUDED(lock_);
 
+  // Add a data array of size (end - begin) with the associated contents. Returns nullptr if there
+  // is no more room.
   uint8_t* AddDataArray(Thread* self, const uint8_t* begin, const uint8_t* end)
       LOCKS_EXCLUDED(lock_);
 
@@ -81,14 +98,19 @@
   const void* GetCodeFor(mirror::ArtMethod* method)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) LOCKS_EXCLUDED(lock_);
 
+  // Save the compiled code for a method so that GetCodeFor(method) will return old_code_ptr if the
+  // entrypoint isn't within the cache.
   void SaveCompiledCode(mirror::ArtMethod* method, const void* old_code_ptr)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) LOCKS_EXCLUDED(lock_);
 
  private:
   // Takes ownership of code_mem_map.
   explicit JitCodeCache(MemMap* code_mem_map);
+
+  // Unimplemented. TODO: Determine whether it is necessary.
   void FlushInstructionCache();
 
+  // Lock which guards the state of the cache, in particular the method code map below.
   Mutex lock_;
   // Mem map which holds code and data. We do this since we need to have 32 bit offsets from method
   // headers in code cache which point to things in the data cache. If the maps are more than 4GB
@@ -106,7 +128,7 @@
   // TODO: This relies on methods not moving.
   // This map holds code for methods if they were deoptimized by the instrumentation stubs. This is
   // required since we have to implement ClassLinker::GetQuickOatCodeFor for walking stacks.
-  SafeMap<mirror::ArtMethod*, const void*> method_code_map_;
+  SafeMap<mirror::ArtMethod*, const void*> method_code_map_ GUARDED_BY(lock_);
 
   DISALLOW_COPY_AND_ASSIGN(JitCodeCache);
 };
diff --git a/runtime/jit/jit_code_cache_test.cc b/runtime/jit/jit_code_cache_test.cc
new file mode 100644
index 0000000..2155552
--- /dev/null
+++ b/runtime/jit/jit_code_cache_test.cc
@@ -0,0 +1,103 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "common_runtime_test.h"
+
+#include "class_linker.h"
+#include "jit_code_cache.h"
+#include "mirror/art_method-inl.h"
+#include "scoped_thread_state_change.h"
+#include "thread-inl.h"
+#include "utils.h"
+
+namespace art {
+namespace jit {
+
+class JitCodeCacheTest : public CommonRuntimeTest {
+ public:
+};
+
+TEST_F(JitCodeCacheTest, TestCoverage) {
+  std::string error_msg;
+  constexpr size_t kSize = 1 * MB;
+  std::unique_ptr<JitCodeCache> code_cache(
+      JitCodeCache::Create(kSize, &error_msg));
+  ASSERT_TRUE(code_cache.get() != nullptr) << error_msg;
+  ASSERT_TRUE(code_cache->CodeCachePtr() != nullptr);
+  ASSERT_EQ(code_cache->CodeCacheSize(), 0u);
+  ASSERT_GT(code_cache->CodeCacheRemain(), 0u);
+  ASSERT_TRUE(code_cache->DataCachePtr() != nullptr);
+  ASSERT_EQ(code_cache->DataCacheSize(), 0u);
+  ASSERT_GT(code_cache->DataCacheRemain(), 0u);
+  ASSERT_EQ(code_cache->CodeCacheRemain() + code_cache->DataCacheRemain(), kSize);
+  ASSERT_EQ(code_cache->NumMethods(), 0u);
+  ScopedObjectAccess soa(Thread::Current());
+  StackHandleScope<1> hs(soa.Self());
+  uint8_t* const reserved_code = code_cache->ReserveCode(soa.Self(), 4 * KB);
+  ASSERT_TRUE(reserved_code != nullptr);
+  ASSERT_TRUE(code_cache->ContainsCodePtr(reserved_code));
+  ASSERT_EQ(code_cache->NumMethods(), 1u);
+  ClassLinker* const cl = Runtime::Current()->GetClassLinker();
+  auto h_method = hs.NewHandle(cl->AllocArtMethod(soa.Self()));
+  ASSERT_FALSE(code_cache->ContainsMethod(h_method.Get()));
+  h_method->SetEntryPointFromQuickCompiledCode(reserved_code);
+  ASSERT_TRUE(code_cache->ContainsMethod(h_method.Get()));
+  ASSERT_EQ(code_cache->GetCodeFor(h_method.Get()), reserved_code);
+  // Save the code and then change it.
+  code_cache->SaveCompiledCode(h_method.Get(), reserved_code);
+  h_method->SetEntryPointFromQuickCompiledCode(nullptr);
+  ASSERT_EQ(code_cache->GetCodeFor(h_method.Get()), reserved_code);
+  const uint8_t data_arr[] = {1, 2, 3, 4, 5};
+  uint8_t* data_ptr = code_cache->AddDataArray(soa.Self(), data_arr, data_arr + sizeof(data_arr));
+  ASSERT_TRUE(data_ptr != nullptr);
+  ASSERT_EQ(memcmp(data_ptr, data_arr, sizeof(data_arr)), 0);
+}
+
+TEST_F(JitCodeCacheTest, TestOverflow) {
+  std::string error_msg;
+  constexpr size_t kSize = 1 * MB;
+  std::unique_ptr<JitCodeCache> code_cache(
+      JitCodeCache::Create(kSize, &error_msg));
+  ASSERT_TRUE(code_cache.get() != nullptr) << error_msg;
+  ASSERT_TRUE(code_cache->CodeCachePtr() != nullptr);
+  size_t code_bytes = 0;
+  size_t data_bytes = 0;
+  constexpr size_t kCodeArrSize = 4 * KB;
+  constexpr size_t kDataArrSize = 4 * KB;
+  uint8_t data_arr[kDataArrSize] = {53};
+  // Add code and data until we are full.
+  uint8_t* code_ptr = nullptr;
+  uint8_t* data_ptr = nullptr;
+  do {
+    code_ptr = code_cache->ReserveCode(Thread::Current(), kCodeArrSize);
+    data_ptr = code_cache->AddDataArray(Thread::Current(), data_arr, data_arr + kDataArrSize);
+    if (code_ptr != nullptr) {
+      code_bytes += kCodeArrSize;
+    }
+    if (data_ptr != nullptr) {
+      data_bytes += kDataArrSize;
+    }
+  } while (code_ptr != nullptr || data_ptr != nullptr);
+  // Make sure we added a reasonable amount.
+  CHECK_GT(code_bytes, 0u);
+  CHECK_LE(code_bytes, kSize);
+  CHECK_GT(data_bytes, 0u);
+  CHECK_LE(data_bytes, kSize);
+  CHECK_GE(code_bytes + data_bytes, kSize * 4 / 5);
+}
+
+}  // namespace jit
+}  // namespace art
diff --git a/runtime/mirror/art_method.cc b/runtime/mirror/art_method.cc
index ffee59e..c1f7594 100644
--- a/runtime/mirror/art_method.cc
+++ b/runtime/mirror/art_method.cc
@@ -401,7 +401,9 @@
 
   Runtime* runtime = Runtime::Current();
   // Call the invoke stub, passing everything as arguments.
-  if (UNLIKELY(!runtime->IsStarted())) {
+  // If the runtime is not yet started or the debugger requires it, then perform the
+  // invocation with the interpreter.
+  if (UNLIKELY(!runtime->IsStarted() || Dbg::IsForcedInterpreterNeededForCalling(self, this))) {
     if (IsStatic()) {
       art::interpreter::EnterInterpreterFromInvoke(self, this, nullptr, args, result);
     } else {
diff --git a/runtime/native/java_lang_Runtime.cc b/runtime/native/java_lang_Runtime.cc
index 84b18ab..bd043a8 100644
--- a/runtime/native/java_lang_Runtime.cc
+++ b/runtime/native/java_lang_Runtime.cc
@@ -30,6 +30,12 @@
 #include "ScopedUtfChars.h"
 #include "verify_object-inl.h"
 
+#include <sstream>
+#ifdef HAVE_ANDROID_OS
+// This function is provided by the Android linker.
+extern "C" void android_update_LD_LIBRARY_PATH(const char* ld_library_path);
+#endif  // HAVE_ANDROID_OS
+
 namespace art {
 
 static void Runtime_gc(JNIEnv*, jclass) {
@@ -46,30 +52,53 @@
   exit(status);
 }
 
-static jstring Runtime_nativeLoad(JNIEnv* env, jclass, jstring javaFilename, jobject javaLoader,
-                                  jstring javaLdLibraryPath) {
-  // TODO: returns NULL on success or an error message describing the failure on failure. This
-  // should be refactored in terms of suppressed exceptions.
-  ScopedUtfChars filename(env, javaFilename);
-  if (filename.c_str() == NULL) {
-    return NULL;
+static void SetLdLibraryPath(JNIEnv* env, jstring javaLdLibraryPathJstr, jstring javaDexPathJstr) {
+#ifdef HAVE_ANDROID_OS
+  std::stringstream ss;
+  if (javaLdLibraryPathJstr != nullptr) {
+    ScopedUtfChars javaLdLibraryPath(env, javaLdLibraryPathJstr);
+    if (javaLdLibraryPath.c_str() != nullptr) {
+      ss << javaLdLibraryPath.c_str();
+    }
   }
 
-  if (javaLdLibraryPath != NULL) {
-    ScopedUtfChars ldLibraryPath(env, javaLdLibraryPath);
-    if (ldLibraryPath.c_str() == NULL) {
-      return NULL;
-    }
-    void* sym = dlsym(RTLD_DEFAULT, "android_update_LD_LIBRARY_PATH");
-    if (sym != NULL) {
-      typedef void (*Fn)(const char*);
-      Fn android_update_LD_LIBRARY_PATH = reinterpret_cast<Fn>(sym);
-      (*android_update_LD_LIBRARY_PATH)(ldLibraryPath.c_str());
-    } else {
-      LOG(WARNING) << "android_update_LD_LIBRARY_PATH not found; .so dependencies will not work!";
+  if (javaDexPathJstr != nullptr) {
+    ScopedUtfChars javaDexPath(env, javaDexPathJstr);
+    if (javaDexPath.c_str() != nullptr) {
+      std::vector<std::string> dexPathVector;
+      Split(javaDexPath.c_str(), ':', &dexPathVector);
+
+      for (auto abi : art::Runtime::Current()->GetCpuAbilist()) {
+        for (auto zip_path : dexPathVector) {
+          // Native libraries live under lib/<abi>/ inside the .apk file.
+          ss << ":" << zip_path << "!" << "lib/" << abi;
+        }
+      }
     }
   }
 
+  std::string ldLibraryPathStr = ss.str();
+  const char* ldLibraryPath = ldLibraryPathStr.c_str();
+  if (*ldLibraryPath == ':') {
+    ++ldLibraryPath;
+  }
+
+  android_update_LD_LIBRARY_PATH(ldLibraryPath);
+#else
+  LOG(WARNING) << "android_update_LD_LIBRARY_PATH not found; .so dependencies will not work!";
+  UNUSED(javaLdLibraryPathJstr, javaDexPathJstr, env);
+#endif
+}
+
+static jstring Runtime_nativeLoad(JNIEnv* env, jclass, jstring javaFilename, jobject javaLoader,
+                                  jstring javaLdLibraryPathJstr, jstring javaDexPathJstr) {
+  ScopedUtfChars filename(env, javaFilename);
+  if (filename.c_str() == nullptr) {
+    return nullptr;
+  }
+
+  SetLdLibraryPath(env, javaLdLibraryPathJstr, javaDexPathJstr);
+
   std::string error_msg;
   {
     JavaVMExt* vm = Runtime::Current()->GetJavaVM();
@@ -101,7 +130,7 @@
   NATIVE_METHOD(Runtime, gc, "()V"),
   NATIVE_METHOD(Runtime, maxMemory, "!()J"),
   NATIVE_METHOD(Runtime, nativeExit, "(I)V"),
-  NATIVE_METHOD(Runtime, nativeLoad, "(Ljava/lang/String;Ljava/lang/ClassLoader;Ljava/lang/String;)Ljava/lang/String;"),
+  NATIVE_METHOD(Runtime, nativeLoad, "(Ljava/lang/String;Ljava/lang/ClassLoader;Ljava/lang/String;Ljava/lang/String;)Ljava/lang/String;"),
   NATIVE_METHOD(Runtime, totalMemory, "!()J"),
 };
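
To make the path construction above concrete (all values here are hypothetical): with an
incoming library path of /data/app/Foo/lib, a dex path of /data/app/Foo/base.apk, and a runtime
ABI list of arm64-v8a followed by armeabi, SetLdLibraryPath would hand the linker

    /data/app/Foo/lib:/data/app/Foo/base.apk!lib/arm64-v8a:/data/app/Foo/base.apk!lib/armeabi

The leading-':' strip covers the case where no explicit library path was supplied.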
 
diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc
index a53aeaa..337c5df 100644
--- a/runtime/parsed_options.cc
+++ b/runtime/parsed_options.cc
@@ -255,6 +255,9 @@
           .IntoKey(M::ZygoteMaxFailedBoots)
       .Define("-Xno-dex-file-fallback")
           .IntoKey(M::NoDexFileFallback)
+      .Define("--cpu-abilist=_")
+          .WithType<std::string>()
+          .IntoKey(M::CpuAbiList)
       .Ignore({
           "-ea", "-da", "-enableassertions", "-disableassertions", "--runtime-arg", "-esa",
           "-dsa", "-enablesystemassertions", "-disablesystemassertions", "-Xrs", "-Xint:_",
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 0f0c327..2dacfe2 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -792,6 +792,8 @@
   verify_ = runtime_options.GetOrDefault(Opt::Verify);
   allow_dex_file_fallback_ = !runtime_options.Exists(Opt::NoDexFileFallback);
 
+  Split(runtime_options.GetOrDefault(Opt::CpuAbiList), ',', &cpu_abilist_);
+
   if (runtime_options.GetOrDefault(Opt::Interpret)) {
     GetInstrumentation()->ForceInterpretOnly();
   }
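
For illustration (the ABI names are arbitrary), a runtime started with
--cpu-abilist=arm64-v8a,armeabi-v7a,armeabi ends up with cpu_abilist_ containing "arm64-v8a",
"armeabi-v7a" and "armeabi", which GetCpuAbilist() later hands to SetLdLibraryPath when it builds
the apk!lib/<abi> search entries.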
diff --git a/runtime/runtime.h b/runtime/runtime.h
index 7f33547..9a04835 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -524,6 +524,10 @@
     return allow_dex_file_fallback_;
   }
 
+  const std::vector<std::string>& GetCpuAbilist() const {
+    return cpu_abilist_;
+  }
+
   bool RunningOnValgrind() const {
     return running_on_valgrind_;
   }
@@ -706,6 +710,9 @@
   // available/usable.
   bool allow_dex_file_fallback_;
 
+  // List of supported CPU ABIs.
+  std::vector<std::string> cpu_abilist_;
+
   // Specifies target SDK version to allow workarounds for certain API levels.
   int32_t target_sdk_version_;
 
diff --git a/runtime/runtime_options.def b/runtime/runtime_options.def
index 895ab5c..1f273cf 100644
--- a/runtime/runtime_options.def
+++ b/runtime/runtime_options.def
@@ -104,6 +104,7 @@
                                           ImageCompilerOptions)  // -Ximage-compiler-option ...
 RUNTIME_OPTIONS_KEY (bool,                Verify,                         true)
 RUNTIME_OPTIONS_KEY (std::string,         NativeBridge)
+RUNTIME_OPTIONS_KEY (std::string,         CpuAbiList)
 
 // Not parse-able from command line, but can be provided explicitly.
 RUNTIME_OPTIONS_KEY (const std::vector<const DexFile*>*, \
diff --git a/runtime/well_known_classes.cc b/runtime/well_known_classes.cc
index 78185bf..00b4cef 100644
--- a/runtime/well_known_classes.cc
+++ b/runtime/well_known_classes.cc
@@ -269,7 +269,7 @@
 
 void WellKnownClasses::LateInit(JNIEnv* env) {
   ScopedLocalRef<jclass> java_lang_Runtime(env, env->FindClass("java/lang/Runtime"));
-  java_lang_Runtime_nativeLoad = CacheMethod(env, java_lang_Runtime.get(), true, "nativeLoad", "(Ljava/lang/String;Ljava/lang/ClassLoader;Ljava/lang/String;)Ljava/lang/String;");
+  java_lang_Runtime_nativeLoad = CacheMethod(env, java_lang_Runtime.get(), true, "nativeLoad", "(Ljava/lang/String;Ljava/lang/ClassLoader;Ljava/lang/String;Ljava/lang/String;)Ljava/lang/String;");
 }
 
 mirror::Class* WellKnownClasses::ToClass(jclass global_jclass) {
diff --git a/test/004-ReferenceMap/stack_walk_refmap_jni.cc b/test/004-ReferenceMap/stack_walk_refmap_jni.cc
index 40be56c..76ef4a9 100644
--- a/test/004-ReferenceMap/stack_walk_refmap_jni.cc
+++ b/test/004-ReferenceMap/stack_walk_refmap_jni.cc
@@ -57,14 +57,15 @@
       // We eliminate the non-live registers at a return, so only v3 is live.
       // Note that it is OK for a compiler to not have a dex map at this dex PC because
       // a return is not necessarily a safepoint.
-      CHECK_REGS_CONTAIN_REFS(0x13U, false);  // v3: y
-      CHECK_REGS_CONTAIN_REFS(0x18U, true, 8, 2, 1, 0);  // v8: this, v2: y, v1: x, v0: ex
-      CHECK_REGS_CONTAIN_REFS(0x1aU, true, 8, 5, 2, 1, 0);  // v8: this, v5: x[1], v2: y, v1: x, v0: ex
-      CHECK_REGS_CONTAIN_REFS(0x1dU, true, 8, 5, 2, 1, 0);  // v8: this, v5: x[1], v2: y, v1: x, v0: ex
+      CHECK_REGS_CONTAIN_REFS(0x13U, false, 3);  // v3: y
+      // Note that v0: ex can be eliminated because it's a dead merge of two different exceptions.
+      CHECK_REGS_CONTAIN_REFS(0x18U, true, 8, 2, 1);  // v8: this, v2: y, v1: x (dead v0: ex)
+      CHECK_REGS_CONTAIN_REFS(0x1aU, true, 8, 5, 2, 1);  // v8: this, v5: x[1], v2: y, v1: x (dead v0: ex)
+      CHECK_REGS_CONTAIN_REFS(0x1dU, true, 8, 5, 2, 1);  // v8: this, v5: x[1], v2: y, v1: x (dead v0: ex)
       // v5 is removed from the root set because there is a "merge" operation.
       // See 0015: if-nez v2, 001f.
-      CHECK_REGS_CONTAIN_REFS(0x1fU, true, 8, 2, 1, 0);  // v8: this, v2: y, v1: x, v0: ex
-      CHECK_REGS_CONTAIN_REFS(0x21U, true, 8, 2, 1, 0);  // v8: this, v2: y, v1: x, v0: ex
+      CHECK_REGS_CONTAIN_REFS(0x1fU, true, 8, 2, 1);  // v8: this, v2: y, v1: x (dead v0: ex)
+      CHECK_REGS_CONTAIN_REFS(0x21U, true, 8, 2, 1);  // v8: this, v2: y, v1: x (dead v0: ex)
       CHECK_REGS_CONTAIN_REFS(0x27U, true, 8, 4, 2, 1);  // v8: this, v4: ex, v2: y, v1: x
       CHECK_REGS_CONTAIN_REFS(0x29U, true, 8, 4, 2, 1);  // v8: this, v4: ex, v2: y, v1: x
       CHECK_REGS_CONTAIN_REFS(0x2cU, true, 8, 4, 2, 1);  // v8: this, v4: ex, v2: y, v1: x
diff --git a/test/134-reg-promotion/smali/Test.smali b/test/134-reg-promotion/smali/Test.smali
index 6a35c45..68d29dd 100644
--- a/test/134-reg-promotion/smali/Test.smali
+++ b/test/134-reg-promotion/smali/Test.smali
@@ -36,3 +36,28 @@
    :end
    return-void
 .end method
+
+.method public static run2()V
+   .registers 4
+   new-instance v2, Ljava/lang/String;
+   invoke-direct {v2}, Ljava/lang/String;-><init>()V
+   const/4 v0, 0
+   move v1, v0
+   :start
+   invoke-static {}, LMain;->blowup()V
+   if-ne v1, v0, :end
+   const/4 v2, 1
+   invoke-static {v2}, Ljava/lang/Integer;->toString(I)Ljava/lang/String;
+   move-result-object v3
+   if-nez v3, :skip
+   const/4 v0, 0
+   :skip
+   # The Phi merging 0 with 0 hides the constant from the Quick compiler.
+   move v2, v0
+   # The call makes v2 float type.
+   invoke-static {v2}, Ljava/lang/Float;->isNaN(F)Z
+   const/4 v1, 1
+   goto :start
+   :end
+   return-void
+.end method
diff --git a/test/134-reg-promotion/src/Main.java b/test/134-reg-promotion/src/Main.java
index d45ec66..008ac58 100644
--- a/test/134-reg-promotion/src/Main.java
+++ b/test/134-reg-promotion/src/Main.java
@@ -38,5 +38,11 @@
             m.invoke(null, (Object[]) null);
             holder = null;
         }
+        m = c.getMethod("run2", (Class[]) null);
+        for (int i = 0; i < 10; i++) {
+            holder = new char[128 * 1024][];
+            m.invoke(null, (Object[]) null);
+            holder = null;
+        }
     }
 }
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index 9755efb..28fbc3e 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -95,9 +95,9 @@
   RELOCATE_TYPES += no-relocate
 endif
 ifeq ($(ART_TEST_RUN_TEST_RELOCATE_NO_PATCHOAT),true)
-  RELOCATE_TYPES := relocate-no-patchoat
+  RELOCATE_TYPES := relocate-npatchoat
 endif
-TRACE_TYPES := no-trace
+TRACE_TYPES := ntrace
 ifeq ($(ART_TEST_TRACE),true)
   TRACE_TYPES += trace
 endif
@@ -119,7 +119,7 @@
 ifeq ($(ART_TEST_PIC_IMAGE),true)
   IMAGE_TYPES += picimage
 endif
-PICTEST_TYPES := nopictest
+PICTEST_TYPES := npictest
 ifeq ($(ART_TEST_PIC_TEST),true)
   PICTEST_TYPES += pictest
 endif
@@ -130,7 +130,7 @@
 ifeq ($(ART_TEST_RUN_TEST_NDEBUG),true)
   RUN_TYPES += ndebug
 endif
-DEBUGGABLE_TYPES := nondebuggable
+DEBUGGABLE_TYPES := ndebuggable
 ifeq ($(ART_TEST_RUN_TEST_DEBUGGABLE),true)
 DEBUGGABLE_TYPES += debuggable
 endif
@@ -272,9 +272,9 @@
       $(PICTEST_TYPES), $(DEBUGGABLE_TYPES), $(TEST_ART_BROKEN_FALLBACK_RUN_TESTS),$(ALL_ADDRESS_SIZES))
 endif
 
-ifneq (,$(filter relocate-no-patchoat,$(RELOCATE_TYPES)))
+ifneq (,$(filter relocate-npatchoat,$(RELOCATE_TYPES)))
   ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
-      $(COMPILER_TYPES), relocate-no-patchoat,$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
+      $(COMPILER_TYPES), relocate-npatchoat,$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
       $(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES), $(TEST_ART_BROKEN_FALLBACK_RUN_TESTS),$(ALL_ADDRESS_SIZES))
 endif
 
@@ -375,7 +375,7 @@
 ifneq (,$(filter optimizing,$(COMPILER_TYPES)))
   ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
       optimizing,$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
-      $(IMAGE_TYPES),$(PICTEST_TYPES),nondebuggable,$(TEST_ART_BROKEN_OPTIMIZING_NONDEBUGGABLE_RUN_TESTS),$(ALL_ADDRESS_SIZES))
+      $(IMAGE_TYPES),$(PICTEST_TYPES),ndebuggable,$(TEST_ART_BROKEN_OPTIMIZING_NONDEBUGGABLE_RUN_TESTS),$(ALL_ADDRESS_SIZES))
 endif
 
 TEST_ART_BROKEN_OPTIMIZING_NONDEBUGGABLE_RUN_TESTS :=
@@ -461,10 +461,10 @@
 
 # Create a rule to build and run a test following the form:
 # test-art-{1: host or target}-run-test-{2: debug ndebug}-{3: prebuild no-prebuild no-dex2oat}-
-#    {4: interpreter default optimizing jit}-{5: relocate no-relocate relocate-no-patchoat}-
-#    {6: trace or no-trace}-{7: gcstress gcverify cms}-{8: forcecopy checkjni jni}-
-#    {9: no-image image picimage}-{10: pictest nopictest}-
-#    {11: nondebuggable debuggable}-{12: test name}{13: 32 or 64}
+#    {4: interpreter default optimizing jit}-{5: relocate nrelocate relocate-npatchoat}-
+#    {6: trace or ntrace}-{7: gcstress gcverify cms}-{8: forcecopy checkjni jni}-
+#    {9: no-image image picimage}-{10: pictest npictest}-
+#    {11: ndebuggable debuggable}-{12: test name}{13: 32 or 64}
 define define-test-art-run-test
   run_test_options :=
   prereq_rule :=
@@ -543,7 +543,7 @@
       test_groups += ART_RUN_TEST_$$(uc_host_or_target)_NO_RELOCATE_RULES
       run_test_options += --no-relocate
     else
-      ifeq ($(5),relocate-no-patchoat)
+      ifeq ($(5),relocate-npatchoat)
         test_groups += ART_RUN_TEST_$$(uc_host_or_target)_RELOCATE_NO_PATCHOAT_RULES
         run_test_options += --relocate --no-patchoat
       else
@@ -555,7 +555,7 @@
     test_groups += ART_RUN_TEST_$$(uc_host_or_target)_TRACE_RULES
     run_test_options += --trace
   else
-    ifeq ($(6),no-trace)
+    ifeq ($(6),ntrace)
       test_groups += ART_RUN_TEST_$$(uc_host_or_target)_NO_TRACE_RULES
     else
       $$(error found $(6) expected $(TRACE_TYPES))
@@ -635,7 +635,7 @@
   ifeq ($(10),pictest)
     run_test_options += --pic-test
   else
-    ifeq ($(10),nopictest)
+    ifeq ($(10),npictest)
       # Nothing to be done.
     else
       $$(error found $(10) expected $(PICTEST_TYPES))
@@ -645,7 +645,7 @@
     test_groups += ART_RUN_TEST_$$(uc_host_or_target)_DEBUGGABLE_RULES
     run_test_options += --debuggable
   else
-    ifeq ($(11),nondebuggable)
+    ifeq ($(11),ndebuggable)
     test_groups += ART_RUN_TEST_$$(uc_host_or_target)_NONDEBUGGABLE_RULES
       # Nothing to be done.
     else
diff --git a/test/run-test b/test/run-test
index df0fce4..2873a35 100755
--- a/test/run-test
+++ b/test/run-test
@@ -441,8 +441,8 @@
         echo "    --build-only          Build test files only (off by default)."
         echo "    --interpreter         Enable interpreter only mode (off by default)."
         echo "    --jit                 Enable jit (off by default)."
-        echo "    --optimizing          Enable optimizing compiler (off by default)."
-        echo "    --quick               Use Quick compiler (default)."
+        echo "    --optimizing          Enable optimizing compiler (default)."
+        echo "    --quick               Use Quick compiler (off by default)."
         echo "    --no-verify           Turn off verification (on by default)."
         echo "    --no-optimize         Turn off optimization (on by default)."
         echo "    --no-precise          Turn off precise GC (on by default)."