Merge "Share dex register maps between stack maps when possible."
diff --git a/Android.mk b/Android.mk
index c740a0d..216e865 100644
--- a/Android.mk
+++ b/Android.mk
@@ -360,6 +360,7 @@
 		--boot-image=$(DEFAULT_DEX_PREOPT_BUILT_IMAGE) --dex-file=$(PRODUCT_OUT)/$(1) \
 		--dex-location=/$(1) --oat-file=$$@ \
 		--instruction-set=$(DEX2OAT_TARGET_ARCH) \
+		--instruction-set-variant=$(DEX2OAT_TARGET_CPU_VARIANT) \
 		--instruction-set-features=$(DEX2OAT_TARGET_INSTRUCTION_SET_FEATURES) \
 		--android-root=$(PRODUCT_OUT)/system --include-patch-information \
 		--runtime-arg -Xnorelocate
diff --git a/build/Android.common_build.mk b/build/Android.common_build.mk
index 08b4ec2..8f00298 100644
--- a/build/Android.common_build.mk
+++ b/build/Android.common_build.mk
@@ -219,6 +219,10 @@
   art_cflags += -DART_USE_READ_BARRIER=1
 endif
 
+ifeq ($(ART_USE_TLAB),true)
+  art_cflags += -DART_USE_TLAB=1
+endif
+
 # Cflags for non-debug ART and ART tools.
 art_non_debug_cflags := \
   -O3
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index 6b6a9e0..948c756 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -159,6 +159,7 @@
   runtime/intern_table_test.cc \
   runtime/interpreter/safe_math_test.cc \
   runtime/java_vm_ext_test.cc \
+  runtime/jit/jit_code_cache_test.cc \
   runtime/leb128_test.cc \
   runtime/mem_map_test.cc \
   runtime/memory_region_test.cc \
diff --git a/build/Android.oat.mk b/build/Android.oat.mk
index 4d2fa41..710b130 100644
--- a/build/Android.oat.mk
+++ b/build/Android.oat.mk
@@ -230,6 +230,7 @@
 	  $$(addprefix --dex-location=,$$(TARGET_CORE_DEX_LOCATIONS)) --oat-file=$$(PRIVATE_CORE_OAT_NAME) \
 	  --oat-location=$$(PRIVATE_CORE_OAT_NAME) --image=$$(PRIVATE_CORE_IMG_NAME) \
 	  --base=$$(LIBART_IMG_TARGET_BASE_ADDRESS) --instruction-set=$$($(3)TARGET_ARCH) \
+	  --instruction-set-variant=$$($(3)DEX2OAT_TARGET_CPU_VARIANT) \
 	  --instruction-set-features=$$($(3)DEX2OAT_TARGET_INSTRUCTION_SET_FEATURES) \
 	  --android-root=$$(PRODUCT_OUT)/system --include-patch-information \
 	  $$(PRIVATE_CORE_COMPILE_OPTIONS) || (rm $$(PRIVATE_CORE_OAT_NAME); exit 1)
diff --git a/compiler/Android.mk b/compiler/Android.mk
index 0906753..6b0e6ff 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -94,6 +94,7 @@
 	jni/quick/x86_64/calling_convention_x86_64.cc \
 	jni/quick/calling_convention.cc \
 	jni/quick/jni_compiler.cc \
+	optimizing/boolean_simplifier.cc \
 	optimizing/builder.cc \
 	optimizing/bounds_check_elimination.cc \
 	optimizing/code_generator.cc \
diff --git a/compiler/dex/mir_optimization.cc b/compiler/dex/mir_optimization.cc
index 266b7c3..c85c3b6 100644
--- a/compiler/dex/mir_optimization.cc
+++ b/compiler/dex/mir_optimization.cc
@@ -671,6 +671,9 @@
               }
               int dead_true_def = if_true->ssa_rep->defs[0];
               raw_use_counts_[dead_true_def] = use_counts_[dead_true_def] = 0;
+              // Update the ending vreg->sreg map for GC map generation.
+              int def_vreg = SRegToVReg(mir->ssa_rep->defs[0]);
+              bb->data_flow_info->vreg_to_ssa_map_exit[def_vreg] = mir->ssa_rep->defs[0];
               // We want to remove ft and tk and link bb directly to ft_ft. First, we need
               // to update all Phi inputs correctly with UpdatePredecessor(ft->id, bb->id)
               // since the live_def above comes from ft->first_mir_insn (if_false).
diff --git a/compiler/dex/pass_manager.cc b/compiler/dex/pass_manager.cc
index 6d58f65..6377a6c 100644
--- a/compiler/dex/pass_manager.cc
+++ b/compiler/dex/pass_manager.cc
@@ -33,7 +33,7 @@
   // Add each pass which isn't disabled into default_pass_list_.
   for (const auto* pass : passes_) {
     if (options_.GetDisablePassList().find(pass->GetName()) != std::string::npos) {
-      LOG(INFO) << "Skipping disabled pass " << pass->GetName();
+      VLOG(compiler) << "Skipping disabled pass " << pass->GetName();
     } else {
       default_pass_list_.push_back(pass);
     }
diff --git a/compiler/dex/quick/arm/assemble_arm.cc b/compiler/dex/quick/arm/assemble_arm.cc
index 8833da3..3e69878 100644
--- a/compiler/dex/quick/arm/assemble_arm.cc
+++ b/compiler/dex/quick/arm/assemble_arm.cc
@@ -848,7 +848,7 @@
     ENCODING_MAP(kThumb2LdrPcRel12,       0xf8df0000,
                  kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 0, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
-                 IS_TERTIARY_OP | REG_DEF0 | REG_USE_PC | IS_LOAD_OFF | NEEDS_FIXUP,
+                 IS_BINARY_OP | REG_DEF0 | REG_USE_PC | IS_LOAD_OFF | NEEDS_FIXUP,
                  "ldr", "!0C, [r15pc, #!1d]", 4, kFixupLoad),
     ENCODING_MAP(kThumb2BCond,        0xf0008000,
                  kFmtBrOffset, -1, -1, kFmtBitBlt, 25, 22, kFmtUnused, -1, -1,
@@ -1502,7 +1502,7 @@
           break;
         }
         case kFixupAdr: {
-          EmbeddedData *tab_rec = reinterpret_cast<EmbeddedData*>(UnwrapPointer(lir->operands[2]));
+          const EmbeddedData* tab_rec = UnwrapPointer<EmbeddedData>(lir->operands[2]);
           LIR* target = lir->target;
           int32_t target_disp = (tab_rec != NULL) ?  tab_rec->offset + offset_adjustment
               : target->offset + ((target->flags.generation == lir->flags.generation) ? 0 :
@@ -1555,8 +1555,8 @@
         }
         case kFixupMovImmLST: {
           // operands[1] should hold disp, [2] has add, [3] has tab_rec
-          LIR *addPCInst = reinterpret_cast<LIR*>(UnwrapPointer(lir->operands[2]));
-          EmbeddedData *tab_rec = reinterpret_cast<EmbeddedData*>(UnwrapPointer(lir->operands[3]));
+          const LIR* addPCInst = UnwrapPointer<LIR>(lir->operands[2]);
+          const EmbeddedData* tab_rec = UnwrapPointer<EmbeddedData>(lir->operands[3]);
           // If tab_rec is null, this is a literal load. Use target
           LIR* target = lir->target;
           int32_t target_disp = tab_rec ? tab_rec->offset : target->offset;
@@ -1565,8 +1565,8 @@
         }
         case kFixupMovImmHST: {
           // operands[1] should hold disp, [2] has add, [3] has tab_rec
-          LIR *addPCInst = reinterpret_cast<LIR*>(UnwrapPointer(lir->operands[2]));
-          EmbeddedData *tab_rec = reinterpret_cast<EmbeddedData*>(UnwrapPointer(lir->operands[3]));
+          const LIR* addPCInst = UnwrapPointer<LIR>(lir->operands[2]);
+          const EmbeddedData* tab_rec = UnwrapPointer<EmbeddedData>(lir->operands[3]);
           // If tab_rec is null, this is a literal load. Use target
           LIR* target = lir->target;
           int32_t target_disp = tab_rec ? tab_rec->offset : target->offset;
diff --git a/compiler/dex/quick/arm/call_arm.cc b/compiler/dex/quick/arm/call_arm.cc
index 1a9dbea..d46c25a 100644
--- a/compiler/dex/quick/arm/call_arm.cc
+++ b/compiler/dex/quick/arm/call_arm.cc
@@ -433,7 +433,7 @@
       class StackOverflowSlowPath : public LIRSlowPath {
        public:
         StackOverflowSlowPath(Mir2Lir* m2l, LIR* branch, bool restore_lr, size_t sp_displace)
-            : LIRSlowPath(m2l, m2l->GetCurrentDexPc(), branch, nullptr), restore_lr_(restore_lr),
+            : LIRSlowPath(m2l, branch), restore_lr_(restore_lr),
               sp_displace_(sp_displace) {
         }
         void Compile() OVERRIDE {
@@ -658,7 +658,7 @@
   // NOTE: Method deduplication takes linker patches into account, so we can just pass 0
   // as a placeholder for the offset.
   LIR* call = RawLIR(current_dalvik_offset_, kThumb2Bl, 0,
-                     target_method_idx, WrapPointer(const_cast<DexFile*>(target_dex_file)), type);
+                     target_method_idx, WrapPointer(target_dex_file), type);
   AppendLIR(call);
   call_method_insns_.push_back(call);
   return call;
diff --git a/compiler/dex/quick/arm/codegen_arm.h b/compiler/dex/quick/arm/codegen_arm.h
index 67fabbd..4141bcf 100644
--- a/compiler/dex/quick/arm/codegen_arm.h
+++ b/compiler/dex/quick/arm/codegen_arm.h
@@ -200,7 +200,7 @@
     void UpdateIT(LIR* it, const char* new_guide);
     void OpEndIT(LIR* it);
     LIR* OpMem(OpKind op, RegStorage r_base, int disp);
-    LIR* OpPcRelLoad(RegStorage reg, LIR* target);
+    void OpPcRelLoad(RegStorage reg, LIR* target);
     LIR* OpReg(OpKind op, RegStorage r_dest_src);
     void OpRegCopy(RegStorage r_dest, RegStorage r_src);
     LIR* OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src);
diff --git a/compiler/dex/quick/arm/int_arm.cc b/compiler/dex/quick/arm/int_arm.cc
index ef26323..9193e1b 100644
--- a/compiler/dex/quick/arm/int_arm.cc
+++ b/compiler/dex/quick/arm/int_arm.cc
@@ -1081,9 +1081,10 @@
   return true;
 }
 
-LIR* ArmMir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) {
+void ArmMir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) {
   ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
-  return RawLIR(current_dalvik_offset_, kThumb2LdrPcRel12, reg.GetReg(), 0, 0, 0, 0, target);
+  LIR* lir = NewLIR2(kThumb2LdrPcRel12, reg.GetReg(), 0);
+  lir->target = target;
 }
 
 LIR* ArmMir2Lir::OpVldm(RegStorage r_base, int count) {
diff --git a/compiler/dex/quick/arm/target_arm.cc b/compiler/dex/quick/arm/target_arm.cc
index 13f9072..9812d9f 100644
--- a/compiler/dex/quick/arm/target_arm.cc
+++ b/compiler/dex/quick/arm/target_arm.cc
@@ -455,7 +455,7 @@
            case 'T':
              snprintf(tbuf, arraysize(tbuf), "%s", PrettyMethod(
                  static_cast<uint32_t>(lir->operands[1]),
-                 *reinterpret_cast<const DexFile*>(UnwrapPointer(lir->operands[2]))).c_str());
+                 *UnwrapPointer<DexFile>(lir->operands[2])).c_str());
              break;
            case 'u': {
              int offset_1 = lir->operands[0];
@@ -906,9 +906,7 @@
   for (LIR* p : call_method_insns_) {
       DCHECK_EQ(p->opcode, kThumb2Bl);
       uint32_t target_method_idx = p->operands[1];
-      const DexFile* target_dex_file =
-          reinterpret_cast<const DexFile*>(UnwrapPointer(p->operands[2]));
-
+      const DexFile* target_dex_file = UnwrapPointer<DexFile>(p->operands[2]);
       patches_.push_back(LinkerPatch::RelativeCodePatch(p->offset,
                                                         target_dex_file, target_method_idx));
   }
diff --git a/compiler/dex/quick/arm64/assemble_arm64.cc b/compiler/dex/quick/arm64/assemble_arm64.cc
index aa5e5b4..329bb1e 100644
--- a/compiler/dex/quick/arm64/assemble_arm64.cc
+++ b/compiler/dex/quick/arm64/assemble_arm64.cc
@@ -1003,7 +1003,7 @@
                                       0 : offset_adjustment) + target_lir->offset;
             delta = target_offs - lir->offset;
           } else if (lir->operands[2] >= 0) {
-            EmbeddedData* tab = reinterpret_cast<EmbeddedData*>(UnwrapPointer(lir->operands[2]));
+            const EmbeddedData* tab = UnwrapPointer<EmbeddedData>(lir->operands[2]);
             delta = tab->offset + offset_adjustment - lir->offset;
           } else {
             // No fixup: this usage allows to retrieve the current PC.
diff --git a/compiler/dex/quick/arm64/call_arm64.cc b/compiler/dex/quick/arm64/call_arm64.cc
index 8275162..823cb60 100644
--- a/compiler/dex/quick/arm64/call_arm64.cc
+++ b/compiler/dex/quick/arm64/call_arm64.cc
@@ -351,8 +351,8 @@
     if (generate_explicit_stack_overflow_check) {
       class StackOverflowSlowPath: public LIRSlowPath {
       public:
-        StackOverflowSlowPath(Mir2Lir* m2l, LIR* branch, size_t sp_displace) :
-              LIRSlowPath(m2l, m2l->GetCurrentDexPc(), branch, nullptr),
+        StackOverflowSlowPath(Mir2Lir* m2l, LIR* branch, size_t sp_displace)
+            : LIRSlowPath(m2l, branch),
               sp_displace_(sp_displace) {
         }
         void Compile() OVERRIDE {
@@ -525,7 +525,7 @@
   // NOTE: Method deduplication takes linker patches into account, so we can just pass 0
   // as a placeholder for the offset.
   LIR* call = RawLIR(current_dalvik_offset_, kA64Bl1t, 0,
-                     target_method_idx, WrapPointer(const_cast<DexFile*>(target_dex_file)), type);
+                     target_method_idx, WrapPointer(target_dex_file), type);
   AppendLIR(call);
   call_method_insns_.push_back(call);
   return call;
diff --git a/compiler/dex/quick/arm64/codegen_arm64.h b/compiler/dex/quick/arm64/codegen_arm64.h
index d5f0536..54fd46d 100644
--- a/compiler/dex/quick/arm64/codegen_arm64.h
+++ b/compiler/dex/quick/arm64/codegen_arm64.h
@@ -203,7 +203,7 @@
   LIR* OpIT(ConditionCode cond, const char* guide) OVERRIDE;
   void OpEndIT(LIR* it) OVERRIDE;
   LIR* OpMem(OpKind op, RegStorage r_base, int disp) OVERRIDE;
-  LIR* OpPcRelLoad(RegStorage reg, LIR* target) OVERRIDE;
+  void OpPcRelLoad(RegStorage reg, LIR* target) OVERRIDE;
   LIR* OpReg(OpKind op, RegStorage r_dest_src) OVERRIDE;
   void OpRegCopy(RegStorage r_dest, RegStorage r_src) OVERRIDE;
   LIR* OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src) OVERRIDE;
diff --git a/compiler/dex/quick/arm64/fp_arm64.cc b/compiler/dex/quick/arm64/fp_arm64.cc
index a8ec6c0..49b15fe 100644
--- a/compiler/dex/quick/arm64/fp_arm64.cc
+++ b/compiler/dex/quick/arm64/fp_arm64.cc
@@ -449,7 +449,7 @@
 }
 
 bool Arm64Mir2Lir::GenInlinedRound(CallInfo* info, bool is_double) {
-  int32_t encoded_imm = EncodeImmSingle(bit_cast<float, uint32_t>(0.5f));
+  int32_t encoded_imm = EncodeImmSingle(bit_cast<uint32_t, float>(0.5f));
   A64Opcode wide = (is_double) ? WIDE(0) : UNWIDE(0);
   RegLocation rl_src = info->args[0];
   RegLocation rl_dest = (is_double) ? InlineTargetWide(info) : InlineTarget(info);
diff --git a/compiler/dex/quick/arm64/int_arm64.cc b/compiler/dex/quick/arm64/int_arm64.cc
index 92675f3..2372ccc 100644
--- a/compiler/dex/quick/arm64/int_arm64.cc
+++ b/compiler/dex/quick/arm64/int_arm64.cc
@@ -937,9 +937,10 @@
   return true;
 }
 
-LIR* Arm64Mir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) {
+void Arm64Mir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) {
   ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
-  return RawLIR(current_dalvik_offset_, kA64Ldr2rp, As32BitReg(reg).GetReg(), 0, 0, 0, 0, target);
+  LIR* lir = NewLIR2(kA64Ldr2rp, As32BitReg(reg).GetReg(), 0);
+  lir->target = target;
 }
 
 LIR* Arm64Mir2Lir::OpVldm(RegStorage r_base, int count) {
diff --git a/compiler/dex/quick/arm64/target_arm64.cc b/compiler/dex/quick/arm64/target_arm64.cc
index 136be94..09a34bf 100644
--- a/compiler/dex/quick/arm64/target_arm64.cc
+++ b/compiler/dex/quick/arm64/target_arm64.cc
@@ -851,9 +851,7 @@
   for (LIR* p : call_method_insns_) {
       DCHECK_EQ(p->opcode, kA64Bl1t);
       uint32_t target_method_idx = p->operands[1];
-      const DexFile* target_dex_file =
-          reinterpret_cast<const DexFile*>(UnwrapPointer(p->operands[2]));
-
+      const DexFile* target_dex_file = UnwrapPointer<DexFile>(p->operands[2]);
       patches_.push_back(LinkerPatch::RelativeCodePatch(p->offset,
                                                         target_dex_file, target_method_idx));
   }
diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc
index 029c0ca..bd479be 100644
--- a/compiler/dex/quick/codegen_util.cc
+++ b/compiler/dex/quick/codegen_util.cc
@@ -16,6 +16,7 @@
 
 #include "mir_to_lir-inl.h"
 
+#include "base/bit_vector-inl.h"
 #include "dex/mir_graph.h"
 #include "driver/compiler_driver.h"
 #include "driver/compiler_options.h"
@@ -88,6 +89,8 @@
   inst->u.m.def_mask = &kEncodeAll;
   LIR* safepoint_pc = NewLIR0(kPseudoSafepointPC);
   DCHECK(safepoint_pc->u.m.def_mask->Equals(kEncodeAll));
+  DCHECK(current_mir_ != nullptr || (current_dalvik_offset_ == 0 && safepoints_.empty()));
+  safepoints_.emplace_back(safepoint_pc, current_mir_);
 }
 
 void Mir2Lir::MarkSafepointPCAfter(LIR* after) {
@@ -102,6 +105,8 @@
     InsertLIRAfter(after, safepoint_pc);
   }
   DCHECK(safepoint_pc->u.m.def_mask->Equals(kEncodeAll));
+  DCHECK(current_mir_ != nullptr || (current_dalvik_offset_ == 0 && safepoints_.empty()));
+  safepoints_.emplace_back(safepoint_pc, current_mir_);
 }
 
 /* Remove a LIR from the list. */
@@ -217,7 +222,7 @@
       }
       LOG(INFO) << "-------- dalvik offset: 0x" << std::hex
                 << lir->dalvik_offset << " @ "
-                << reinterpret_cast<char*>(UnwrapPointer(lir->operands[0]));
+                << UnwrapPointer<char>(lir->operands[0]);
       break;
     case kPseudoExitBlock:
       LOG(INFO) << "-------- exit offset: 0x" << std::hex << dest;
@@ -411,7 +416,7 @@
 LIR* Mir2Lir::ScanLiteralPoolMethod(LIR* data_target, const MethodReference& method) {
   while (data_target) {
     if (static_cast<uint32_t>(data_target->operands[0]) == method.dex_method_index &&
-        UnwrapPointer(data_target->operands[1]) == method.dex_file) {
+        UnwrapPointer<DexFile>(data_target->operands[1]) == method.dex_file) {
       return data_target;
     }
     data_target = data_target->next;
@@ -423,7 +428,7 @@
 LIR* Mir2Lir::ScanLiteralPoolClass(LIR* data_target, const DexFile& dex_file, uint32_t type_idx) {
   while (data_target) {
     if (static_cast<uint32_t>(data_target->operands[0]) == type_idx &&
-        UnwrapPointer(data_target->operands[1]) == &dex_file) {
+        UnwrapPointer<DexFile>(data_target->operands[1]) == &dex_file) {
       return data_target;
     }
     data_target = data_target->next;
@@ -486,8 +491,7 @@
   data_lir = code_literal_list_;
   while (data_lir != nullptr) {
     uint32_t target_method_idx = data_lir->operands[0];
-    const DexFile* target_dex_file =
-        reinterpret_cast<const DexFile*>(UnwrapPointer(data_lir->operands[1]));
+    const DexFile* target_dex_file = UnwrapPointer<DexFile>(data_lir->operands[1]);
     patches_.push_back(LinkerPatch::CodePatch(code_buffer_.size(),
                                               target_dex_file, target_method_idx));
     PushUnpatchedReference(&code_buffer_);
@@ -496,8 +500,7 @@
   data_lir = method_literal_list_;
   while (data_lir != nullptr) {
     uint32_t target_method_idx = data_lir->operands[0];
-    const DexFile* target_dex_file =
-        reinterpret_cast<const DexFile*>(UnwrapPointer(data_lir->operands[1]));
+    const DexFile* target_dex_file = UnwrapPointer<DexFile>(data_lir->operands[1]);
     patches_.push_back(LinkerPatch::MethodPatch(code_buffer_.size(),
                                                 target_dex_file, target_method_idx));
     PushUnpatchedReference(&code_buffer_);
@@ -507,8 +510,7 @@
   data_lir = class_literal_list_;
   while (data_lir != nullptr) {
     uint32_t target_type_idx = data_lir->operands[0];
-    const DexFile* class_dex_file =
-      reinterpret_cast<const DexFile*>(UnwrapPointer(data_lir->operands[1]));
+    const DexFile* class_dex_file = UnwrapPointer<DexFile>(data_lir->operands[1]);
     patches_.push_back(LinkerPatch::TypePatch(code_buffer_.size(),
                                               class_dex_file, target_type_idx));
     PushUnpatchedReference(&code_buffer_);
@@ -767,6 +769,71 @@
 }
 
 void Mir2Lir::CreateNativeGcMap() {
+  if (UNLIKELY((cu_->disable_opt & (1u << kPromoteRegs)) != 0u)) {
+    // If we're not promoting to physical registers, it's safe to use the verifier's notion of
+    // references. (We disable register promotion when type inference finds a type conflict and
+    // in that the case we defer to the verifier to avoid using the compiler's conflicting info.)
+    CreateNativeGcMapWithoutRegisterPromotion();
+    return;
+  }
+
+  ArenaBitVector* references = new (arena_) ArenaBitVector(arena_, mir_graph_->GetNumSSARegs(),
+                                                           false);
+
+  // Calculate max native offset and max reference vreg.
+  MIR* prev_mir = nullptr;
+  int max_ref_vreg = -1;
+  CodeOffset max_native_offset = 0u;
+  for (const auto& entry : safepoints_) {
+    uint32_t native_offset = entry.first->offset;
+    max_native_offset = std::max(max_native_offset, native_offset);
+    MIR* mir = entry.second;
+    UpdateReferenceVRegs(mir, prev_mir, references);
+    max_ref_vreg = std::max(max_ref_vreg, references->GetHighestBitSet());
+    prev_mir = mir;
+  }
+
+#if defined(BYTE_ORDER) && (BYTE_ORDER == LITTLE_ENDIAN)
+  static constexpr bool kLittleEndian = true;
+#else
+  static constexpr bool kLittleEndian = false;
+#endif
+
+  // Build the GC map.
+  uint32_t reg_width = static_cast<uint32_t>((max_ref_vreg + 8) / 8);
+  GcMapBuilder native_gc_map_builder(&native_gc_map_,
+                                     safepoints_.size(),
+                                     max_native_offset, reg_width);
+  if (kLittleEndian) {
+    for (const auto& entry : safepoints_) {
+      uint32_t native_offset = entry.first->offset;
+      MIR* mir = entry.second;
+      UpdateReferenceVRegs(mir, prev_mir, references);
+      // For little-endian, the bytes comprising the bit vector's raw storage are what we need.
+      native_gc_map_builder.AddEntry(native_offset,
+                                     reinterpret_cast<const uint8_t*>(references->GetRawStorage()));
+      prev_mir = mir;
+    }
+  } else {
+    ArenaVector<uint8_t> references_buffer(arena_->Adapter());
+    references_buffer.resize(reg_width);
+    for (const auto& entry : safepoints_) {
+      uint32_t native_offset = entry.first->offset;
+      MIR* mir = entry.second;
+      UpdateReferenceVRegs(mir, prev_mir, references);
+      // Big-endian or unknown endianness, manually translate the bit vector data.
+      const auto* raw_storage = references->GetRawStorage();
+      for (size_t i = 0; i != reg_width; ++i) {
+        references_buffer[i] = static_cast<uint8_t>(
+            raw_storage[i / sizeof(raw_storage[0])] >> (8u * (i % sizeof(raw_storage[0]))));
+      }
+      native_gc_map_builder.AddEntry(native_offset, &references_buffer[0]);
+      prev_mir = mir;
+    }
+  }
+}
+
+void Mir2Lir::CreateNativeGcMapWithoutRegisterPromotion() {
   DCHECK(!encoded_mapping_table_.empty());
   MappingTable mapping_table(&encoded_mapping_table_[0]);
   uint32_t max_native_offset = 0;
@@ -965,6 +1032,7 @@
       block_label_list_(nullptr),
       promotion_map_(nullptr),
       current_dalvik_offset_(0),
+      current_mir_(nullptr),
       estimated_native_code_size_(0),
       reg_pool_(nullptr),
       live_sreg_(0),
@@ -984,6 +1052,7 @@
       slow_paths_(arena->Adapter(kArenaAllocSlowPaths)),
       mem_ref_type_(ResourceMask::kHeapRef),
       mask_cache_(arena),
+      safepoints_(arena->Adapter()),
       in_to_reg_storage_mapping_(arena) {
   switch_tables_.reserve(4);
   fill_array_data_.reserve(4);
@@ -992,7 +1061,7 @@
   pointer_storage_.reserve(128);
   slow_paths_.reserve(32);
   // Reserve pointer id 0 for nullptr.
-  size_t null_idx = WrapPointer(nullptr);
+  size_t null_idx = WrapPointer<void>(nullptr);
   DCHECK_EQ(null_idx, 0U);
 }
 
@@ -1201,8 +1270,7 @@
     data_target->operands[2] = type;
   }
   // Loads a code pointer. Code from oat file can be mapped anywhere.
-  LIR* load_pc_rel = OpPcRelLoad(TargetPtrReg(symbolic_reg), data_target);
-  AppendLIR(load_pc_rel);
+  OpPcRelLoad(TargetPtrReg(symbolic_reg), data_target);
   DCHECK_NE(cu_->instruction_set, kMips) << reinterpret_cast<void*>(data_target);
   DCHECK_NE(cu_->instruction_set, kMips64) << reinterpret_cast<void*>(data_target);
 }
@@ -1219,8 +1287,7 @@
     data_target->operands[2] = type;
   }
   // Loads an ArtMethod pointer, which is a reference as it lives in the heap.
-  LIR* load_pc_rel = OpPcRelLoad(TargetReg(symbolic_reg, kRef), data_target);
-  AppendLIR(load_pc_rel);
+  OpPcRelLoad(TargetReg(symbolic_reg, kRef), data_target);
   DCHECK_NE(cu_->instruction_set, kMips) << reinterpret_cast<void*>(data_target);
   DCHECK_NE(cu_->instruction_set, kMips64) << reinterpret_cast<void*>(data_target);
 }
@@ -1234,8 +1301,7 @@
     data_target->operands[1] = WrapPointer(const_cast<DexFile*>(&dex_file));
   }
   // Loads a Class pointer, which is a reference as it lives in the heap.
-  LIR* load_pc_rel = OpPcRelLoad(TargetReg(symbolic_reg, kRef), data_target);
-  AppendLIR(load_pc_rel);
+  OpPcRelLoad(TargetReg(symbolic_reg, kRef), data_target);
 }
 
 std::vector<uint8_t>* Mir2Lir::ReturnFrameDescriptionEntry() {
@@ -1274,4 +1340,97 @@
   UNREACHABLE();
 }
 
+void Mir2Lir::InitReferenceVRegs(BasicBlock* bb, BitVector* references) {
+  // Mark the references coming from the first predecessor.
+  DCHECK(bb != nullptr);
+  DCHECK(bb->block_type == kEntryBlock || !bb->predecessors.empty());
+  BasicBlock* first_bb =
+      (bb->block_type == kEntryBlock) ? bb : mir_graph_->GetBasicBlock(bb->predecessors[0]);
+  DCHECK(first_bb != nullptr);
+  DCHECK(first_bb->data_flow_info != nullptr);
+  DCHECK(first_bb->data_flow_info->vreg_to_ssa_map_exit != nullptr);
+  const int32_t* first_vreg_to_ssa_map = first_bb->data_flow_info->vreg_to_ssa_map_exit;
+  references->ClearAllBits();
+  for (uint32_t vreg = 0, num_vregs = mir_graph_->GetNumOfCodeVRs(); vreg != num_vregs; ++vreg) {
+    int32_t sreg = first_vreg_to_ssa_map[vreg];
+    if (sreg != INVALID_SREG && mir_graph_->reg_location_[sreg].ref &&
+        !mir_graph_->IsConstantNullRef(mir_graph_->reg_location_[sreg])) {
+      references->SetBit(vreg);
+    }
+  }
+  // Unmark the references that are merging with a different value.
+  for (size_t i = 1u, num_pred = bb->predecessors.size(); i < num_pred; ++i) {
+    BasicBlock* pred_bb = mir_graph_->GetBasicBlock(bb->predecessors[i]);
+    DCHECK(pred_bb != nullptr);
+    DCHECK(pred_bb->data_flow_info != nullptr);
+    DCHECK(pred_bb->data_flow_info->vreg_to_ssa_map_exit != nullptr);
+    const int32_t* pred_vreg_to_ssa_map = pred_bb->data_flow_info->vreg_to_ssa_map_exit;
+    for (uint32_t vreg : references->Indexes()) {
+      if (first_vreg_to_ssa_map[vreg] != pred_vreg_to_ssa_map[vreg]) {
+        // NOTE: The BitVectorSet::IndexIterator will not check the pointed-to bit again,
+        // so clearing the bit has no effect on the iterator.
+        references->ClearBit(vreg);
+      }
+    }
+  }
+  if (bb->block_type != kEntryBlock && bb->first_mir_insn != nullptr &&
+      static_cast<int>(bb->first_mir_insn->dalvikInsn.opcode) == kMirOpCheckPart2) {
+    // In Mir2Lir::MethodBlockCodeGen() we have artificially moved the throwing
+    // instruction to the previous block. However, the MIRGraph data used above
+    // doesn't reflect that, so we still need to process that MIR insn here.
+    DCHECK_EQ(bb->predecessors.size(), 1u);
+    BasicBlock* pred_bb = mir_graph_->GetBasicBlock(bb->predecessors[0]);
+    DCHECK(pred_bb != nullptr);
+    DCHECK(pred_bb->last_mir_insn != nullptr);
+    UpdateReferenceVRegsLocal(nullptr, pred_bb->last_mir_insn, references);
+  }
+}
+
+bool Mir2Lir::UpdateReferenceVRegsLocal(MIR* mir, MIR* prev_mir, BitVector* references) {
+  DCHECK(mir == nullptr || mir->bb == prev_mir->bb);
+  DCHECK(prev_mir != nullptr);
+  while (prev_mir != nullptr) {
+    if (prev_mir == mir) {
+      return true;
+    }
+    const size_t num_defs = prev_mir->ssa_rep->num_defs;
+    const int32_t* defs = prev_mir->ssa_rep->defs;
+    if (num_defs == 1u && mir_graph_->reg_location_[defs[0]].ref &&
+        !mir_graph_->IsConstantNullRef(mir_graph_->reg_location_[defs[0]])) {
+      references->SetBit(mir_graph_->SRegToVReg(defs[0]));
+    } else {
+      for (size_t i = 0u; i != num_defs; ++i) {
+        references->ClearBit(mir_graph_->SRegToVReg(defs[i]));
+      }
+    }
+    prev_mir = prev_mir->next;
+  }
+  return false;
+}
+
+void Mir2Lir::UpdateReferenceVRegs(MIR* mir, MIR* prev_mir, BitVector* references) {
+  if (mir == nullptr) {
+    // Safepoint in entry sequence.
+    InitReferenceVRegs(mir_graph_->GetEntryBlock(), references);
+    return;
+  }
+  if (IsInstructionReturn(mir->dalvikInsn.opcode) ||
+      mir->dalvikInsn.opcode == Instruction::RETURN_VOID_NO_BARRIER) {
+    references->ClearAllBits();
+    if (mir->dalvikInsn.opcode == Instruction::RETURN_OBJECT) {
+      references->SetBit(mir_graph_->SRegToVReg(mir->ssa_rep->uses[0]));
+    }
+    return;
+  }
+  if (prev_mir != nullptr && mir->bb == prev_mir->bb &&
+      UpdateReferenceVRegsLocal(mir, prev_mir, references)) {
+    return;
+  }
+  BasicBlock* bb = mir_graph_->GetBasicBlock(mir->bb);
+  DCHECK(bb != nullptr);
+  InitReferenceVRegs(bb, references);
+  bool success = UpdateReferenceVRegsLocal(mir, bb->first_mir_insn, references);
+  DCHECK(success) << "MIR @0x" << std::hex << mir->offset << " not in BB#" << std::dec << mir->bb;
+}
+
 }  // namespace art
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index 32a469d..2bcaaca 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -86,7 +86,7 @@
   class DivZeroCheckSlowPath : public Mir2Lir::LIRSlowPath {
    public:
     DivZeroCheckSlowPath(Mir2Lir* m2l, LIR* branch_in)
-        : LIRSlowPath(m2l, m2l->GetCurrentDexPc(), branch_in) {
+        : LIRSlowPath(m2l, branch_in) {
     }
 
     void Compile() OVERRIDE {
@@ -105,7 +105,7 @@
    public:
     ArrayBoundsCheckSlowPath(Mir2Lir* m2l, LIR* branch_in, RegStorage index_in,
                              RegStorage length_in)
-        : LIRSlowPath(m2l, m2l->GetCurrentDexPc(), branch_in),
+        : LIRSlowPath(m2l, branch_in),
           index_(index_in), length_(length_in) {
     }
 
@@ -129,7 +129,7 @@
   class ArrayBoundsCheckSlowPath : public Mir2Lir::LIRSlowPath {
    public:
     ArrayBoundsCheckSlowPath(Mir2Lir* m2l, LIR* branch_in, int index_in, RegStorage length_in)
-        : LIRSlowPath(m2l, m2l->GetCurrentDexPc(), branch_in),
+        : LIRSlowPath(m2l, branch_in),
           index_(index_in), length_(length_in) {
     }
 
@@ -159,7 +159,7 @@
   class NullCheckSlowPath : public Mir2Lir::LIRSlowPath {
    public:
     NullCheckSlowPath(Mir2Lir* m2l, LIR* branch)
-        : LIRSlowPath(m2l, m2l->GetCurrentDexPc(), branch) {
+        : LIRSlowPath(m2l, branch) {
     }
 
     void Compile() OVERRIDE {
@@ -581,7 +581,7 @@
   // At least one will be non-null here, otherwise we wouldn't generate the slow path.
   StaticFieldSlowPath(Mir2Lir* m2l, LIR* unresolved, LIR* uninit, LIR* cont, int storage_index,
                       RegStorage r_base)
-      : LIRSlowPath(m2l, m2l->GetCurrentDexPc(), unresolved != nullptr ? unresolved : uninit, cont),
+      : LIRSlowPath(m2l, unresolved != nullptr ? unresolved : uninit, cont),
         second_branch_(unresolved != nullptr ? uninit : nullptr),
         storage_index_(storage_index), r_base_(r_base) {
   }
@@ -1052,9 +1052,9 @@
       class SlowPath : public LIRSlowPath {
        public:
         SlowPath(Mir2Lir* m2l, LIR* fromfast, LIR* cont_in, const int type_idx_in,
-                 const RegLocation& rl_method_in, const RegLocation& rl_result_in) :
-                   LIRSlowPath(m2l, m2l->GetCurrentDexPc(), fromfast, cont_in),
-                   type_idx_(type_idx_in), rl_method_(rl_method_in), rl_result_(rl_result_in) {
+                 const RegLocation& rl_method_in, const RegLocation& rl_result_in)
+            : LIRSlowPath(m2l, fromfast, cont_in),
+              type_idx_(type_idx_in), rl_method_(rl_method_in), rl_result_(rl_result_in) {
         }
 
         void Compile() {
@@ -1120,9 +1120,9 @@
       class SlowPath : public LIRSlowPath {
        public:
         SlowPath(Mir2Lir* m2l, LIR* fromfast_in, LIR* cont_in, RegStorage r_method_in,
-                 int32_t string_idx_in) :
-            LIRSlowPath(m2l, m2l->GetCurrentDexPc(), fromfast_in, cont_in),
-            r_method_(r_method_in), string_idx_(string_idx_in) {
+                 int32_t string_idx_in)
+            : LIRSlowPath(m2l, fromfast_in, cont_in),
+              r_method_(r_method_in), string_idx_(string_idx_in) {
         }
 
         void Compile() {
@@ -1304,7 +1304,7 @@
        public:
         InitTypeSlowPath(Mir2Lir* m2l, LIR* branch, LIR* cont, uint32_t type_idx_in,
                          RegLocation rl_src_in)
-            : LIRSlowPath(m2l, m2l->GetCurrentDexPc(), branch, cont), type_idx_(type_idx_in),
+            : LIRSlowPath(m2l, branch, cont), type_idx_(type_idx_in),
               rl_src_(rl_src_in) {
         }
 
@@ -1453,9 +1453,9 @@
       class SlowPath : public LIRSlowPath {
        public:
         SlowPath(Mir2Lir* m2l, LIR* fromfast, LIR* cont_in, const int type_idx_in,
-                 const RegStorage class_reg_in) :
-                   LIRSlowPath(m2l, m2l->GetCurrentDexPc(), fromfast, cont_in),
-                   type_idx_(type_idx_in), class_reg_(class_reg_in) {
+                 const RegStorage class_reg_in)
+            : LIRSlowPath(m2l, fromfast, cont_in),
+              type_idx_(type_idx_in), class_reg_(class_reg_in) {
         }
 
         void Compile() {
@@ -1484,8 +1484,8 @@
   // to call a helper function to do the check.
   class SlowPath : public LIRSlowPath {
    public:
-    SlowPath(Mir2Lir* m2l, LIR* fromfast, LIR* cont, bool load):
-               LIRSlowPath(m2l, m2l->GetCurrentDexPc(), fromfast, cont), load_(load) {
+    SlowPath(Mir2Lir* m2l, LIR* fromfast, LIR* cont, bool load)
+        : LIRSlowPath(m2l, fromfast, cont), load_(load) {
     }
 
     void Compile() {
@@ -1874,8 +1874,8 @@
     int32_t divisor = mir_graph_->ConstantValue(rl_src2);
     if (CanDivideByReciprocalMultiplyFloat(divisor)) {
       // Generate multiply by reciprocal instead of div.
-      float recip = 1.0f/bit_cast<int32_t, float>(divisor);
-      GenMultiplyByConstantFloat(rl_dest, rl_src1, bit_cast<float, int32_t>(recip));
+      float recip = 1.0f/bit_cast<float, int32_t>(divisor);
+      GenMultiplyByConstantFloat(rl_dest, rl_src1, bit_cast<int32_t, float>(recip));
       return true;
     }
   } else {
@@ -1883,7 +1883,7 @@
     if (CanDivideByReciprocalMultiplyDouble(divisor)) {
       // Generate multiply by reciprocal instead of div.
       double recip = 1.0/bit_cast<double, int64_t>(divisor);
-      GenMultiplyByConstantDouble(rl_dest, rl_src1, bit_cast<double, int64_t>(recip));
+      GenMultiplyByConstantDouble(rl_dest, rl_src1, bit_cast<int64_t, double>(recip));
       return true;
     }
   }
@@ -2151,9 +2151,6 @@
   RegLocation rl_result = EvalLoc(rl_dest, kAnyReg, true);
   LoadConstantNoClobber(rl_result.reg, value);
   StoreValue(rl_dest, rl_result);
-  if (value == 0) {
-    Workaround7250540(rl_dest, rl_result.reg);
-  }
 }
 
 void Mir2Lir::GenConversionCall(QuickEntrypointEnum trampoline, RegLocation rl_dest,
@@ -2179,7 +2176,7 @@
 class Mir2Lir::SuspendCheckSlowPath : public Mir2Lir::LIRSlowPath {
  public:
   SuspendCheckSlowPath(Mir2Lir* m2l, LIR* branch, LIR* cont)
-      : LIRSlowPath(m2l, m2l->GetCurrentDexPc(), branch, cont) {
+      : LIRSlowPath(m2l, branch, cont) {
   }
 
   void Compile() OVERRIDE {
diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc
index 6b553fd..2d41ba1 100755
--- a/compiler/dex/quick/gen_invoke.cc
+++ b/compiler/dex/quick/gen_invoke.cc
@@ -48,7 +48,8 @@
   class IntrinsicSlowPathPath : public Mir2Lir::LIRSlowPath {
    public:
     IntrinsicSlowPathPath(Mir2Lir* m2l, CallInfo* info_in, LIR* branch_in, LIR* resume_in)
-        : LIRSlowPath(m2l, info_in->offset, branch_in, resume_in), info_(info_in) {
+        : LIRSlowPath(m2l, branch_in, resume_in), info_(info_in) {
+      DCHECK_EQ(info_in->offset, current_dex_pc_);
     }
 
     void Compile() {
diff --git a/compiler/dex/quick/gen_loadstore.cc b/compiler/dex/quick/gen_loadstore.cc
index db844bc..b71691f 100644
--- a/compiler/dex/quick/gen_loadstore.cc
+++ b/compiler/dex/quick/gen_loadstore.cc
@@ -37,48 +37,6 @@
 }
 
 /*
- * Temporary workaround for Issue 7250540.  If we're loading a constant zero into a
- * promoted floating point register, also copy a zero into the int/ref identity of
- * that sreg.
- */
-void Mir2Lir::Workaround7250540(RegLocation rl_dest, RegStorage zero_reg) {
-  if (rl_dest.fp) {
-    int pmap_index = SRegToPMap(rl_dest.s_reg_low);
-    const bool is_fp_promoted = promotion_map_[pmap_index].fp_location == kLocPhysReg;
-    const bool is_core_promoted = promotion_map_[pmap_index].core_location == kLocPhysReg;
-    if (is_fp_promoted || is_core_promoted) {
-      // Now, determine if this vreg is ever used as a reference.  If not, we're done.
-      bool used_as_reference = false;
-      int base_vreg = mir_graph_->SRegToVReg(rl_dest.s_reg_low);
-      for (int i = 0; !used_as_reference && (i < mir_graph_->GetNumSSARegs()); i++) {
-        if (mir_graph_->SRegToVReg(mir_graph_->reg_location_[i].s_reg_low) == base_vreg) {
-          used_as_reference |= mir_graph_->reg_location_[i].ref;
-        }
-      }
-      if (!used_as_reference) {
-        return;
-      }
-      RegStorage temp_reg = zero_reg;
-      if (!temp_reg.Valid()) {
-        temp_reg = AllocTemp();
-        LoadConstant(temp_reg, 0);
-      }
-      if (is_core_promoted) {
-        // Promoted - just copy in a zero
-        OpRegCopy(RegStorage::Solo32(promotion_map_[pmap_index].core_reg), temp_reg);
-      } else {
-        // Lives in the frame, need to store.
-        ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-        StoreBaseDisp(TargetPtrReg(kSp), SRegOffset(rl_dest.s_reg_low), temp_reg, k32, kNotVolatile);
-      }
-      if (!zero_reg.Valid()) {
-        FreeTemp(temp_reg);
-      }
-    }
-  }
-}
-
-/*
  * Load a Dalvik register into a physical register.  Take care when
  * using this routine, as it doesn't perform any bookkeeping regarding
  * register liveness.  That is the responsibility of the caller.
diff --git a/compiler/dex/quick/mips/assemble_mips.cc b/compiler/dex/quick/mips/assemble_mips.cc
index 5c98b10..ed72d67 100644
--- a/compiler/dex/quick/mips/assemble_mips.cc
+++ b/compiler/dex/quick/mips/assemble_mips.cc
@@ -393,6 +393,14 @@
                  kFmtBitBlt, 20, 16, kFmtSfp, 15, 11, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE0 | REG_DEF1,
                  "mtc1", "!0r,!1s", 4),
+    ENCODING_MAP(kMipsMfhc1, 0x44600000,
+                 kFmtBitBlt, 20, 16, kFmtSfp, 15, 11, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "mfhc1", "!0r,!1s", 4),
+    ENCODING_MAP(kMipsMthc1, 0x44e00000,
+                 kFmtBitBlt, 20, 16, kFmtSfp, 15, 11, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE0 | REG_DEF1,
+                 "mthc1", "!0r,!1s", 4),
     ENCODING_MAP(kMipsDelta, 0x27e00000,
                  kFmtBitBlt, 20, 16, kFmtBitBlt, 15, 0, kFmtUnused, 15, 0,
                  kFmtUnused, -1, -1, IS_QUAD_OP | REG_DEF0 | REG_USE_LR |
@@ -413,6 +421,21 @@
                  kFmtBitBlt, 10, 6, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_UNARY_OP,
                  "sync", ";", 4),
+
+    // The following are mips32r6 instructions.
+    ENCODING_MAP(kMipsR6Div, 0x0000009a,
+                 kFmtBitBlt, 15, 11, kFmtBitBlt, 25, 21, kFmtBitBlt, 20, 16,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
+                 "div", "!0r,!1r,!2r", 4),
+    ENCODING_MAP(kMipsR6Mod, 0x000000da,
+                 kFmtBitBlt, 15, 11, kFmtBitBlt, 25, 21, kFmtBitBlt, 20, 16,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
+                 "mod", "!0r,!1r,!2r", 4),
+    ENCODING_MAP(kMipsR6Mul, 0x00000098,
+                 kFmtBitBlt, 15, 11, kFmtBitBlt, 25, 21, kFmtBitBlt, 20, 16,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
+                 "mul", "!0r,!1r,!2r", 4),
+
     ENCODING_MAP(kMipsUndefined, 0x64000000,
                  kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, NO_OPERAND,
@@ -539,8 +562,8 @@
          * and is found in lir->target.  If operands[3] is non-NULL,
          * then it is a Switch/Data table.
          */
-        int offset1 = (reinterpret_cast<LIR*>(UnwrapPointer(lir->operands[2])))->offset;
-        EmbeddedData *tab_rec = reinterpret_cast<EmbeddedData*>(UnwrapPointer(lir->operands[3]));
+        int offset1 = UnwrapPointer<LIR>(lir->operands[2])->offset;
+        const EmbeddedData* tab_rec = UnwrapPointer<EmbeddedData>(lir->operands[3]);
         int offset2 = tab_rec ? tab_rec->offset : lir->target->offset;
         int delta = offset2 - offset1;
         if ((delta & 0xffff) == delta && ((delta & 0x8000) == 0)) {
@@ -566,14 +589,14 @@
           res = kRetryAll;
         }
       } else if (lir->opcode == kMipsDeltaLo) {
-        int offset1 = (reinterpret_cast<LIR*>(UnwrapPointer(lir->operands[2])))->offset;
-        EmbeddedData *tab_rec = reinterpret_cast<EmbeddedData*>(UnwrapPointer(lir->operands[3]));
+        int offset1 = UnwrapPointer<LIR>(lir->operands[2])->offset;
+        const EmbeddedData* tab_rec = UnwrapPointer<EmbeddedData>(lir->operands[3]);
         int offset2 = tab_rec ? tab_rec->offset : lir->target->offset;
         int delta = offset2 - offset1;
         lir->operands[1] = delta & 0xffff;
       } else if (lir->opcode == kMipsDeltaHi) {
-        int offset1 = (reinterpret_cast<LIR*>(UnwrapPointer(lir->operands[2])))->offset;
-        EmbeddedData *tab_rec = reinterpret_cast<EmbeddedData*>(UnwrapPointer(lir->operands[3]));
+        int offset1 = UnwrapPointer<LIR>(lir->operands[2])->offset;
+        const EmbeddedData* tab_rec = UnwrapPointer<EmbeddedData>(lir->operands[3]);
         int offset2 = tab_rec ? tab_rec->offset : lir->target->offset;
         int delta = offset2 - offset1;
         lir->operands[1] = (delta >> 16) & 0xffff;
diff --git a/compiler/dex/quick/mips/call_mips.cc b/compiler/dex/quick/mips/call_mips.cc
index d9471f6..b067221 100644
--- a/compiler/dex/quick/mips/call_mips.cc
+++ b/compiler/dex/quick/mips/call_mips.cc
@@ -263,7 +263,7 @@
     class StackOverflowSlowPath : public LIRSlowPath {
      public:
       StackOverflowSlowPath(Mir2Lir* m2l, LIR* branch, size_t sp_displace)
-          : LIRSlowPath(m2l, m2l->GetCurrentDexPc(), branch, nullptr), sp_displace_(sp_displace) {
+          : LIRSlowPath(m2l, branch), sp_displace_(sp_displace) {
       }
       void Compile() OVERRIDE {
         m2l_->ResetRegPool();
diff --git a/compiler/dex/quick/mips/codegen_mips.h b/compiler/dex/quick/mips/codegen_mips.h
index e1b43ca..649b6c9 100644
--- a/compiler/dex/quick/mips/codegen_mips.h
+++ b/compiler/dex/quick/mips/codegen_mips.h
@@ -76,7 +76,9 @@
 
     // Required for target - register utilities.
     RegStorage Solo64ToPair64(RegStorage reg);
+    RegStorage Fp64ToSolo32(RegStorage reg);
     RegStorage TargetReg(SpecialTargetRegister reg);
+    RegStorage TargetReg(SpecialTargetRegister reg, WideKind wide_kind) OVERRIDE;
     RegLocation GetReturnAlt();
     RegLocation GetReturnWideAlt();
     RegLocation LocCReturn();
@@ -170,7 +172,7 @@
     LIR* OpIT(ConditionCode cond, const char* guide);
     void OpEndIT(LIR* it);
     LIR* OpMem(OpKind op, RegStorage r_base, int disp);
-    LIR* OpPcRelLoad(RegStorage reg, LIR* target);
+    void OpPcRelLoad(RegStorage reg, LIR* target);
     LIR* OpReg(OpKind op, RegStorage r_dest_src);
     void OpRegCopy(RegStorage r_dest, RegStorage r_src);
     LIR* OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src);
@@ -232,6 +234,12 @@
       return false;
     }
 
+    // True if the ISA revision is R6.
+    const bool isaIsR6_;
+
+    // True if the floating point unit is 32-bit.
+    const bool fpuIs32Bit_;
+
   private:
     void GenNegLong(RegLocation rl_dest, RegLocation rl_src);
     void GenAddLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
diff --git a/compiler/dex/quick/mips/fp_mips.cc b/compiler/dex/quick/mips/fp_mips.cc
index d7ed7ac..37bf1a6 100644
--- a/compiler/dex/quick/mips/fp_mips.cc
+++ b/compiler/dex/quick/mips/fp_mips.cc
@@ -181,6 +181,30 @@
   }
 }
 
+// Get the reg storage for a wide FP argument. The result is either a solo or a pair. The base is
+// MIPS-counted, i.e., only even values (0, 2) are valid.
+static RegStorage GetWideArgFP(bool fpuIs32Bit, size_t base) {
+  // TODO: Consider computing this directly, e.g., as rMIPS_FARG0 + base. For now, inlining
+  // should optimize everything.
+  if (fpuIs32Bit) {
+    switch (base) {
+      case 0:
+        return RegStorage(RegStorage::k64BitPair, rMIPS_FARG0, rMIPS_FARG1);
+      case 2:
+        return RegStorage(RegStorage::k64BitPair, rMIPS_FARG2, rMIPS_FARG3);
+    }
+  } else {
+    switch (base) {
+      case 0:
+        return RegStorage(RegStorage::k64BitSolo, rMIPS_FARG0);
+      case 2:
+        return RegStorage(RegStorage::k64BitSolo, rMIPS_FARG2);
+    }
+  }
+  LOG(FATAL) << "Unsupported Mips.GetWideFP: " << fpuIs32Bit << " " << base;
+  UNREACHABLE();
+}
+
 void MipsMir2Lir::GenCmpFP(Instruction::Code opcode, RegLocation rl_dest,
                            RegLocation rl_src1, RegLocation rl_src2) {
   bool wide = true;
@@ -208,8 +232,8 @@
   FlushAllRegs();
   LockCallTemps();
   if (wide) {
-    RegStorage r_tmp1(RegStorage::k64BitPair, rMIPS_FARG0, rMIPS_FARG1);
-    RegStorage r_tmp2(RegStorage::k64BitPair, rMIPS_FARG2, rMIPS_FARG3);
+    RegStorage r_tmp1 = GetWideArgFP(fpuIs32Bit_, 0);
+    RegStorage r_tmp2 = GetWideArgFP(fpuIs32Bit_, 2);
     LoadValueDirectWideFixed(rl_src1, r_tmp1);
     LoadValueDirectWideFixed(rl_src2, r_tmp2);
   } else {
diff --git a/compiler/dex/quick/mips/int_mips.cc b/compiler/dex/quick/mips/int_mips.cc
index 17ac629..8093c97 100644
--- a/compiler/dex/quick/mips/int_mips.cc
+++ b/compiler/dex/quick/mips/int_mips.cc
@@ -194,17 +194,34 @@
     bool src_fp = r_src.IsFloat();
     if (dest_fp) {
       if (src_fp) {
+        // Here if both src and dest are fp registers. OpRegCopy will choose the right copy
+        // (solo or pair).
         OpRegCopy(r_dest, r_src);
       } else {
-        /* note the operands are swapped for the mtc1 instr */
-        NewLIR2(kMipsMtc1, r_src.GetLowReg(), r_dest.GetLowReg());
-        NewLIR2(kMipsMtc1, r_src.GetHighReg(), r_dest.GetHighReg());
+        // Here if dest is an fp reg and src is a core reg. Note that the operands are
+        // swapped for the mtc1 and mthc1 instructions.
+        if (fpuIs32Bit_) {
+          NewLIR2(kMipsMtc1, r_src.GetLowReg(), r_dest.GetLowReg());
+          NewLIR2(kMipsMtc1, r_src.GetHighReg(), r_dest.GetHighReg());
+        } else {
+          r_dest = Fp64ToSolo32(r_dest);
+          NewLIR2(kMipsMtc1, r_src.GetLowReg(), r_dest.GetReg());
+          NewLIR2(kMipsMthc1, r_src.GetHighReg(), r_dest.GetReg());
+        }
       }
     } else {
       if (src_fp) {
-        NewLIR2(kMipsMfc1, r_dest.GetLowReg(), r_src.GetLowReg());
-        NewLIR2(kMipsMfc1, r_dest.GetHighReg(), r_src.GetHighReg());
+        // Here if dest is a core reg and src is an fp reg.
+        if (fpuIs32Bit_) {
+          NewLIR2(kMipsMfc1, r_dest.GetLowReg(), r_src.GetLowReg());
+          NewLIR2(kMipsMfc1, r_dest.GetHighReg(), r_src.GetHighReg());
+        } else {
+          r_src = Fp64ToSolo32(r_src);
+          NewLIR2(kMipsMfc1, r_dest.GetLowReg(), r_src.GetReg());
+          NewLIR2(kMipsMfhc1, r_dest.GetHighReg(), r_src.GetReg());
+        }
       } else {
+        // Here if both src and dest are core registers.
         // Handle overlap
         if (r_src.GetHighReg() == r_dest.GetLowReg()) {
           OpRegCopy(r_dest.GetHigh(), r_src.GetHigh());
@@ -243,12 +260,14 @@
 
 RegLocation MipsMir2Lir::GenDivRem(RegLocation rl_dest, RegStorage reg1, RegStorage reg2,
                                    bool is_div) {
-  NewLIR2(kMipsDiv, reg1.GetReg(), reg2.GetReg());
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-  if (is_div) {
-    NewLIR1(kMipsMflo, rl_result.reg.GetReg());
+
+  if (isaIsR6_) {
+    NewLIR3(is_div ? kMipsR6Div : kMipsR6Mod,
+            rl_result.reg.GetReg(), reg1.GetReg(), reg2.GetReg());
   } else {
-    NewLIR1(kMipsMfhi, rl_result.reg.GetReg());
+    NewLIR2(kMipsDiv, reg1.GetReg(), reg2.GetReg());
+    NewLIR1(is_div ? kMipsMflo : kMipsMfhi, rl_result.reg.GetReg());
   }
   return rl_result;
 }
@@ -257,13 +276,7 @@
                                       bool is_div) {
   RegStorage t_reg = AllocTemp();
   NewLIR3(kMipsAddiu, t_reg.GetReg(), rZERO, lit);
-  NewLIR2(kMipsDiv, reg1.GetReg(), t_reg.GetReg());
-  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-  if (is_div) {
-    NewLIR1(kMipsMflo, rl_result.reg.GetReg());
-  } else {
-    NewLIR1(kMipsMfhi, rl_result.reg.GetReg());
-  }
+  RegLocation rl_result = GenDivRem(rl_dest, reg1, t_reg, is_div);
   FreeTemp(t_reg);
   return rl_result;
 }
@@ -335,7 +348,7 @@
   return true;
 }
 
-LIR* MipsMir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) {
+void MipsMir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) {
   UNUSED(reg, target);
   LOG(FATAL) << "Unexpected use of OpPcRelLoad for Mips";
   UNREACHABLE();
diff --git a/compiler/dex/quick/mips/mips_lir.h b/compiler/dex/quick/mips/mips_lir.h
index 66e3894..7037055 100644
--- a/compiler/dex/quick/mips/mips_lir.h
+++ b/compiler/dex/quick/mips/mips_lir.h
@@ -236,22 +236,22 @@
 #endif
   // Double precision registers where the FPU is in 64-bit mode.
   rD0_fr1  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  0,
-  rD1_fr1  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  1,
-  rD2_fr1  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  2,
-  rD3_fr1  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  3,
-  rD4_fr1  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  4,
-  rD5_fr1  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  5,
-  rD6_fr1  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  6,
-  rD7_fr1  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  7,
+  rD1_fr1  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  2,
+  rD2_fr1  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  4,
+  rD3_fr1  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  6,
+  rD4_fr1  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  8,
+  rD5_fr1  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 10,
+  rD6_fr1  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 12,
+  rD7_fr1  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 14,
 #if 0  // TODO: expand resource mask to enable use of all MIPS fp registers.
-  rD8_fr1  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  8,
-  rD9_fr1  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  9,
-  rD10_fr1 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 10,
-  rD11_fr1 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 11,
-  rD12_fr1 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 12,
-  rD13_fr1 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 13,
-  rD14_fr1 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 14,
-  rD15_fr1 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 15,
+  rD8_fr1  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 16,
+  rD9_fr1  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 18,
+  rD10_fr1 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 20,
+  rD11_fr1 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 22,
+  rD12_fr1 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 24,
+  rD13_fr1 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 26,
+  rD14_fr1 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 28,
+  rD15_fr1 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 30,
 #endif
 };
 
@@ -368,10 +368,12 @@
 const RegLocation mips_loc_c_return_float
     {kLocPhysReg, 0, 0, 0, 1, 0, 0, 0, 1,
      RegStorage(RegStorage::k32BitSolo, rF0), INVALID_SREG, INVALID_SREG};
-// FIXME: move MIPS to k64Bitsolo for doubles
-const RegLocation mips_loc_c_return_double
+const RegLocation mips_loc_c_return_double_fr0
     {kLocPhysReg, 1, 0, 0, 1, 0, 0, 0, 1,
      RegStorage(RegStorage::k64BitPair, rF0, rF1), INVALID_SREG, INVALID_SREG};
+const RegLocation mips_loc_c_return_double_fr1
+    {kLocPhysReg, 1, 0, 0, 1, 0, 0, 0, 1,
+     RegStorage(RegStorage::k64BitSolo, rF0), INVALID_SREG, INVALID_SREG};
 
 enum MipsShiftEncodings {
   kMipsLsl = 0x0,
@@ -476,13 +478,21 @@
   kMipsFldc1,  // ldc1 t,o(b) [110101] b[25..21] t[20..16] o[15..0].
   kMipsFswc1,  // swc1 t,o(b) [111001] b[25..21] t[20..16] o[15..0].
   kMipsFsdc1,  // sdc1 t,o(b) [111101] b[25..21] t[20..16] o[15..0].
-  kMipsMfc1,  // mfc1 t,s [01000100000] t[20..16] s[15..11] [00000000000].
-  kMipsMtc1,  // mtc1 t,s [01000100100] t[20..16] s[15..11] [00000000000].
+  kMipsMfc1,   // mfc1 t,s [01000100000] t[20..16] s[15..11] [00000000000].
+  kMipsMtc1,   // mtc1 t,s [01000100100] t[20..16] s[15..11] [00000000000].
+  kMipsMfhc1,  // mfhc1 t,s [01000100011] t[20..16] s[15..11] [00000000000].
+  kMipsMthc1,  // mthc1 t,s [01000100111] t[20..16] s[15..11] [00000000000].
   kMipsDelta,  // Pseudo for ori t, s, <label>-<label>.
   kMipsDeltaHi,  // Pseudo for lui t, high16(<label>-<label>).
   kMipsDeltaLo,  // Pseudo for ori t, s, low16(<label>-<label>).
   kMipsCurrPC,  // jal to .+8 to materialize pc.
   kMipsSync,    // sync kind [000000] [0000000000000000] s[10..6] [001111].
+
+  // The following are mips32r6 instructions.
+  kMipsR6Div,   // div d,s,t [000000] s[25..21] t[20..16] d[15..11] [00010011010].
+  kMipsR6Mod,   // mod d,s,t [000000] s[25..21] t[20..16] d[15..11] [00011011010].
+  kMipsR6Mul,   // mul d,s,t [000000] s[25..21] t[20..16] d[15..11] [00010011000].
+
   kMipsUndefined,  // undefined [011001xxxxxxxxxxxxxxxx].
   kMipsLast
 };
diff --git a/compiler/dex/quick/mips/target_mips.cc b/compiler/dex/quick/mips/target_mips.cc
index 8574ffd..830f63a 100644
--- a/compiler/dex/quick/mips/target_mips.cc
+++ b/compiler/dex/quick/mips/target_mips.cc
@@ -86,16 +86,48 @@
 }
 
 RegLocation MipsMir2Lir::LocCReturnDouble() {
-  return mips_loc_c_return_double;
+  if (fpuIs32Bit_) {
+    return mips_loc_c_return_double_fr0;
+  } else {
+    return mips_loc_c_return_double_fr1;
+  }
 }
 
 // Convert k64BitSolo into k64BitPair
 RegStorage MipsMir2Lir::Solo64ToPair64(RegStorage reg) {
   DCHECK(reg.IsDouble());
+  DCHECK_EQ(reg.GetRegNum() & 1, 0);
   int reg_num = (reg.GetRegNum() & ~1) | RegStorage::kFloatingPoint;
   return RegStorage(RegStorage::k64BitPair, reg_num, reg_num + 1);
 }
 
+// Convert a 64-bit FP register (k64BitSolo or k64BitPair) into a k32BitSolo.
+// This routine is only used to allow a 64-bit FPU to access FP registers 32 bits at a time.
+RegStorage MipsMir2Lir::Fp64ToSolo32(RegStorage reg) {
+  DCHECK(!fpuIs32Bit_);
+  DCHECK(reg.IsDouble());
+  DCHECK(!reg.IsPair());
+  int reg_num = reg.GetRegNum() | RegStorage::kFloatingPoint;
+  return RegStorage(RegStorage::k32BitSolo, reg_num);
+}
+
+// Return a target-dependent special register.
+RegStorage MipsMir2Lir::TargetReg(SpecialTargetRegister reg, WideKind wide_kind) {
+  if (wide_kind == kWide) {
+    DCHECK((kArg0 <= reg && reg < kArg7) || (kFArg0 <= reg && reg < kFArg15) || (kRet0 == reg));
+    RegStorage ret_reg = RegStorage::MakeRegPair(
+        TargetReg(reg), TargetReg(static_cast<SpecialTargetRegister>(reg + 1)));
+    if (!fpuIs32Bit_ && ret_reg.IsFloat()) {
+      // Convert a 64BitPair to a 64BitSolo for 64-bit FPUs.
+      RegStorage low = ret_reg.GetLow();
+      ret_reg = RegStorage::FloatSolo64(low.GetRegNum());
+    }
+    return ret_reg;
+  } else {
+    return TargetReg(reg);
+  }
+}
+
 // Return a target-dependent special register.
 RegStorage MipsMir2Lir::TargetReg(SpecialTargetRegister reg) {
   RegStorage res_reg;
@@ -145,12 +177,7 @@
  */
 ResourceMask MipsMir2Lir::GetRegMaskCommon(const RegStorage& reg) const {
   if (reg.IsDouble()) {
-    if (cu_->compiler_driver->GetInstructionSetFeatures()->AsMipsInstructionSetFeatures()
-        ->Is32BitFloatingPoint()) {
-      return ResourceMask::TwoBits((reg.GetRegNum() & ~1) + kMipsFPReg0);
-    } else {
-      return ResourceMask::TwoBits(reg.GetRegNum() * 2 + kMipsFPReg0);
-    }
+    return ResourceMask::TwoBits((reg.GetRegNum() & ~1) + kMipsFPReg0);
   } else if (reg.IsSingle()) {
     return ResourceMask::Bit(reg.GetRegNum() + kMipsFPReg0);
   } else {
@@ -401,8 +428,7 @@
   Clobber(rs_rF13);
   Clobber(rs_rF14);
   Clobber(rs_rF15);
-  if (cu_->compiler_driver->GetInstructionSetFeatures()->AsMipsInstructionSetFeatures()
-      ->Is32BitFloatingPoint()) {
+  if (fpuIs32Bit_) {
     Clobber(rs_rD0_fr0);
     Clobber(rs_rD1_fr0);
     Clobber(rs_rD2_fr0);
@@ -462,28 +488,20 @@
 }
 
 void MipsMir2Lir::CompilerInitializeRegAlloc() {
-  const bool fpu_is_32bit =
-      cu_->compiler_driver->GetInstructionSetFeatures()->AsMipsInstructionSetFeatures()
-      ->Is32BitFloatingPoint();
   reg_pool_.reset(new (arena_) RegisterPool(this, arena_, core_regs, empty_pool /* core64 */,
                                             sp_regs,
-                                            fpu_is_32bit ? dp_fr0_regs : dp_fr1_regs,
+                                            fpuIs32Bit_ ? dp_fr0_regs : dp_fr1_regs,
                                             reserved_regs, empty_pool /* reserved64 */,
                                             core_temps, empty_pool /* core64_temps */,
                                             sp_temps,
-                                            fpu_is_32bit ? dp_fr0_temps : dp_fr1_temps));
+                                            fpuIs32Bit_ ? dp_fr0_temps : dp_fr1_temps));
 
   // Target-specific adjustments.
 
   // Alias single precision floats to appropriate half of overlapping double.
   for (RegisterInfo* info : reg_pool_->sp_regs_) {
     int sp_reg_num = info->GetReg().GetRegNum();
-    int dp_reg_num;
-    if (fpu_is_32bit) {
-      dp_reg_num = sp_reg_num & ~1;
-    } else {
-      dp_reg_num = sp_reg_num >> 1;
-    }
+    int dp_reg_num = sp_reg_num & ~1;
     RegStorage dp_reg = RegStorage::Solo64(RegStorage::kFloatingPoint | dp_reg_num);
     RegisterInfo* dp_reg_info = GetRegInfo(dp_reg);
     // Double precision register's master storage should refer to itself.
@@ -502,11 +520,7 @@
   // TODO: adjust when we roll to hard float calling convention.
   reg_pool_->next_core_reg_ = 2;
   reg_pool_->next_sp_reg_ = 2;
-  if (fpu_is_32bit) {
-    reg_pool_->next_dp_reg_ = 2;
-  } else {
-    reg_pool_->next_dp_reg_ = 1;
-  }
+  reg_pool_->next_dp_reg_ = 2;
 }
 
 /*
@@ -610,7 +624,11 @@
 }
 
 MipsMir2Lir::MipsMir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena)
-    : Mir2Lir(cu, mir_graph, arena), in_to_reg_storage_mips_mapper_(this) {
+    : Mir2Lir(cu, mir_graph, arena), in_to_reg_storage_mips_mapper_(this),
+      isaIsR6_(cu->compiler_driver->GetInstructionSetFeatures()
+                 ->AsMipsInstructionSetFeatures()->IsR6()),
+      fpuIs32Bit_(cu->compiler_driver->GetInstructionSetFeatures()
+                    ->AsMipsInstructionSetFeatures()->Is32BitFloatingPoint()) {
   for (int i = 0; i < kMipsLast; i++) {
     DCHECK_EQ(MipsMir2Lir::EncodingMap[i].opcode, i)
         << "Encoding order for " << MipsMir2Lir::EncodingMap[i].name
diff --git a/compiler/dex/quick/mips/utility_mips.cc b/compiler/dex/quick/mips/utility_mips.cc
index 2d26922..3b7e0ed 100644
--- a/compiler/dex/quick/mips/utility_mips.cc
+++ b/compiler/dex/quick/mips/utility_mips.cc
@@ -182,7 +182,11 @@
       opcode = kMipsAnd;
       break;
     case kOpMul:
-      opcode = kMipsMul;
+      if (isaIsR6_) {
+        opcode = kMipsR6Mul;
+      } else {
+        opcode = kMipsMul;
+      }
       break;
     case kOpOr:
       opcode = kMipsOr;
@@ -271,7 +275,11 @@
       break;
     case kOpMul:
       short_form = false;
-      opcode = kMipsMul;
+      if (isaIsR6_) {
+        opcode = kMipsR6Mul;
+      } else {
+        opcode = kMipsMul;
+      }
       break;
     default:
       LOG(FATAL) << "Bad case in OpRegRegImm";
@@ -359,12 +367,23 @@
 
 LIR* MipsMir2Lir::LoadConstantWide(RegStorage r_dest, int64_t value) {
   LIR *res;
-  if (!r_dest.IsPair()) {
-    // Form 64-bit pair
-    r_dest = Solo64ToPair64(r_dest);
+  if (fpuIs32Bit_ || !r_dest.IsFloat()) {
+    // 32bit FPU (pairs) or loading into GPR.
+    if (!r_dest.IsPair()) {
+      // Form 64-bit pair
+      r_dest = Solo64ToPair64(r_dest);
+    }
+    res = LoadConstantNoClobber(r_dest.GetLow(), Low32Bits(value));
+    LoadConstantNoClobber(r_dest.GetHigh(), High32Bits(value));
+  } else {
+    // Here if we have a 64bit FPU and loading into FPR.
+    RegStorage r_temp = AllocTemp();
+    r_dest = Fp64ToSolo32(r_dest);
+    res = LoadConstantNoClobber(r_dest, Low32Bits(value));
+    LoadConstantNoClobber(r_temp, High32Bits(value));
+    NewLIR2(kMipsMthc1, r_temp.GetReg(), r_dest.GetReg());
+    FreeTemp(r_temp);
   }
-  res = LoadConstantNoClobber(r_dest.GetLow(), Low32Bits(value));
-  LoadConstantNoClobber(r_dest.GetHigh(), High32Bits(value));
   return res;
 }
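For the 64-bit-FPU path in LoadConstantWide above, the constant is split into 32-bit halves exactly as in the pair case; only the destination of the high half differs (mthc1 from a temp GPR instead of a load into a second register). A host-side sketch of the split, mirroring the Low32Bits/High32Bits helpers the diff uses:

    #include <cstdint>
    #include <cstdio>

    // The same 64-bit split LoadConstantWide performs before emitting code.
    static uint32_t Low32Bits(uint64_t value) { return static_cast<uint32_t>(value); }
    static uint32_t High32Bits(uint64_t value) { return static_cast<uint32_t>(value >> 32); }

    int main() {
      uint64_t value = 0x123456789abcdef0ULL;
      // GPR pair or 32-bit FPU: each half is loaded as its own 32-bit constant.
      // 64-bit FPU: the low half goes through the FPR's 32-bit alias and the
      // high half is inserted from a temp GPR with mthc1.
      std::printf("low=0x%08x high=0x%08x\n", Low32Bits(value), High32Bits(value));
      return 0;
    }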
 
@@ -483,32 +502,29 @@
   LIR *load2 = NULL;
   MipsOpCode opcode = kMipsNop;
   bool short_form = IS_SIMM16(displacement);
-  bool pair = r_dest.IsPair();
+  bool is64bit = false;
 
   switch (size) {
     case k64:
     case kDouble:
-      if (!pair) {
+      is64bit = true;
+      if (fpuIs32Bit_ && !r_dest.IsPair()) {
         // Form 64-bit pair
         r_dest = Solo64ToPair64(r_dest);
-        pair = 1;
-      }
-      if (r_dest.IsFloat()) {
-        DCHECK_EQ(r_dest.GetLowReg(), r_dest.GetHighReg() - 1);
-        opcode = kMipsFlwc1;
-      } else {
-        opcode = kMipsLw;
       }
       short_form = IS_SIMM16_2WORD(displacement);
-      DCHECK_EQ((displacement & 0x3), 0);
-      break;
+      FALLTHROUGH_INTENDED;
     case k32:
     case kSingle:
     case kReference:
       opcode = kMipsLw;
       if (r_dest.IsFloat()) {
         opcode = kMipsFlwc1;
-        DCHECK(r_dest.IsSingle());
+        if (!is64bit) {
+          DCHECK(r_dest.IsSingle());
+        } else {
+          DCHECK(r_dest.IsDouble());
+        }
       }
       DCHECK_EQ((displacement & 0x3), 0);
       break;
@@ -531,35 +547,56 @@
   }
 
   if (short_form) {
-    if (!pair) {
+    if (!is64bit) {
       load = res = NewLIR3(opcode, r_dest.GetReg(), displacement, r_base.GetReg());
     } else {
-      load = res = NewLIR3(opcode, r_dest.GetLowReg(), displacement + LOWORD_OFFSET, r_base.GetReg());
-      load2 = NewLIR3(opcode, r_dest.GetHighReg(), displacement + HIWORD_OFFSET, r_base.GetReg());
+      if (fpuIs32Bit_ || !r_dest.IsFloat()) {
+        DCHECK(r_dest.IsPair());
+        load = res = NewLIR3(opcode, r_dest.GetLowReg(), displacement + LOWORD_OFFSET, r_base.GetReg());
+        load2 = NewLIR3(opcode, r_dest.GetHighReg(), displacement + HIWORD_OFFSET, r_base.GetReg());
+      } else {
+        // Here if 64bit fpu and r_dest is a 64bit fp register.
+        RegStorage r_tmp = AllocTemp();
+        // FIXME: why is r_dest a 64BitPair here???
+        r_dest = Fp64ToSolo32(r_dest);
+        load = res = NewLIR3(kMipsFlwc1, r_dest.GetReg(), displacement + LOWORD_OFFSET, r_base.GetReg());
+        load2 = NewLIR3(kMipsLw, r_tmp.GetReg(), displacement + HIWORD_OFFSET, r_base.GetReg());
+        NewLIR2(kMipsMthc1, r_tmp.GetReg(), r_dest.GetReg());
+        FreeTemp(r_tmp);
+      }
     }
   } else {
-    if (pair) {
-      RegStorage r_tmp = AllocTemp();
-      res = OpRegRegImm(kOpAdd, r_tmp, r_base, displacement);
-      load = NewLIR3(opcode, r_dest.GetLowReg(), LOWORD_OFFSET, r_tmp.GetReg());
-      load2 = NewLIR3(opcode, r_dest.GetHighReg(), HIWORD_OFFSET, r_tmp.GetReg());
-      FreeTemp(r_tmp);
-    } else {
-      RegStorage r_tmp = (r_base == r_dest) ? AllocTemp() : r_dest;
+    if (!is64bit) {
+      RegStorage r_tmp = (r_base == r_dest || r_dest.IsFloat()) ? AllocTemp() : r_dest;
       res = OpRegRegImm(kOpAdd, r_tmp, r_base, displacement);
       load = NewLIR3(opcode, r_dest.GetReg(), 0, r_tmp.GetReg());
       if (r_tmp != r_dest)
         FreeTemp(r_tmp);
+    } else {
+      RegStorage r_tmp = AllocTemp();
+      res = OpRegRegImm(kOpAdd, r_tmp, r_base, displacement);
+      if (fpuIs32Bit_ || !r_dest.IsFloat()) {
+        DCHECK(r_dest.IsPair());
+        load = NewLIR3(opcode, r_dest.GetLowReg(), LOWORD_OFFSET, r_tmp.GetReg());
+        load2 = NewLIR3(opcode, r_dest.GetHighReg(), HIWORD_OFFSET, r_tmp.GetReg());
+      } else {
+        // Here if 64bit fpu and r_dest is a 64bit fp register
+        r_dest = Fp64ToSolo32(r_dest);
+        load = res = NewLIR3(kMipsFlwc1, r_dest.GetReg(), LOWORD_OFFSET, r_tmp.GetReg());
+        load2 = NewLIR3(kMipsLw, r_tmp.GetReg(), HIWORD_OFFSET, r_tmp.GetReg());
+        NewLIR2(kMipsMthc1, r_tmp.GetReg(), r_dest.GetReg());
+      }
+      FreeTemp(r_tmp);
     }
   }
 
   if (mem_ref_type_ == ResourceMask::kDalvikReg) {
     DCHECK_EQ(r_base, rs_rMIPS_SP);
-    AnnotateDalvikRegAccess(load, (displacement + (pair ? LOWORD_OFFSET : 0)) >> 2,
-                            true /* is_load */, pair /* is64bit */);
-    if (pair) {
+    AnnotateDalvikRegAccess(load, (displacement + (is64bit ? LOWORD_OFFSET : 0)) >> 2,
+                            true /* is_load */, is64bit /* is64bit */);
+    if (is64bit) {
       AnnotateDalvikRegAccess(load2, (displacement + HIWORD_OFFSET) >> 2,
-                              true /* is_load */, pair /* is64bit */);
+                              true /* is_load */, is64bit /* is64bit */);
     }
   }
   return load;
@@ -594,32 +631,29 @@
   LIR *store2 = NULL;
   MipsOpCode opcode = kMipsNop;
   bool short_form = IS_SIMM16(displacement);
-  bool pair = r_src.IsPair();
+  bool is64bit = false;
 
   switch (size) {
     case k64:
     case kDouble:
-      if (!pair) {
+      is64bit = true;
+      if (fpuIs32Bit_ && !r_src.IsPair()) {
         // Form 64-bit pair
         r_src = Solo64ToPair64(r_src);
-        pair = 1;
-      }
-      if (r_src.IsFloat()) {
-        DCHECK_EQ(r_src.GetLowReg(), r_src.GetHighReg() - 1);
-        opcode = kMipsFswc1;
-      } else {
-        opcode = kMipsSw;
       }
       short_form = IS_SIMM16_2WORD(displacement);
-      DCHECK_EQ((displacement & 0x3), 0);
-      break;
+      FALLTHROUGH_INTENDED;
     case k32:
     case kSingle:
     case kReference:
       opcode = kMipsSw;
       if (r_src.IsFloat()) {
         opcode = kMipsFswc1;
-        DCHECK(r_src.IsSingle());
+        if (!is64bit) {
+          DCHECK(r_src.IsSingle());
+        } else {
+          DCHECK(r_src.IsDouble());
+        }
       }
       DCHECK_EQ((displacement & 0x3), 0);
       break;
@@ -637,31 +671,53 @@
   }
 
   if (short_form) {
-    if (!pair) {
+    if (!is64bit) {
       store = res = NewLIR3(opcode, r_src.GetReg(), displacement, r_base.GetReg());
     } else {
-      store = res = NewLIR3(opcode, r_src.GetLowReg(), displacement + LOWORD_OFFSET, r_base.GetReg());
-      store2 = NewLIR3(opcode, r_src.GetHighReg(), displacement + HIWORD_OFFSET, r_base.GetReg());
+      if (fpuIs32Bit_ || !r_src.IsFloat()) {
+        DCHECK(r_src.IsPair());
+        store = res = NewLIR3(opcode, r_src.GetLowReg(), displacement + LOWORD_OFFSET, r_base.GetReg());
+        store2 = NewLIR3(opcode, r_src.GetHighReg(), displacement + HIWORD_OFFSET, r_base.GetReg());
+      } else {
+        // Here if 64bit fpu and r_src is a 64bit fp register
+        RegStorage r_tmp = AllocTemp();
+        r_src = Fp64ToSolo32(r_src);
+        store = res = NewLIR3(kMipsFswc1, r_src.GetReg(), displacement + LOWORD_OFFSET, r_base.GetReg());
+        NewLIR2(kMipsMfhc1, r_tmp.GetReg(), r_src.GetReg());
+        store2 = NewLIR3(kMipsSw, r_tmp.GetReg(), displacement + HIWORD_OFFSET, r_base.GetReg());
+        FreeTemp(r_tmp);
+      }
     }
   } else {
     RegStorage r_scratch = AllocTemp();
     res = OpRegRegImm(kOpAdd, r_scratch, r_base, displacement);
-    if (!pair) {
+    if (!is64bit) {
       store =  NewLIR3(opcode, r_src.GetReg(), 0, r_scratch.GetReg());
     } else {
-      store =  NewLIR3(opcode, r_src.GetLowReg(), LOWORD_OFFSET, r_scratch.GetReg());
-      store2 = NewLIR3(opcode, r_src.GetHighReg(), HIWORD_OFFSET, r_scratch.GetReg());
+      if (fpuIs32Bit_ || !r_src.IsFloat()) {
+        DCHECK(r_src.IsPair());
+        store = NewLIR3(opcode, r_src.GetLowReg(), LOWORD_OFFSET, r_scratch.GetReg());
+        store2 = NewLIR3(opcode, r_src.GetHighReg(), HIWORD_OFFSET, r_scratch.GetReg());
+      } else {
+        // Here if 64bit fpu and r_src is a 64bit fp register
+        RegStorage r_tmp = AllocTemp();
+        r_src = Fp64ToSolo32(r_src);
+        store = NewLIR3(kMipsFswc1, r_src.GetReg(), LOWORD_OFFSET, r_scratch.GetReg());
+        NewLIR2(kMipsMfhc1, r_tmp.GetReg(), r_src.GetReg());
+        store2 = NewLIR3(kMipsSw, r_tmp.GetReg(), HIWORD_OFFSET, r_scratch.GetReg());
+        FreeTemp(r_tmp);
+      }
     }
     FreeTemp(r_scratch);
   }
 
   if (mem_ref_type_ == ResourceMask::kDalvikReg) {
     DCHECK_EQ(r_base, rs_rMIPS_SP);
-    AnnotateDalvikRegAccess(store, (displacement + (pair ? LOWORD_OFFSET : 0)) >> 2,
-                            false /* is_load */, pair /* is64bit */);
-    if (pair) {
+    AnnotateDalvikRegAccess(store, (displacement + (is64bit ? LOWORD_OFFSET : 0)) >> 2,
+                            false /* is_load */, is64bit /* is64bit */);
+    if (is64bit) {
       AnnotateDalvikRegAccess(store2, (displacement + HIWORD_OFFSET) >> 2,
-                              false /* is_load */, pair /* is64bit */);
+                              false /* is_load */, is64bit /* is64bit */);
     }
   }
 
diff --git a/compiler/dex/quick/mips64/assemble_mips64.cc b/compiler/dex/quick/mips64/assemble_mips64.cc
index 17a0ef1..d96561b 100644
--- a/compiler/dex/quick/mips64/assemble_mips64.cc
+++ b/compiler/dex/quick/mips64/assemble_mips64.cc
@@ -629,8 +629,8 @@
          * and is found in lir->target.  If operands[3] is non-NULL,
          * then it is a Switch/Data table.
          */
-        int offset1 = (reinterpret_cast<LIR*>(UnwrapPointer(lir->operands[2])))->offset;
-        EmbeddedData *tab_rec = reinterpret_cast<EmbeddedData*>(UnwrapPointer(lir->operands[3]));
+        int offset1 = UnwrapPointer<LIR>(lir->operands[2])->offset;
+        const EmbeddedData* tab_rec = UnwrapPointer<EmbeddedData>(lir->operands[3]);
         int offset2 = tab_rec ? tab_rec->offset : lir->target->offset;
         int delta = offset2 - offset1;
         if ((delta & 0xffff) == delta && ((delta & 0x8000) == 0)) {
@@ -651,14 +651,14 @@
           res = kRetryAll;
         }
       } else if (lir->opcode == kMips64DeltaLo) {
-        int offset1 = (reinterpret_cast<LIR*>(UnwrapPointer(lir->operands[2])))->offset;
-        EmbeddedData *tab_rec = reinterpret_cast<EmbeddedData*>(UnwrapPointer(lir->operands[3]));
+        int offset1 = UnwrapPointer<LIR>(lir->operands[2])->offset;
+        const EmbeddedData* tab_rec = UnwrapPointer<EmbeddedData>(lir->operands[3]);
         int offset2 = tab_rec ? tab_rec->offset : lir->target->offset;
         int delta = offset2 - offset1;
         lir->operands[1] = delta & 0xffff;
       } else if (lir->opcode == kMips64DeltaHi) {
-        int offset1 = (reinterpret_cast<LIR*>(UnwrapPointer(lir->operands[2])))->offset;
-        EmbeddedData *tab_rec = reinterpret_cast<EmbeddedData*>(UnwrapPointer(lir->operands[3]));
+        int offset1 = UnwrapPointer<LIR>(lir->operands[2])->offset;
+        const EmbeddedData* tab_rec = UnwrapPointer<EmbeddedData>(lir->operands[3]);
         int offset2 = tab_rec ? tab_rec->offset : lir->target->offset;
         int delta = offset2 - offset1;
         lir->operands[1] = (delta >> 16) & 0xffff;
diff --git a/compiler/dex/quick/mips64/call_mips64.cc b/compiler/dex/quick/mips64/call_mips64.cc
index 31be1c2..0e58770 100644
--- a/compiler/dex/quick/mips64/call_mips64.cc
+++ b/compiler/dex/quick/mips64/call_mips64.cc
@@ -268,7 +268,7 @@
     class StackOverflowSlowPath : public LIRSlowPath {
      public:
       StackOverflowSlowPath(Mir2Lir* m2l, LIR* branch, size_t sp_displace)
-          : LIRSlowPath(m2l, m2l->GetCurrentDexPc(), branch, nullptr), sp_displace_(sp_displace) {
+          : LIRSlowPath(m2l, branch), sp_displace_(sp_displace) {
       }
       void Compile() OVERRIDE {
         m2l_->ResetRegPool();
diff --git a/compiler/dex/quick/mips64/codegen_mips64.h b/compiler/dex/quick/mips64/codegen_mips64.h
index 57c30d8..c9fd62f 100644
--- a/compiler/dex/quick/mips64/codegen_mips64.h
+++ b/compiler/dex/quick/mips64/codegen_mips64.h
@@ -182,7 +182,7 @@
   LIR* OpIT(ConditionCode cond, const char* guide);
   void OpEndIT(LIR* it);
   LIR* OpMem(OpKind op, RegStorage r_base, int disp);
-  LIR* OpPcRelLoad(RegStorage reg, LIR* target);
+  void OpPcRelLoad(RegStorage reg, LIR* target);
   LIR* OpReg(OpKind op, RegStorage r_dest_src);
   void OpRegCopy(RegStorage r_dest, RegStorage r_src);
   LIR* OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src);
diff --git a/compiler/dex/quick/mips64/int_mips64.cc b/compiler/dex/quick/mips64/int_mips64.cc
index 8a57c82..5c545bb 100644
--- a/compiler/dex/quick/mips64/int_mips64.cc
+++ b/compiler/dex/quick/mips64/int_mips64.cc
@@ -283,7 +283,7 @@
   return true;
 }
 
-LIR* Mips64Mir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) {
+void Mips64Mir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) {
   UNUSED(reg, target);
   LOG(FATAL) << "Unexpected use of OpPcRelLoad for Mips64";
   UNREACHABLE();
diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc
index 8fc45dc..0b480a0 100644
--- a/compiler/dex/quick/mir_to_lir.cc
+++ b/compiler/dex/quick/mir_to_lir.cc
@@ -27,7 +27,7 @@
 class Mir2Lir::SpecialSuspendCheckSlowPath : public Mir2Lir::LIRSlowPath {
  public:
   SpecialSuspendCheckSlowPath(Mir2Lir* m2l, LIR* branch, LIR* cont)
-      : LIRSlowPath(m2l, m2l->GetCurrentDexPc(), branch, cont),
+      : LIRSlowPath(m2l, branch, cont),
         num_used_args_(0u) {
   }
 
@@ -406,6 +406,7 @@
 bool Mir2Lir::GenSpecialCase(BasicBlock* bb, MIR* mir, const InlineMethod& special) {
   DCHECK(special.flags & kInlineSpecial);
   current_dalvik_offset_ = mir->offset;
+  DCHECK(current_mir_ == nullptr);  // Safepoints attributed to prologue.
   MIR* return_mir = nullptr;
   bool successful = false;
   EnsureInitializedArgMappingToPhysicalReg();
@@ -587,9 +588,6 @@
     case Instruction::MOVE_FROM16:
     case Instruction::MOVE_OBJECT_FROM16:
       StoreValue(rl_dest, rl_src[0]);
-      if (rl_src[0].is_const && (mir_graph_->ConstantValue(rl_src[0]) == 0)) {
-        Workaround7250540(rl_dest, RegStorage::InvalidReg());
-      }
       break;
 
     case Instruction::MOVE_WIDE:
@@ -1276,6 +1274,7 @@
     }
 
     current_dalvik_offset_ = mir->offset;
+    current_mir_ = mir;
     int opcode = mir->dalvikInsn.opcode;
 
     GenPrintLabel(mir);
@@ -1376,6 +1375,7 @@
 
 LIR* Mir2Lir::LIRSlowPath::GenerateTargetLabel(int opcode) {
   m2l_->SetCurrentDexPc(current_dex_pc_);
+  m2l_->current_mir_ = current_mir_;
   LIR* target = m2l_->NewLIR0(opcode);
   fromfast_->target = target;
   return target;
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index 9a56171..cca4e5a 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -131,6 +131,7 @@
 #define MAX_ASSEMBLER_RETRIES 50
 
 class BasicBlock;
+class BitVector;
 struct CallInfo;
 struct CompilationUnit;
 struct InlineMethod;
@@ -490,9 +491,10 @@
 
     class LIRSlowPath : public ArenaObject<kArenaAllocSlowPaths> {
      public:
-      LIRSlowPath(Mir2Lir* m2l, const DexOffset dexpc, LIR* fromfast,
-                  LIR* cont = nullptr) :
-        m2l_(m2l), cu_(m2l->cu_), current_dex_pc_(dexpc), fromfast_(fromfast), cont_(cont) {
+      LIRSlowPath(Mir2Lir* m2l, LIR* fromfast, LIR* cont = nullptr)
+          : m2l_(m2l), cu_(m2l->cu_),
+            current_dex_pc_(m2l->current_dalvik_offset_), current_mir_(m2l->current_mir_),
+            fromfast_(fromfast), cont_(cont) {
       }
       virtual ~LIRSlowPath() {}
       virtual void Compile() = 0;
@@ -511,6 +513,7 @@
       Mir2Lir* const m2l_;
       CompilationUnit* const cu_;
       const DexOffset current_dex_pc_;
+      MIR* current_mir_;
       LIR* const fromfast_;
       LIR* const cont_;
     };
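The constructor change above means a slow path snapshots both the current dex pc and the current MIR at creation time and restores them in GenerateTargetLabel when the out-of-line code is finally emitted. A schematic sketch of that capture/restore pattern (placeholder types, not ART's real classes):

    #include <cstdint>
    #include <cstdio>

    struct MIR {};  // Placeholder for the mid-level IR instruction.

    // Schematic Mir2Lir: only the state the slow path snapshots.
    struct Codegen {
      uint32_t current_dalvik_offset = 0;
      MIR* current_mir = nullptr;
    };

    class SlowPath {
     public:
      explicit SlowPath(Codegen* cg)
          : cg_(cg),
            dex_pc_(cg->current_dalvik_offset),  // Captured at creation...
            mir_(cg->current_mir) {}

      void Compile() {
        // ...and restored when the slow path is emitted much later, so
        // safepoints inside it are attributed to the right instruction.
        cg_->current_dalvik_offset = dex_pc_;
        cg_->current_mir = mir_;
        std::printf("emitting slow path for dex pc %u\n", dex_pc_);
      }

     private:
      Codegen* cg_;
      const uint32_t dex_pc_;
      MIR* const mir_;
    };

    int main() {
      Codegen cg;
      MIR invoke;
      cg.current_dalvik_offset = 42;
      cg.current_mir = &invoke;
      SlowPath sp(&cg);
      cg.current_dalvik_offset = 99;  // Main-line emission moves on.
      sp.Compile();                   // Restores pc 42 and the invoke MIR.
      return 0;
    }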
@@ -582,14 +585,16 @@
      * TUNING: If use of these utilities becomes more common on 32-bit builds, it
      * may be worth conditionally-compiling a set of identity functions here.
      */
-    uint32_t WrapPointer(void* pointer) {
+    template <typename T>
+    uint32_t WrapPointer(const T* pointer) {
       uint32_t res = pointer_storage_.size();
       pointer_storage_.push_back(pointer);
       return res;
     }
 
-    void* UnwrapPointer(size_t index) {
-      return pointer_storage_[index];
+    template <typename T>
+    const T* UnwrapPointer(size_t index) {
+      return reinterpret_cast<const T*>(pointer_storage_[index]);
     }
 
     // strdup(), but allocates from the arena.
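The templated WrapPointer/UnwrapPointer above move the reinterpret_cast into one place and let call sites name the pointee type directly, as the assembler changes elsewhere in this patch show. A standalone sketch of the same pattern (a hypothetical PointerStore class, not Mir2Lir itself):

    #include <cstdint>
    #include <vector>

    class PointerStore {
     public:
      template <typename T>
      uint32_t WrapPointer(const T* pointer) {
        uint32_t index = static_cast<uint32_t>(storage_.size());
        storage_.push_back(pointer);  // Type-erased; the caller must remember T.
        return index;
      }

      template <typename T>
      const T* UnwrapPointer(size_t index) const {
        return reinterpret_cast<const T*>(storage_[index]);
      }

     private:
      std::vector<const void*> storage_;
    };

    struct EmbeddedData { int offset; };

    int main() {
      PointerStore store;
      EmbeddedData tab{42};
      uint32_t idx = store.WrapPointer(&tab);  // Stash the index in an operand.
      const EmbeddedData* back = store.UnwrapPointer<EmbeddedData>(idx);
      return back->offset == 42 ? 0 : 1;
    }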
@@ -670,6 +675,7 @@
     bool VerifyCatchEntries();
     void CreateMappingTables();
     void CreateNativeGcMap();
+    void CreateNativeGcMapWithoutRegisterPromotion();
     int AssignLiteralOffset(CodeOffset offset);
     int AssignSwitchTablesOffset(CodeOffset offset);
     int AssignFillArrayDataOffset(CodeOffset offset);
@@ -1379,7 +1385,7 @@
     virtual LIR* OpIT(ConditionCode cond, const char* guide) = 0;
     virtual void OpEndIT(LIR* it) = 0;
     virtual LIR* OpMem(OpKind op, RegStorage r_base, int disp) = 0;
-    virtual LIR* OpPcRelLoad(RegStorage reg, LIR* target) = 0;
+    virtual void OpPcRelLoad(RegStorage reg, LIR* target) = 0;
     virtual LIR* OpReg(OpKind op, RegStorage r_dest_src) = 0;
     virtual void OpRegCopy(RegStorage r_dest, RegStorage r_src) = 0;
     virtual LIR* OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src) = 0;
@@ -1459,9 +1465,6 @@
     virtual void GenMonitorEnter(int opt_flags, RegLocation rl_src);
     virtual void GenMonitorExit(int opt_flags, RegLocation rl_src);
 
-    // Temp workaround
-    void Workaround7250540(RegLocation rl_dest, RegStorage zero_reg);
-
     virtual LIR* InvokeTrampoline(OpKind op, RegStorage r_tgt, QuickEntrypointEnum trampoline) = 0;
 
     // Queries for backend support for vectors
@@ -1729,6 +1732,16 @@
     // See CheckRegLocationImpl.
     void CheckRegLocation(RegLocation rl) const;
 
+    // Find the references at the beginning of a basic block (for generating GC maps).
+    void InitReferenceVRegs(BasicBlock* bb, BitVector* references);
+
+    // Update references from prev_mir to mir in the same BB. If mir is null or before
+    // prev_mir, report failure (return false) and update references to the end of the BB.
+    bool UpdateReferenceVRegsLocal(MIR* mir, MIR* prev_mir, BitVector* references);
+
+    // Update references from prev_mir to mir.
+    void UpdateReferenceVRegs(MIR* mir, MIR* prev_mir, BitVector* references);
+
   public:
     // TODO: add accessors for these.
     LIR* literal_list_;                        // Constants.
@@ -1745,8 +1758,7 @@
     ArenaVector<FillArrayData*> fill_array_data_;
     ArenaVector<RegisterInfo*> tempreg_info_;
     ArenaVector<RegisterInfo*> reginfo_map_;
-    ArenaVector<void*> pointer_storage_;
-    CodeOffset current_code_offset_;    // Working byte offset of machine instructons.
+    ArenaVector<const void*> pointer_storage_;
     CodeOffset data_offset_;            // starting offset of literal pool.
     size_t total_size_;                   // header + code size.
     LIR* block_label_list_;
@@ -1761,6 +1773,7 @@
      * The low-level LIR creation utilites will pull it from here.  Rework this.
      */
     DexOffset current_dalvik_offset_;
+    MIR* current_mir_;
     size_t estimated_native_code_size_;     // Just an estimate; used to reserve code_buffer_ size.
     std::unique_ptr<RegisterPool> reg_pool_;
     /*
@@ -1799,6 +1812,9 @@
     // to deduplicate the masks.
     ResourceMaskCache mask_cache_;
 
+    // Record the MIR that generated a given safepoint (nullptr for prologue safepoints).
+    ArenaVector<std::pair<LIR*, MIR*>> safepoints_;
+
   protected:
     // ABI support
     class ShortyArg {
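current_mir_ and the safepoints_ vector let the backend remember which MIR emitted each safepoint (nullptr for prologue safepoints), which the reference-tracking helpers declared above consume when building GC maps. A rough sketch of that bookkeeping, under the assumption that safepoint emission simply appends a (LIR*, MIR*) pair (placeholder types, not ART's):

    #include <cstddef>
    #include <utility>
    #include <vector>

    struct LIR {};  // Placeholder for the backend's low-level IR node.
    struct MIR {};  // Placeholder for the mid-level IR instruction.

    class SafepointRecorder {
     public:
      void SetCurrentMir(MIR* mir) { current_mir_ = mir; }

      // Called wherever a safepoint LIR is emitted; prologue safepoints are
      // recorded with a null MIR, matching the DCHECK in GenSpecialCase.
      void RecordSafepoint(LIR* lir) {
        safepoints_.push_back({lir, current_mir_});
      }

      const std::vector<std::pair<LIR*, MIR*>>& safepoints() const {
        return safepoints_;
      }

     private:
      MIR* current_mir_ = nullptr;
      std::vector<std::pair<LIR*, MIR*>> safepoints_;
    };

    int main() {
      SafepointRecorder rec;
      LIR prologue_check;
      rec.RecordSafepoint(&prologue_check);  // MIR is null: prologue safepoint.
      MIR invoke;
      LIR call_site;
      rec.SetCurrentMir(&invoke);
      rec.RecordSafepoint(&call_site);       // Attributed to the invoke MIR.
      return rec.safepoints().size() == 2 ? 0 : 1;
    }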
diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc
index 6f26b78..118ab1d 100644
--- a/compiler/dex/quick/x86/assemble_x86.cc
+++ b/compiler/dex/quick/x86/assemble_x86.cc
@@ -1586,13 +1586,11 @@
                            int32_t raw_index, int scale, int32_t table_or_disp) {
   int disp;
   if (entry->opcode == kX86PcRelLoadRA) {
-    Mir2Lir::EmbeddedData *tab_rec =
-        reinterpret_cast<Mir2Lir::EmbeddedData*>(UnwrapPointer(table_or_disp));
+    const EmbeddedData* tab_rec = UnwrapPointer<EmbeddedData>(table_or_disp);
     disp = tab_rec->offset;
   } else {
     DCHECK(entry->opcode == kX86PcRelAdr);
-    Mir2Lir::EmbeddedData *tab_rec =
-        reinterpret_cast<Mir2Lir::EmbeddedData*>(UnwrapPointer(raw_base_or_table));
+    const EmbeddedData* tab_rec = UnwrapPointer<EmbeddedData>(raw_base_or_table);
     disp = tab_rec->offset;
   }
   if (entry->opcode == kX86PcRelLoadRA) {
@@ -1794,8 +1792,7 @@
             DCHECK_EQ(lir->opcode, kX86Lea64RM) << "Unknown instruction: " << X86Mir2Lir::EncodingMap[lir->opcode].name;
             DCHECK_EQ(lir->operands[1], static_cast<int>(kRIPReg));
             // Grab the target offset from the saved data.
-            Mir2Lir::EmbeddedData* tab_rec =
-                reinterpret_cast<Mir2Lir::EmbeddedData*>(UnwrapPointer(lir->operands[4]));
+            const EmbeddedData* tab_rec = UnwrapPointer<EmbeddedData>(lir->operands[4]);
             CodeOffset target = tab_rec->offset;
             // Handle 64 bit RIP addressing.
             // Offset is relative to next instruction.
diff --git a/compiler/dex/quick/x86/call_x86.cc b/compiler/dex/quick/x86/call_x86.cc
index 11c1465..abee872 100644
--- a/compiler/dex/quick/x86/call_x86.cc
+++ b/compiler/dex/quick/x86/call_x86.cc
@@ -193,7 +193,7 @@
     class StackOverflowSlowPath : public LIRSlowPath {
      public:
       StackOverflowSlowPath(Mir2Lir* m2l, LIR* branch, size_t sp_displace)
-          : LIRSlowPath(m2l, m2l->GetCurrentDexPc(), branch, nullptr), sp_displace_(sp_displace) {
+          : LIRSlowPath(m2l, branch), sp_displace_(sp_displace) {
       }
       void Compile() OVERRIDE {
         m2l_->ResetRegPool();
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index 20163b4..040a8c4 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -296,7 +296,7 @@
   LIR* OpIT(ConditionCode cond, const char* guide) OVERRIDE;
   void OpEndIT(LIR* it) OVERRIDE;
   LIR* OpMem(OpKind op, RegStorage r_base, int disp) OVERRIDE;
-  LIR* OpPcRelLoad(RegStorage reg, LIR* target) OVERRIDE;
+  void OpPcRelLoad(RegStorage reg, LIR* target) OVERRIDE;
   LIR* OpReg(OpKind op, RegStorage r_dest_src) OVERRIDE;
   void OpRegCopy(RegStorage r_dest, RegStorage r_src) OVERRIDE;
   LIR* OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src) OVERRIDE;
diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc
index 91168c7..4eb626c 100755
--- a/compiler/dex/quick/x86/int_x86.cc
+++ b/compiler/dex/quick/x86/int_x86.cc
@@ -1324,7 +1324,7 @@
   return true;
 }
 
-LIR* X86Mir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) {
+void X86Mir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) {
   if (cu_->target64) {
     // We can do this directly using RIP addressing.
     // We don't know the proper offset for the value, so pick one that will force
@@ -1334,7 +1334,7 @@
     LIR* res = NewLIR3(kX86Mov32RM, reg.GetReg(), kRIPReg, 256);
     res->target = target;
     res->flags.fixup = kFixupLoad;
-    return res;
+    return;
   }
 
   CHECK(base_of_code_ != nullptr);
@@ -1353,11 +1353,9 @@
   // 4 byte offset.  We will fix this up in the assembler later to have the right
   // value.
   ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
-  LIR *res = RawLIR(current_dalvik_offset_, kX86Mov32RM, reg.GetReg(), reg.GetReg(), 256,
-                    0, 0, target);
+  LIR* res = NewLIR3(kX86Mov32RM, reg.GetReg(), reg.GetReg(), 256);
   res->target = target;
   res->flags.fixup = kFixupLoad;
-  return res;
 }
 
 LIR* X86Mir2Lir::OpVldm(RegStorage r_base, int count) {
@@ -1412,7 +1410,7 @@
    public:
     ArrayBoundsCheckSlowPath(Mir2Lir* m2l, LIR* branch_in,
                              RegStorage index_in, RegStorage array_base_in, int32_t len_offset_in)
-        : LIRSlowPath(m2l, m2l->GetCurrentDexPc(), branch_in),
+        : LIRSlowPath(m2l, branch_in),
           index_(index_in), array_base_(array_base_in), len_offset_(len_offset_in) {
     }
 
@@ -1460,7 +1458,7 @@
    public:
     ArrayBoundsCheckSlowPath(Mir2Lir* m2l, LIR* branch_in,
                              int32_t index_in, RegStorage array_base_in, int32_t len_offset_in)
-        : LIRSlowPath(m2l, m2l->GetCurrentDexPc(), branch_in),
+        : LIRSlowPath(m2l, branch_in),
           index_(index_in), array_base_(array_base_in), len_offset_(len_offset_in) {
     }
 
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index dbe4848..f128eb7 100755
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -390,7 +390,7 @@
              break;
           }
           case 'p': {
-            EmbeddedData *tab_rec = reinterpret_cast<EmbeddedData*>(UnwrapPointer(operand));
+            const EmbeddedData* tab_rec = UnwrapPointer<EmbeddedData>(operand);
             buf += StringPrintf("0x%08x", tab_rec->offset);
             break;
           }
@@ -1062,8 +1062,7 @@
   for (LIR* p : method_address_insns_) {
       DCHECK_EQ(p->opcode, kX86Mov32RI);
       uint32_t target_method_idx = p->operands[2];
-      const DexFile* target_dex_file =
-          reinterpret_cast<const DexFile*>(UnwrapPointer(p->operands[3]));
+      const DexFile* target_dex_file = UnwrapPointer<DexFile>(p->operands[3]);
 
       // The offset to patch is the last 4 bytes of the instruction.
       int patch_offset = p->offset + p->flags.size - 4;
@@ -1075,8 +1074,7 @@
   for (LIR* p : class_type_address_insns_) {
       DCHECK_EQ(p->opcode, kX86Mov32RI);
 
-      const DexFile* class_dex_file =
-        reinterpret_cast<const DexFile*>(UnwrapPointer(p->operands[3]));
+      const DexFile* class_dex_file = UnwrapPointer<DexFile>(p->operands[3]);
       uint32_t target_type_idx = p->operands[2];
 
       // The offset to patch is the last 4 bytes of the instruction.
@@ -1090,8 +1088,7 @@
   for (LIR* p : call_method_insns_) {
       DCHECK_EQ(p->opcode, kX86CallI);
       uint32_t target_method_idx = p->operands[1];
-      const DexFile* target_dex_file =
-          reinterpret_cast<const DexFile*>(UnwrapPointer(p->operands[2]));
+      const DexFile* target_dex_file = UnwrapPointer<DexFile>(p->operands[2]);
 
       // The offset to patch is the last 4 bytes of the instruction.
       int patch_offset = p->offset + p->flags.size - 4;
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index ff4e0d8..100d49a 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -491,11 +491,12 @@
   }
 }
 
-static DexToDexCompilationLevel GetDexToDexCompilationlevel(
+DexToDexCompilationLevel CompilerDriver::GetDexToDexCompilationlevel(
     Thread* self, Handle<mirror::ClassLoader> class_loader, const DexFile& dex_file,
-    const DexFile::ClassDef& class_def) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    const DexFile::ClassDef& class_def) {
   auto* const runtime = Runtime::Current();
-  if (runtime->UseJit()) {
+  if (runtime->UseJit() || GetCompilerOptions().VerifyAtRuntime()) {
+    // Verify-at-runtime should not run dex-to-dex since we did not resolve or verify.
     return kDontDexToDexCompile;
   }
   const char* descriptor = dex_file.GetClassDescriptor(class_def);
@@ -605,12 +606,22 @@
   LoadImageClasses(timings);
   VLOG(compiler) << "LoadImageClasses: " << GetMemoryUsageString(false);
 
-  Resolve(class_loader, dex_files, thread_pool, timings);
-  VLOG(compiler) << "Resolve: " << GetMemoryUsageString(false);
+  const bool verification_enabled = compiler_options_->IsVerificationEnabled();
+  const bool never_verify = compiler_options_->NeverVerify();
 
-  if (!compiler_options_->IsVerificationEnabled()) {
+  // We need to resolve for never_verify since dex-to-dex still runs in that mode to add
+  // RETURN_VOID_NO_BARRIER.
+  if (never_verify || verification_enabled) {
+    Resolve(class_loader, dex_files, thread_pool, timings);
+    VLOG(compiler) << "Resolve: " << GetMemoryUsageString(false);
+  }
+
+  if (never_verify) {
     VLOG(compiler) << "Verify none mode specified, skipping verification.";
     SetVerified(class_loader, dex_files, thread_pool, timings);
+  }
+
+  if (!verification_enabled) {
     return;
   }
 
@@ -1387,8 +1398,11 @@
     }
   } else {
     bool method_in_image = heap->FindSpaceFromObject(method, false)->IsImageSpace();
-    if (method_in_image || compiling_boot) {
+    if (method_in_image || compiling_boot || runtime->UseJit()) {
       // We know we must be able to get to the method in the image, so use that pointer.
+      // In the case where we are the JIT, we can always use direct pointers since we know where
+      // the method and its code are or will be. We don't sharpen to the interpreter bridge since
+      // we check IsQuickToInterpreterBridge above.
       CHECK(!method->IsAbstract());
       *type = sharp_type;
       *direct_method = force_relocations ? -1 : reinterpret_cast<uintptr_t>(method);
@@ -2090,6 +2104,8 @@
     return;
   }
 
+  CompilerDriver* const driver = manager->GetCompiler();
+
   // Can we run DEX-to-DEX compiler on this class ?
   DexToDexCompilationLevel dex_to_dex_compilation_level = kDontDexToDexCompile;
   {
@@ -2097,8 +2113,8 @@
     StackHandleScope<1> hs(soa.Self());
     Handle<mirror::ClassLoader> class_loader(
         hs.NewHandle(soa.Decode<mirror::ClassLoader*>(jclass_loader)));
-    dex_to_dex_compilation_level = GetDexToDexCompilationlevel(soa.Self(), class_loader, dex_file,
-                                                               class_def);
+    dex_to_dex_compilation_level = driver->GetDexToDexCompilationlevel(
+        soa.Self(), class_loader, dex_file, class_def);
   }
   ClassDataItemIterator it(dex_file, class_data);
   // Skip fields
@@ -2108,7 +2124,6 @@
   while (it.HasNextInstanceField()) {
     it.Next();
   }
-  CompilerDriver* driver = manager->GetCompiler();
 
   bool compilation_enabled = driver->IsClassToCompile(
       dex_file.StringByTypeIdx(class_def.class_idx_));
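The PreCompile changes above gate resolution and verification on the filter in three steps: resolve when verifying or in verify-none mode (the latter still needs dex-to-dex), mark classes verified in verify-none mode, and return before verification whenever it is disabled. A condensed, runnable restatement of that control flow (a sketch, not the actual driver code):

    #include <cstdio>

    enum Filter { kVerifyNone, kVerifyAtRuntime, kNormal };

    // Condensed control flow of PreCompile's resolve/verify gating.
    void PreCompileGating(Filter f) {
      const bool verification_enabled = (f == kNormal);
      const bool never_verify = (f == kVerifyNone);

      // kVerifyNone still resolves, because dex-to-dex needs to run to insert
      // RETURN_VOID_NO_BARRIER; kVerifyAtRuntime skips resolution entirely.
      if (never_verify || verification_enabled) {
        std::puts("Resolve()");
      }
      if (never_verify) {
        std::puts("SetVerified()  // skip verification, mark classes verified");
      }
      if (!verification_enabled) {
        return;  // Both kVerifyNone and kVerifyAtRuntime stop here.
      }
      std::puts("Verify()");
    }

    int main() {
      PreCompileGating(kVerifyAtRuntime);  // Prints nothing: no resolve, no verify.
      return 0;
    }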
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index 28a8245..9463c2c 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -468,6 +468,10 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
  private:
+  DexToDexCompilationLevel GetDexToDexCompilationlevel(
+      Thread* self, Handle<mirror::ClassLoader> class_loader, const DexFile& dex_file,
+      const DexFile::ClassDef& class_def) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   void PreCompile(jobject class_loader, const std::vector<const DexFile*>& dex_files,
                   ThreadPool* thread_pool, TimingLogger* timings)
       LOCKS_EXCLUDED(Locks::mutator_lock_);
diff --git a/compiler/driver/compiler_options.h b/compiler/driver/compiler_options.h
index 5042c75..d06ec27 100644
--- a/compiler/driver/compiler_options.h
+++ b/compiler/driver/compiler_options.h
@@ -32,7 +32,8 @@
  public:
   enum CompilerFilter {
     kVerifyNone,          // Skip verification and compile nothing except JNI stubs.
-    kInterpretOnly,       // Compile nothing except JNI stubs.
+    kInterpretOnly,       // Verify, and compile only JNI stubs.
+    kVerifyAtRuntime,     // Only compile JNI stubs and verify at runtime.
     kSpace,               // Maximize space savings.
     kBalanced,            // Try to get the best performance return on compilation investment.
     kSpeed,               // Maximize runtime performance.
@@ -81,13 +82,23 @@
     compiler_filter_ = compiler_filter;
   }
 
+  bool VerifyAtRuntime() const {
+    return compiler_filter_ == CompilerOptions::kVerifyAtRuntime;
+  }
+
   bool IsCompilationEnabled() const {
-    return ((compiler_filter_ != CompilerOptions::kVerifyNone) &&
-            (compiler_filter_ != CompilerOptions::kInterpretOnly));
+    return compiler_filter_ != CompilerOptions::kVerifyNone &&
+        compiler_filter_ != CompilerOptions::kInterpretOnly &&
+        compiler_filter_ != CompilerOptions::kVerifyAtRuntime;
   }
 
   bool IsVerificationEnabled() const {
-    return (compiler_filter_ != CompilerOptions::kVerifyNone);
+    return compiler_filter_ != CompilerOptions::kVerifyNone &&
+        compiler_filter_ != CompilerOptions::kVerifyAtRuntime;
+  }
+
+  bool NeverVerify() const {
+    return compiler_filter_ == CompilerOptions::kVerifyNone;
   }
 
   size_t GetHugeMethodThreshold() const {
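The new kVerifyAtRuntime filter slots between kVerifyNone and kInterpretOnly: it compiles nothing but JNI stubs and skips ahead-of-time verification, yet is not "never verify", since verification still happens later at runtime. A self-contained sketch of the predicate logic above (enum abridged to the filters relevant here):

    #include <cstdio>

    enum CompilerFilter {
      kVerifyNone,       // Skip verification; compile only JNI stubs.
      kInterpretOnly,    // Verify, compile only JNI stubs.
      kVerifyAtRuntime,  // Compile only JNI stubs; defer verification to runtime.
      kSpace, kBalanced, kSpeed, kEverything
    };

    bool IsCompilationEnabled(CompilerFilter f) {
      return f != kVerifyNone && f != kInterpretOnly && f != kVerifyAtRuntime;
    }
    bool IsVerificationEnabled(CompilerFilter f) {
      return f != kVerifyNone && f != kVerifyAtRuntime;
    }
    bool NeverVerify(CompilerFilter f) { return f == kVerifyNone; }
    bool VerifyAtRuntime(CompilerFilter f) { return f == kVerifyAtRuntime; }

    int main() {
      // kVerifyAtRuntime: no AOT compilation, no AOT verification, but not
      // "never verify" -- verification is merely deferred.
      CompilerFilter f = kVerifyAtRuntime;
      std::printf("compile=%d verifyAOT=%d never=%d\n",
                  IsCompilationEnabled(f), IsVerificationEnabled(f), NeverVerify(f));
      return 0;
    }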
diff --git a/compiler/jit/jit_compiler.cc b/compiler/jit/jit_compiler.cc
index beb5755..8b31154 100644
--- a/compiler/jit/jit_compiler.cc
+++ b/compiler/jit/jit_compiler.cc
@@ -217,20 +217,21 @@
   auto* const mapping_table = compiled_method->GetMappingTable();
   auto* const vmap_table = compiled_method->GetVmapTable();
   auto* const gc_map = compiled_method->GetGcMap();
+  CHECK(gc_map != nullptr) << PrettyMethod(method);
   // Write out pre-header stuff.
   uint8_t* const mapping_table_ptr = code_cache->AddDataArray(
       self, mapping_table->data(), mapping_table->data() + mapping_table->size());
-  if (mapping_table == nullptr) {
+  if (mapping_table_ptr == nullptr) {
     return false;  // Out of data cache.
   }
   uint8_t* const vmap_table_ptr = code_cache->AddDataArray(
       self, vmap_table->data(), vmap_table->data() + vmap_table->size());
-  if (vmap_table == nullptr) {
+  if (vmap_table_ptr == nullptr) {
     return false;  // Out of data cache.
   }
   uint8_t* const gc_map_ptr = code_cache->AddDataArray(
       self, gc_map->data(), gc_map->data() + gc_map->size());
-  if (gc_map == nullptr) {
+  if (gc_map_ptr == nullptr) {
     return false;  // Out of data cache.
   }
   // Don't touch this until you protect / unprotect the code.
diff --git a/compiler/jni/jni_compiler_test.cc b/compiler/jni/jni_compiler_test.cc
index f513ea8..70bfb81 100644
--- a/compiler/jni/jni_compiler_test.cc
+++ b/compiler/jni/jni_compiler_test.cc
@@ -1510,25 +1510,25 @@
   EXPECT_EQ(i9, 9);
   EXPECT_EQ(i10, 10);
 
-  jint i11 = bit_cast<jfloat, jint>(f1);
+  jint i11 = bit_cast<jint, jfloat>(f1);
   EXPECT_EQ(i11, 11);
-  jint i12 = bit_cast<jfloat, jint>(f2);
+  jint i12 = bit_cast<jint, jfloat>(f2);
   EXPECT_EQ(i12, 12);
-  jint i13 = bit_cast<jfloat, jint>(f3);
+  jint i13 = bit_cast<jint, jfloat>(f3);
   EXPECT_EQ(i13, 13);
-  jint i14 = bit_cast<jfloat, jint>(f4);
+  jint i14 = bit_cast<jint, jfloat>(f4);
   EXPECT_EQ(i14, 14);
-  jint i15 = bit_cast<jfloat, jint>(f5);
+  jint i15 = bit_cast<jint, jfloat>(f5);
   EXPECT_EQ(i15, 15);
-  jint i16 = bit_cast<jfloat, jint>(f6);
+  jint i16 = bit_cast<jint, jfloat>(f6);
   EXPECT_EQ(i16, 16);
-  jint i17 = bit_cast<jfloat, jint>(f7);
+  jint i17 = bit_cast<jint, jfloat>(f7);
   EXPECT_EQ(i17, 17);
-  jint i18 = bit_cast<jfloat, jint>(f8);
+  jint i18 = bit_cast<jint, jfloat>(f8);
   EXPECT_EQ(i18, 18);
-  jint i19 = bit_cast<jfloat, jint>(f9);
+  jint i19 = bit_cast<jint, jfloat>(f9);
   EXPECT_EQ(i19, 19);
-  jint i20 = bit_cast<jfloat, jint>(f10);
+  jint i20 = bit_cast<jint, jfloat>(f10);
   EXPECT_EQ(i20, 20);
 }
 
@@ -1547,16 +1547,16 @@
   jint i9 = 9;
   jint i10 = 10;
 
-  jfloat f1 = bit_cast<jint, jfloat>(11);
-  jfloat f2 = bit_cast<jint, jfloat>(12);
-  jfloat f3 = bit_cast<jint, jfloat>(13);
-  jfloat f4 = bit_cast<jint, jfloat>(14);
-  jfloat f5 = bit_cast<jint, jfloat>(15);
-  jfloat f6 = bit_cast<jint, jfloat>(16);
-  jfloat f7 = bit_cast<jint, jfloat>(17);
-  jfloat f8 = bit_cast<jint, jfloat>(18);
-  jfloat f9 = bit_cast<jint, jfloat>(19);
-  jfloat f10 = bit_cast<jint, jfloat>(20);
+  jfloat f1 = bit_cast<jfloat, jint>(11);
+  jfloat f2 = bit_cast<jfloat, jint>(12);
+  jfloat f3 = bit_cast<jfloat, jint>(13);
+  jfloat f4 = bit_cast<jfloat, jint>(14);
+  jfloat f5 = bit_cast<jfloat, jint>(15);
+  jfloat f6 = bit_cast<jfloat, jint>(16);
+  jfloat f7 = bit_cast<jfloat, jint>(17);
+  jfloat f8 = bit_cast<jfloat, jint>(18);
+  jfloat f9 = bit_cast<jfloat, jint>(19);
+  jfloat f10 = bit_cast<jfloat, jint>(20);
 
   env_->CallStaticVoidMethod(jklass_, jmethod_, i1, i2, i3, i4, i5, i6, i7, i8, i9, i10, f1, f2,
                              f3, f4, f5, f6, f7, f8, f9, f10);
@@ -1580,25 +1580,25 @@
   EXPECT_EQ(i9, 9);
   EXPECT_EQ(i10, 10);
 
-  jint i11 = bit_cast<jfloat, jint>(f1);
+  jint i11 = bit_cast<jint, jfloat>(f1);
   EXPECT_EQ(i11, 11);
-  jint i12 = bit_cast<jfloat, jint>(f2);
+  jint i12 = bit_cast<jint, jfloat>(f2);
   EXPECT_EQ(i12, 12);
-  jint i13 = bit_cast<jfloat, jint>(f3);
+  jint i13 = bit_cast<jint, jfloat>(f3);
   EXPECT_EQ(i13, 13);
-  jint i14 = bit_cast<jfloat, jint>(f4);
+  jint i14 = bit_cast<jint, jfloat>(f4);
   EXPECT_EQ(i14, 14);
-  jint i15 = bit_cast<jfloat, jint>(f5);
+  jint i15 = bit_cast<jint, jfloat>(f5);
   EXPECT_EQ(i15, 15);
-  jint i16 = bit_cast<jfloat, jint>(f6);
+  jint i16 = bit_cast<jint, jfloat>(f6);
   EXPECT_EQ(i16, 16);
-  jint i17 = bit_cast<jfloat, jint>(f7);
+  jint i17 = bit_cast<jint, jfloat>(f7);
   EXPECT_EQ(i17, 17);
-  jint i18 = bit_cast<jfloat, jint>(f8);
+  jint i18 = bit_cast<jint, jfloat>(f8);
   EXPECT_EQ(i18, 18);
-  jint i19 = bit_cast<jfloat, jint>(f9);
+  jint i19 = bit_cast<jint, jfloat>(f9);
   EXPECT_EQ(i19, 19);
-  jint i20 = bit_cast<jfloat, jint>(f10);
+  jint i20 = bit_cast<jint, jfloat>(f10);
   EXPECT_EQ(i20, 20);
 }
 
@@ -1617,16 +1617,16 @@
   jint i9 = 9;
   jint i10 = 10;
 
-  jfloat f1 = bit_cast<jint, jfloat>(11);
-  jfloat f2 = bit_cast<jint, jfloat>(12);
-  jfloat f3 = bit_cast<jint, jfloat>(13);
-  jfloat f4 = bit_cast<jint, jfloat>(14);
-  jfloat f5 = bit_cast<jint, jfloat>(15);
-  jfloat f6 = bit_cast<jint, jfloat>(16);
-  jfloat f7 = bit_cast<jint, jfloat>(17);
-  jfloat f8 = bit_cast<jint, jfloat>(18);
-  jfloat f9 = bit_cast<jint, jfloat>(19);
-  jfloat f10 = bit_cast<jint, jfloat>(20);
+  jfloat f1 = bit_cast<jfloat, jint>(11);
+  jfloat f2 = bit_cast<jfloat, jint>(12);
+  jfloat f3 = bit_cast<jfloat, jint>(13);
+  jfloat f4 = bit_cast<jfloat, jint>(14);
+  jfloat f5 = bit_cast<jfloat, jint>(15);
+  jfloat f6 = bit_cast<jfloat, jint>(16);
+  jfloat f7 = bit_cast<jfloat, jint>(17);
+  jfloat f8 = bit_cast<jfloat, jint>(18);
+  jfloat f9 = bit_cast<jfloat, jint>(19);
+  jfloat f10 = bit_cast<jfloat, jint>(20);
 
   env_->CallStaticVoidMethod(jklass_, jmethod_, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, i1, i2, i3,
                              i4, i5, i6, i7, i8, i9, i10);
@@ -1649,25 +1649,25 @@
   EXPECT_EQ(i9, 9);
   EXPECT_EQ(i10, 10);
 
-  jint i11 = bit_cast<jfloat, jint>(f1);
+  jint i11 = bit_cast<jint, jfloat>(f1);
   EXPECT_EQ(i11, 11);
-  jint i12 = bit_cast<jfloat, jint>(f2);
+  jint i12 = bit_cast<jint, jfloat>(f2);
   EXPECT_EQ(i12, 12);
-  jint i13 = bit_cast<jfloat, jint>(f3);
+  jint i13 = bit_cast<jint, jfloat>(f3);
   EXPECT_EQ(i13, 13);
-  jint i14 = bit_cast<jfloat, jint>(f4);
+  jint i14 = bit_cast<jint, jfloat>(f4);
   EXPECT_EQ(i14, 14);
-  jint i15 = bit_cast<jfloat, jint>(f5);
+  jint i15 = bit_cast<jint, jfloat>(f5);
   EXPECT_EQ(i15, 15);
-  jint i16 = bit_cast<jfloat, jint>(f6);
+  jint i16 = bit_cast<jint, jfloat>(f6);
   EXPECT_EQ(i16, 16);
-  jint i17 = bit_cast<jfloat, jint>(f7);
+  jint i17 = bit_cast<jint, jfloat>(f7);
   EXPECT_EQ(i17, 17);
-  jint i18 = bit_cast<jfloat, jint>(f8);
+  jint i18 = bit_cast<jint, jfloat>(f8);
   EXPECT_EQ(i18, 18);
-  jint i19 = bit_cast<jfloat, jint>(f9);
+  jint i19 = bit_cast<jint, jfloat>(f9);
   EXPECT_EQ(i19, 19);
-  jint i20 = bit_cast<jfloat, jint>(f10);
+  jint i20 = bit_cast<jint, jfloat>(f10);
   EXPECT_EQ(i20, 20);
 }
 
@@ -1686,16 +1686,16 @@
   jint i9 = 9;
   jint i10 = 10;
 
-  jfloat f1 = bit_cast<jint, jfloat>(11);
-  jfloat f2 = bit_cast<jint, jfloat>(12);
-  jfloat f3 = bit_cast<jint, jfloat>(13);
-  jfloat f4 = bit_cast<jint, jfloat>(14);
-  jfloat f5 = bit_cast<jint, jfloat>(15);
-  jfloat f6 = bit_cast<jint, jfloat>(16);
-  jfloat f7 = bit_cast<jint, jfloat>(17);
-  jfloat f8 = bit_cast<jint, jfloat>(18);
-  jfloat f9 = bit_cast<jint, jfloat>(19);
-  jfloat f10 = bit_cast<jint, jfloat>(20);
+  jfloat f1 = bit_cast<jfloat, jint>(11);
+  jfloat f2 = bit_cast<jfloat, jint>(12);
+  jfloat f3 = bit_cast<jfloat, jint>(13);
+  jfloat f4 = bit_cast<jfloat, jint>(14);
+  jfloat f5 = bit_cast<jfloat, jint>(15);
+  jfloat f6 = bit_cast<jfloat, jint>(16);
+  jfloat f7 = bit_cast<jfloat, jint>(17);
+  jfloat f8 = bit_cast<jfloat, jint>(18);
+  jfloat f9 = bit_cast<jfloat, jint>(19);
+  jfloat f10 = bit_cast<jfloat, jint>(20);
 
   env_->CallStaticVoidMethod(jklass_, jmethod_, i1, f1, i2, f2, i3, f3, i4, f4, i5, f5, i6, f6, i7,
                              f7, i8, f8, i9, f9, i10, f10);
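All of these test fixes flip the template arguments into the bit_cast<Destination, Source> order. A self-contained illustration of why the order matters; the memcpy-based helper below mirrors the semantics such a bit_cast provides (it is a sketch, not ART's implementation):

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    // Same shape as the fixed call sites: destination type first, source second.
    template <typename Dest, typename Source>
    Dest bit_cast(const Source& source) {
      static_assert(sizeof(Dest) == sizeof(Source), "size mismatch");
      Dest dest;
      std::memcpy(&dest, &source, sizeof(dest));
      return dest;
    }

    int main() {
      float f = 11.0f;
      // Correct: reinterpret the float's bits as an int32.
      int32_t bits = bit_cast<int32_t, float>(f);
      // The reversed spelling bit_cast<float, int32_t>(f) would instead bind f
      // to a const int32_t& -- a *value* conversion to the integer 11 -- and
      // then reinterpret those bits as float: silently the wrong operation.
      std::printf("0x%08x\n", bits);  // 0x41300000
      return 0;
    }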
diff --git a/compiler/optimizing/boolean_simplifier.cc b/compiler/optimizing/boolean_simplifier.cc
new file mode 100644
index 0000000..0ecc0d7
--- /dev/null
+++ b/compiler/optimizing/boolean_simplifier.cc
@@ -0,0 +1,131 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "boolean_simplifier.h"
+
+namespace art {
+
+// Returns true if 'block1' and 'block2' are empty, merge into the same single
+// successor, and that successor can only be reached from them.
+static bool BlocksDoMergeTogether(HBasicBlock* block1, HBasicBlock* block2) {
+  if (!block1->IsSingleGoto() || !block2->IsSingleGoto()) return false;
+  HBasicBlock* succ1 = block1->GetSuccessors().Get(0);
+  HBasicBlock* succ2 = block2->GetSuccessors().Get(0);
+  return succ1 == succ2 && succ1->GetPredecessors().Size() == 2u;
+}
+
+// Returns true if the outcome of the branching matches the boolean value of
+// the branching condition.
+static bool PreservesCondition(HInstruction* input_true, HInstruction* input_false) {
+  return input_true->IsIntConstant() && input_true->AsIntConstant()->IsOne()
+      && input_false->IsIntConstant() && input_false->AsIntConstant()->IsZero();
+}
+
+// Returns true if the outcome of the branching is exactly opposite of the
+// boolean value of the branching condition.
+static bool NegatesCondition(HInstruction* input_true, HInstruction* input_false) {
+  return input_true->IsIntConstant() && input_true->AsIntConstant()->IsZero()
+      && input_false->IsIntConstant() && input_false->AsIntConstant()->IsOne();
+}
+
+// Returns an instruction with the opposite boolean value from 'cond'.
+static HInstruction* GetOppositeCondition(HInstruction* cond) {
+  HGraph* graph = cond->GetBlock()->GetGraph();
+  ArenaAllocator* allocator = graph->GetArena();
+
+  if (cond->IsCondition()) {
+    HInstruction* lhs = cond->InputAt(0);
+    HInstruction* rhs = cond->InputAt(1);
+    if (cond->IsEqual()) {
+      return new (allocator) HNotEqual(lhs, rhs);
+    } else if (cond->IsNotEqual()) {
+      return new (allocator) HEqual(lhs, rhs);
+    } else if (cond->IsLessThan()) {
+      return new (allocator) HGreaterThanOrEqual(lhs, rhs);
+    } else if (cond->IsLessThanOrEqual()) {
+      return new (allocator) HGreaterThan(lhs, rhs);
+    } else if (cond->IsGreaterThan()) {
+      return new (allocator) HLessThanOrEqual(lhs, rhs);
+    } else if (cond->IsGreaterThanOrEqual()) {
+      return new (allocator) HLessThan(lhs, rhs);
+    }
+  } else if (cond->IsIntConstant()) {
+    HIntConstant* int_const = cond->AsIntConstant();
+    if (int_const->IsZero()) {
+      return graph->GetIntConstant1();
+    } else {
+      DCHECK(int_const->IsOne());
+      return graph->GetIntConstant0();
+    }
+  }
+
+  LOG(FATAL) << "Instruction " << cond->DebugName() << " used as a condition";
+  UNREACHABLE();
+}
+
+void HBooleanSimplifier::Run() {
+  // Iterate in post order in the unlikely case that removing one occurrence of
+  // the pattern empties a branch block of another occurrence. Otherwise the
+  // order does not matter.
+  for (HPostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
+    HBasicBlock* block = it.Current();
+    if (!block->EndsWithIf()) continue;
+
+    // Find elements of the pattern.
+    HIf* if_instruction = block->GetLastInstruction()->AsIf();
+    HBasicBlock* true_block = if_instruction->IfTrueSuccessor();
+    HBasicBlock* false_block = if_instruction->IfFalseSuccessor();
+    if (!BlocksDoMergeTogether(true_block, false_block)) {
+      continue;
+    }
+    HBasicBlock* merge_block = true_block->GetSuccessors().Get(0);
+    if (!merge_block->HasSinglePhi()) {
+      continue;
+    }
+    HPhi* phi = merge_block->GetFirstPhi()->AsPhi();
+    HInstruction* true_value = phi->InputAt(merge_block->GetPredecessorIndexOf(true_block));
+    HInstruction* false_value = phi->InputAt(merge_block->GetPredecessorIndexOf(false_block));
+
+    // Check if the selection negates/preserves the value of the condition and
+    // if so, generate a suitable replacement instruction.
+    HInstruction* if_condition = if_instruction->InputAt(0);
+    HInstruction* replacement;
+    if (NegatesCondition(true_value, false_value)) {
+      replacement = GetOppositeCondition(if_condition);
+      if (replacement->GetBlock() == nullptr) {
+        block->InsertInstructionBefore(replacement, if_instruction);
+      }
+    } else if (PreservesCondition(true_value, false_value)) {
+      replacement = if_condition;
+    } else {
+      continue;
+    }
+
+    // Replace the selection outcome with the new instruction.
+    phi->ReplaceWith(replacement);
+    merge_block->RemovePhi(phi);
+
+    // Link the start/end blocks and remove empty branches.
+    graph_->MergeEmptyBranches(block, merge_block);
+
+    // Remove the original condition if it is now unused.
+    if (!if_condition->HasUses()) {
+      if_condition->GetBlock()->RemoveInstruction(if_condition);
+    }
+  }
+}
+
+}  // namespace art
diff --git a/compiler/optimizing/boolean_simplifier.h b/compiler/optimizing/boolean_simplifier.h
new file mode 100644
index 0000000..a88733e
--- /dev/null
+++ b/compiler/optimizing/boolean_simplifier.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// This optimization recognizes a common pattern where a boolean value is
+// either cast to an integer or negated by selecting from zero/one integer
+// constants with an If statement. Because boolean values are internally
+// represented as zero/one, we can safely replace the pattern with a suitable
+// condition instruction.
+
+// Example: Negating a boolean value
+//     B1:
+//       z1   ParameterValue
+//       i2   IntConstant 0
+//       i3   IntConstant 1
+//       v4   Goto B2
+//     B2:
+//       z5   NotEquals [ z1 i2 ]
+//       v6   If [ z5 ] then B3 else B4
+//     B3:
+//       v7   Goto B5
+//     B4:
+//       v8   Goto B5
+//     B5:
+//       i9   Phi [ i3 i2 ]
+//       v10  Return [ i9 ]
+// turns into
+//     B1:
+//       z1   ParameterValue
+//       i2   IntConstant 0
+//       v4   Goto B2
+//     B2:
+//       z11  Equals [ z1 i2 ]
+//       v10  Return [ z11 ]
+//     B3, B4, B5: removed
+
+// Note: in order to recognize empty blocks, this optimization must be run
+// after the instruction simplifier has removed redundant suspend checks.
+
+#ifndef ART_COMPILER_OPTIMIZING_BOOLEAN_SIMPLIFIER_H_
+#define ART_COMPILER_OPTIMIZING_BOOLEAN_SIMPLIFIER_H_
+
+#include "optimization.h"
+
+namespace art {
+
+class HBooleanSimplifier : public HOptimization {
+ public:
+  explicit HBooleanSimplifier(HGraph* graph)
+    : HOptimization(graph, true, kBooleanSimplifierPassName) {}
+
+  void Run() OVERRIDE;
+
+  static constexpr const char* kBooleanSimplifierPassName = "boolean_simplifier";
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(HBooleanSimplifier);
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_BOOLEAN_SIMPLIFIER_H_
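As the header comment above illustrates, the pass rewrites a 0/1-selecting If/Phi diamond into a single condition. A runnable C++ analogue of the before/after shapes (source-level only; the real transformation of course happens on HInstructions):

    #include <cstdio>
    #include <initializer_list>

    // The CFG shape HBooleanSimplifier targets: an If whose two arms merely
    // select the constants 1 and 0 into a Phi.
    static bool NegateViaSelect(bool b) { return b ? false : true; }

    // What the pattern is rewritten to: the opposite condition, no diamond.
    static bool NegateDirect(bool b) { return !b; }

    int main() {
      for (bool b : {false, true}) {
        std::printf("%d: select=%d direct=%d\n", b, NegateViaSelect(b), NegateDirect(b));
      }
      return 0;
    }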
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index 2cac93d..a21c311 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -1024,8 +1024,6 @@
   HInstruction* value = LoadLocal(instruction.VRegA(), Primitive::kPrimInt);
 
   uint16_t num_entries = table.GetNumEntries();
-  // There should be at least one entry here.
-  DCHECK_GT(num_entries, 0U);
 
   for (size_t i = 0; i < num_entries; i++) {
     BuildSwitchCaseHelper(instruction, i, i == static_cast<size_t>(num_entries) - 1, table, value,
@@ -2060,31 +2058,13 @@
   return true;
 }  // NOLINT(readability/fn_size)
 
-HIntConstant* HGraphBuilder::GetIntConstant0() {
-  if (constant0_ != nullptr) {
-    return constant0_;
-  }
-  constant0_ = new(arena_) HIntConstant(0);
-  entry_block_->AddInstruction(constant0_);
-  return constant0_;
-}
-
-HIntConstant* HGraphBuilder::GetIntConstant1() {
-  if (constant1_ != nullptr) {
-    return constant1_;
-  }
-  constant1_ = new(arena_) HIntConstant(1);
-  entry_block_->AddInstruction(constant1_);
-  return constant1_;
-}
-
 HIntConstant* HGraphBuilder::GetIntConstant(int32_t constant) {
   switch (constant) {
-    case 0: return GetIntConstant0();
-    case 1: return GetIntConstant1();
+    case 0: return graph_->GetIntConstant0();
+    case 1: return graph_->GetIntConstant1();
     default: {
       HIntConstant* instruction = new (arena_) HIntConstant(constant);
-      entry_block_->AddInstruction(instruction);
+      graph_->AddConstant(instruction);
       return instruction;
     }
   }
@@ -2092,7 +2072,7 @@
 
 HLongConstant* HGraphBuilder::GetLongConstant(int64_t constant) {
   HLongConstant* instruction = new (arena_) HLongConstant(constant);
-  entry_block_->AddInstruction(instruction);
+  graph_->AddConstant(instruction);
   return instruction;
 }
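With the builder's per-instance caches removed, the 0/1 constants are now owned by HGraph and shared by every user, including the boolean simplifier's GetOppositeCondition. A minimal sketch of that caching scheme (heap allocation stands in for ART's arena; the types are illustrative):

    #include <cstdint>

    // Sketch of the caching this diff moves from HGraphBuilder into HGraph:
    // the 0 and 1 constants are created once and shared graph-wide.
    struct HIntConstant { int32_t value; };

    class Graph {
     public:
      HIntConstant* GetIntConstant0() { return Cached(&c0_, 0); }
      HIntConstant* GetIntConstant1() { return Cached(&c1_, 1); }

     private:
      HIntConstant* Cached(HIntConstant** slot, int32_t value) {
        if (*slot == nullptr) {
          *slot = new HIntConstant{value};  // ART allocates from an arena instead.
        }
        return *slot;
      }
      HIntConstant* c0_ = nullptr;
      HIntConstant* c1_ = nullptr;
    };

    int main() {
      Graph g;
      // Repeated requests return the same node, so passes can hand out
      // graph->GetIntConstant1() without creating duplicates.
      return g.GetIntConstant0() == g.GetIntConstant0() ? 0 : 1;
    }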
 
diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h
index 96196de..c70170b 100644
--- a/compiler/optimizing/builder.h
+++ b/compiler/optimizing/builder.h
@@ -47,8 +47,6 @@
         exit_block_(nullptr),
         current_block_(nullptr),
         graph_(graph),
-        constant0_(nullptr),
-        constant1_(nullptr),
         dex_file_(dex_file),
         dex_compilation_unit_(dex_compilation_unit),
         compiler_driver_(driver),
@@ -67,8 +65,6 @@
         exit_block_(nullptr),
         current_block_(nullptr),
         graph_(graph),
-        constant0_(nullptr),
-        constant1_(nullptr),
         dex_file_(nullptr),
         dex_compilation_unit_(nullptr),
         compiler_driver_(nullptr),
@@ -100,8 +96,6 @@
   void MaybeUpdateCurrentBlock(size_t index);
   HBasicBlock* FindBlockStartingAt(int32_t index) const;
 
-  HIntConstant* GetIntConstant0();
-  HIntConstant* GetIntConstant1();
   HIntConstant* GetIntConstant(int32_t constant);
   HLongConstant* GetLongConstant(int64_t constant);
   void InitializeLocals(uint16_t count);
@@ -253,9 +247,6 @@
   HBasicBlock* current_block_;
   HGraph* const graph_;
 
-  HIntConstant* constant0_;
-  HIntConstant* constant1_;
-
   // The dex file where the method being compiled is.
   const DexFile* const dex_file_;
 
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 561dcb7..bd6e943 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -40,16 +40,6 @@
   return mirror::ObjectArray<mirror::Object>::OffsetOfElement(index).SizeValue();
 }
 
-static bool IsSingleGoto(HBasicBlock* block) {
-  HLoopInformation* loop_info = block->GetLoopInformation();
-  // TODO: Remove the null check b/19084197.
-  return (block->GetFirstInstruction() != nullptr)
-      && (block->GetFirstInstruction() == block->GetLastInstruction())
-      && block->GetLastInstruction()->IsGoto()
-      // Back edges generate the suspend check.
-      && (loop_info == nullptr || !loop_info->IsBackEdge(block));
-}
-
 void CodeGenerator::CompileBaseline(CodeAllocator* allocator, bool is_leaf) {
   Initialize();
   if (!is_leaf) {
@@ -74,7 +64,7 @@
 HBasicBlock* CodeGenerator::GetNextBlockToEmit() const {
   for (size_t i = current_block_index_ + 1; i < block_order_->Size(); ++i) {
     HBasicBlock* block = block_order_->Get(i);
-    if (!IsSingleGoto(block)) {
+    if (!block->IsSingleGoto()) {
       return block;
     }
   }
@@ -82,7 +72,7 @@
 }
 
 HBasicBlock* CodeGenerator::FirstNonEmptyBlock(HBasicBlock* block) const {
-  while (IsSingleGoto(block)) {
+  while (block->IsSingleGoto()) {
     block = block->GetSuccessors().Get(0);
   }
   return block;
@@ -97,7 +87,7 @@
     // Don't generate code for an empty block. Its predecessors will branch to its successor
     // directly. Also, the label of that block will not be emitted, so this helps catch
     // errors where we reference that label.
-    if (IsSingleGoto(block)) continue;
+    if (block->IsSingleGoto()) continue;
     Bind(block);
     for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
       HInstruction* current = it.Current();
@@ -628,7 +618,7 @@
               ++i, DexRegisterLocation::Kind::kConstant, High32Bits(value));
           DCHECK_LT(i, environment_size);
         } else if (current->IsDoubleConstant()) {
-          int64_t value = bit_cast<double, int64_t>(current->AsDoubleConstant()->GetValue());
+          int64_t value = bit_cast<int64_t, double>(current->AsDoubleConstant()->GetValue());
           stack_map_stream_.AddDexRegisterEntry(
               i, DexRegisterLocation::Kind::kConstant, Low32Bits(value));
           stack_map_stream_.AddDexRegisterEntry(
@@ -641,7 +631,7 @@
           stack_map_stream_.AddDexRegisterEntry(i, DexRegisterLocation::Kind::kConstant, 0);
         } else {
           DCHECK(current->IsFloatConstant()) << current->DebugName();
-          int32_t value = bit_cast<float, int32_t>(current->AsFloatConstant()->GetValue());
+          int32_t value = bit_cast<int32_t, float>(current->AsFloatConstant()->GetValue());
           stack_map_stream_.AddDexRegisterEntry(i, DexRegisterLocation::Kind::kConstant, value);
         }
         break;
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index ecaa6f0..07ca6b1 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -271,7 +271,7 @@
       return 0;
     } else {
       DCHECK(constant->IsFloatConstant());
-      return bit_cast<float, int32_t>(constant->AsFloatConstant()->GetValue());
+      return bit_cast<int32_t, float>(constant->AsFloatConstant()->GetValue());
     }
   }
 
@@ -281,12 +281,12 @@
     } else if (constant->IsNullConstant()) {
       return 0;
     } else if (constant->IsFloatConstant()) {
-      return bit_cast<float, int32_t>(constant->AsFloatConstant()->GetValue());
+      return bit_cast<int32_t, float>(constant->AsFloatConstant()->GetValue());
     } else if (constant->IsLongConstant()) {
       return constant->AsLongConstant()->GetValue();
     } else {
       DCHECK(constant->IsDoubleConstant());
-      return bit_cast<double, int64_t>(constant->AsDoubleConstant()->GetValue());
+      return bit_cast<int64_t, double>(constant->AsDoubleConstant()->GetValue());
     }
   }
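The bit_cast edits in this and the following files swap the template arguments so that the destination type comes first, as the fixed call sites above show. A minimal sketch of that convention (an assumed stand-in for ART's real helper, which is not part of this patch):

    #include <cstring>

    // Assumed shape of bit_cast<Dest, Source>: destination type first.
    template <typename Dest, typename Source>
    Dest bit_cast(const Source& source) {
      static_assert(sizeof(Dest) == sizeof(Source), "sizes must match");
      Dest dest;
      std::memcpy(&dest, &source, sizeof(dest));
      return dest;
    }

For example, bit_cast<int32_t, float>(1.0f) yields the IEEE-754 bit pattern 0x3f800000.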
 
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 0a069a7..5a79a69 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -883,7 +883,7 @@
   HInstruction* previous = got->GetPrevious();
 
   HLoopInformation* info = block->GetLoopInformation();
-  if (info != nullptr && info->IsBackEdge(block) && info->HasSuspendCheck()) {
+  if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
     codegen_->ClearSpillSlotsFromLoopPhisInStackMap(info->GetSuspendCheck());
     GenerateSuspendCheck(info->GetSuspendCheck(), successor);
     return;
@@ -1388,9 +1388,14 @@
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(conversion, call_kind);
 
+  // The Java language does not allow treating boolean as an integral type,
+  // but our bit representation makes it safe.
+
   switch (result_type) {
     case Primitive::kPrimByte:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
         case Primitive::kPrimChar:
@@ -1407,6 +1412,8 @@
 
     case Primitive::kPrimShort:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
         case Primitive::kPrimInt:
         case Primitive::kPrimChar:
@@ -1451,6 +1458,8 @@
 
     case Primitive::kPrimLong:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
@@ -1487,6 +1496,8 @@
 
     case Primitive::kPrimChar:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
@@ -1503,6 +1514,8 @@
 
     case Primitive::kPrimFloat:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
@@ -1536,6 +1549,8 @@
 
     case Primitive::kPrimDouble:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
@@ -1582,6 +1597,8 @@
   switch (result_type) {
     case Primitive::kPrimByte:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
         case Primitive::kPrimChar:
@@ -1597,6 +1614,8 @@
 
     case Primitive::kPrimShort:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
         case Primitive::kPrimInt:
         case Primitive::kPrimChar:
@@ -1654,6 +1673,8 @@
 
     case Primitive::kPrimLong:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
@@ -1692,6 +1713,8 @@
 
     case Primitive::kPrimChar:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
@@ -1707,6 +1730,8 @@
 
     case Primitive::kPrimFloat:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
@@ -1773,6 +1798,8 @@
 
     case Primitive::kPrimDouble:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index aeec5dd..9455a91 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -63,6 +63,7 @@
+using helpers::ARM64EncodableConstantOrRegister;
 using helpers::VIXLRegCodeFromART;
 using helpers::WRegisterFrom;
 using helpers::XRegisterFrom;
 
 static constexpr size_t kHeapRefSize = sizeof(mirror::HeapReference<mirror::Object>);
 static constexpr int kCurrentMethodStackOffset = 0;
@@ -1106,7 +1107,7 @@
     case Primitive::kPrimInt:
     case Primitive::kPrimLong:
       locations->SetInAt(0, Location::RequiresRegister());
-      locations->SetInAt(1, Location::RegisterOrConstant(instr->InputAt(1)));
+      locations->SetInAt(1, ARM64EncodableConstantOrRegister(instr->InputAt(1), instr));
       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
       break;
 
@@ -1398,7 +1399,7 @@
   switch (in_type) {
     case Primitive::kPrimLong: {
       locations->SetInAt(0, Location::RequiresRegister());
-      locations->SetInAt(1, Location::RegisterOrConstant(compare->InputAt(1)));
+      locations->SetInAt(1, ARM64EncodableConstantOrRegister(compare->InputAt(1), compare));
       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
       break;
     }
@@ -1468,7 +1469,7 @@
 void LocationsBuilderARM64::VisitCondition(HCondition* instruction) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
   locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+  locations->SetInAt(1, ARM64EncodableConstantOrRegister(instruction->InputAt(1), instruction));
   if (instruction->NeedsMaterialization()) {
     locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
   }
@@ -1620,7 +1621,7 @@
   HInstruction* previous = got->GetPrevious();
   HLoopInformation* info = block->GetLoopInformation();
 
-  if (info != nullptr && info->IsBackEdge(block) && info->HasSuspendCheck()) {
+  if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
     codegen_->ClearSpillSlotsFromLoopPhisInStackMap(info->GetSuspendCheck());
     GenerateSuspendCheck(info->GetSuspendCheck(), successor);
     return;
@@ -2116,7 +2117,7 @@
   switch (neg->GetResultType()) {
     case Primitive::kPrimInt:
     case Primitive::kPrimLong:
-      locations->SetInAt(0, Location::RegisterOrConstant(neg->InputAt(0)));
+      locations->SetInAt(0, ARM64EncodableConstantOrRegister(neg->InputAt(0), neg));
       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
       break;
 
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 754dd10..4414a65 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -680,7 +680,7 @@
         value = constant->AsLongConstant()->GetValue();
       } else {
         DCHECK(constant->IsDoubleConstant());
-        value = bit_cast<double, int64_t>(constant->AsDoubleConstant()->GetValue());
+        value = bit_cast<int64_t, double>(constant->AsDoubleConstant()->GetValue());
       }
       __ movl(Address(ESP, destination.GetStackIndex()), Immediate(Low32Bits(value)));
       __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)), Immediate(High32Bits(value)));
@@ -792,7 +792,7 @@
   HInstruction* previous = got->GetPrevious();
 
   HLoopInformation* info = block->GetLoopInformation();
-  if (info != nullptr && info->IsBackEdge(block) && info->HasSuspendCheck()) {
+  if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
     codegen_->ClearSpillSlotsFromLoopPhisInStackMap(info->GetSuspendCheck());
     GenerateSuspendCheck(info->GetSuspendCheck(), successor);
     return;
@@ -1370,9 +1370,14 @@
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(conversion, call_kind);
 
+  // The Java language does not allow treating boolean as an integral type,
+  // but our bit representation makes it safe.
+
   switch (result_type) {
     case Primitive::kPrimByte:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
         case Primitive::kPrimChar:
@@ -1391,6 +1396,8 @@
 
     case Primitive::kPrimShort:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
         case Primitive::kPrimInt:
         case Primitive::kPrimChar:
@@ -1435,6 +1442,8 @@
 
     case Primitive::kPrimLong:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
@@ -1464,6 +1473,8 @@
 
     case Primitive::kPrimChar:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
@@ -1480,6 +1491,8 @@
 
     case Primitive::kPrimFloat:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
@@ -1511,6 +1524,8 @@
 
     case Primitive::kPrimDouble:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
@@ -1556,6 +1571,8 @@
   switch (result_type) {
     case Primitive::kPrimByte:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
         case Primitive::kPrimChar:
@@ -1577,6 +1594,8 @@
 
     case Primitive::kPrimShort:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
         case Primitive::kPrimInt:
         case Primitive::kPrimChar:
@@ -1672,6 +1691,8 @@
 
     case Primitive::kPrimLong:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
@@ -1703,6 +1724,8 @@
 
     case Primitive::kPrimChar:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
@@ -1726,6 +1749,8 @@
 
     case Primitive::kPrimFloat:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
@@ -1783,6 +1808,8 @@
 
     case Primitive::kPrimDouble:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
@@ -2730,26 +2757,45 @@
   Label less, greater, done;
   switch (compare->InputAt(0)->GetType()) {
     case Primitive::kPrimLong: {
+      Register left_low = left.AsRegisterPairLow<Register>();
+      Register left_high = left.AsRegisterPairHigh<Register>();
+      int32_t val_low = 0;
+      int32_t val_high = 0;
+      bool right_is_const = false;
+
+      if (right.IsConstant()) {
+        DCHECK(right.GetConstant()->IsLongConstant());
+        right_is_const = true;
+        int64_t val = right.GetConstant()->AsLongConstant()->GetValue();
+        val_low = Low32Bits(val);
+        val_high = High32Bits(val);
+      }
+
       if (right.IsRegisterPair()) {
-        __ cmpl(left.AsRegisterPairHigh<Register>(), right.AsRegisterPairHigh<Register>());
+        __ cmpl(left_high, right.AsRegisterPairHigh<Register>());
       } else if (right.IsDoubleStackSlot()) {
-        __ cmpl(left.AsRegisterPairHigh<Register>(),
-                Address(ESP, right.GetHighStackIndex(kX86WordSize)));
+        __ cmpl(left_high, Address(ESP, right.GetHighStackIndex(kX86WordSize)));
       } else {
-        DCHECK(right.IsConstant()) << right;
-        __ cmpl(left.AsRegisterPairHigh<Register>(),
-                Immediate(High32Bits(right.GetConstant()->AsLongConstant()->GetValue())));
+        DCHECK(right_is_const) << right;
+        if (val_high == 0) {
+          __ testl(left_high, left_high);
+        } else {
+          __ cmpl(left_high, Immediate(val_high));
+        }
       }
       __ j(kLess, &less);  // Signed compare.
       __ j(kGreater, &greater);  // Signed compare.
       if (right.IsRegisterPair()) {
-        __ cmpl(left.AsRegisterPairLow<Register>(), right.AsRegisterPairLow<Register>());
+        __ cmpl(left_low, right.AsRegisterPairLow<Register>());
       } else if (right.IsDoubleStackSlot()) {
-        __ cmpl(left.AsRegisterPairLow<Register>(), Address(ESP, right.GetStackIndex()));
+        __ cmpl(left_low, Address(ESP, right.GetStackIndex()));
       } else {
-        DCHECK(right.IsConstant()) << right;
-        __ cmpl(left.AsRegisterPairLow<Register>(),
-                Immediate(Low32Bits(right.GetConstant()->AsLongConstant()->GetValue())));
+        DCHECK(right_is_const) << right;
+        if (val_low == 0) {
+          __ testl(left_low, left_low);
+        } else {
+          __ cmpl(left_low, Immediate(val_low));
+        }
       }
       break;
     }
@@ -3645,14 +3691,21 @@
         __ movl(Address(ESP, destination.GetStackIndex()), Immediate(value));
       }
     } else if (constant->IsFloatConstant()) {
-      float value = constant->AsFloatConstant()->GetValue();
-      Immediate imm(bit_cast<float, int32_t>(value));
+      float fp_value = constant->AsFloatConstant()->GetValue();
+      int32_t value = bit_cast<int32_t, float>(fp_value);
+      Immediate imm(value);
       if (destination.IsFpuRegister()) {
-        ScratchRegisterScope ensure_scratch(
-            this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
-        Register temp = static_cast<Register>(ensure_scratch.GetRegister());
-        __ movl(temp, imm);
-        __ movd(destination.AsFpuRegister<XmmRegister>(), temp);
+        XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
+        if (value == 0) {
+          // Easy handling of 0.0.
+          __ xorps(dest, dest);
+        } else {
+          ScratchRegisterScope ensure_scratch(
+              this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
+          Register temp = static_cast<Register>(ensure_scratch.GetRegister());
+          __ movl(temp, Immediate(value));
+          __ movd(dest, temp);
+        }
       } else {
         DCHECK(destination.IsStackSlot()) << destination;
         __ movl(Address(ESP, destination.GetStackIndex()), imm);
@@ -3673,7 +3726,7 @@
     } else {
       DCHECK(constant->IsDoubleConstant());
       double dbl_value = constant->AsDoubleConstant()->GetValue();
-      int64_t value = bit_cast<double, int64_t>(dbl_value);
+      int64_t value = bit_cast<int64_t, double>(dbl_value);
       int32_t low_value = Low32Bits(value);
       int32_t high_value = High32Bits(value);
       Immediate low(low_value);
@@ -4107,18 +4160,38 @@
     } else {
       DCHECK(second.IsConstant()) << second;
       int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
-      Immediate low(Low32Bits(value));
-      Immediate high(High32Bits(value));
+      int32_t low_value = Low32Bits(value);
+      int32_t high_value = High32Bits(value);
+      Immediate low(low_value);
+      Immediate high(high_value);
+      Register first_low = first.AsRegisterPairLow<Register>();
+      Register first_high = first.AsRegisterPairHigh<Register>();
       if (instruction->IsAnd()) {
-        __ andl(first.AsRegisterPairLow<Register>(), low);
-        __ andl(first.AsRegisterPairHigh<Register>(), high);
+        if (low_value == 0) {
+          __ xorl(first_low, first_low);
+        } else if (low_value != -1) {
+          __ andl(first_low, low);
+        }
+        if (high_value == 0) {
+          __ xorl(first_high, first_high);
+        } else if (high_value != -1) {
+          __ andl(first_high, high);
+        }
       } else if (instruction->IsOr()) {
-        __ orl(first.AsRegisterPairLow<Register>(), low);
-        __ orl(first.AsRegisterPairHigh<Register>(), high);
+        if (low_value != 0) {
+          __ orl(first_low, low);
+        }
+        if (high_value != 0) {
+          __ orl(first_high, high);
+        }
       } else {
         DCHECK(instruction->IsXor());
-        __ xorl(first.AsRegisterPairLow<Register>(), low);
-        __ xorl(first.AsRegisterPairHigh<Register>(), high);
+        if (low_value != 0) {
+          __ xorl(first_low, low);
+        }
+        if (high_value != 0) {
+          __ xorl(first_high, high);
+        }
       }
     }
   }
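Two x86 idioms recur in the rewritten long paths above: testl reg, reg sets the flags exactly like a compare against zero but needs no immediate byte, and xorl/xorps of a register with itself is the shortest way to materialize zero. The constant paths also rely on ART's Low32Bits/High32Bits helpers; assumed equivalents for reference:

    #include <cstdint>

    // Assumed equivalents of the Low32Bits/High32Bits helpers used above.
    static int32_t Low32Bits(int64_t value) {
      return static_cast<int32_t>(value);
    }
    static int32_t High32Bits(int64_t value) {
      return static_cast<int32_t>(static_cast<uint64_t>(value) >> 32);
    }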
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index dbd7c9e..c1f601e 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -625,7 +625,7 @@
       HConstant* constant = source.GetConstant();
       int64_t value = constant->AsLongConstant()->GetValue();
       if (constant->IsDoubleConstant()) {
-        value = bit_cast<double, int64_t>(constant->AsDoubleConstant()->GetValue());
+        value = bit_cast<int64_t, double>(constant->AsDoubleConstant()->GetValue());
       } else {
         DCHECK(constant->IsLongConstant());
         value = constant->AsLongConstant()->GetValue();
@@ -729,7 +729,7 @@
   HInstruction* previous = got->GetPrevious();
 
   HLoopInformation* info = block->GetLoopInformation();
-  if (info != nullptr && info->IsBackEdge(block) && info->HasSuspendCheck()) {
+  if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
     codegen_->ClearSpillSlotsFromLoopPhisInStackMap(info->GetSuspendCheck());
     GenerateSuspendCheck(info->GetSuspendCheck(), successor);
     return;
@@ -956,7 +956,7 @@
   switch (compare->InputAt(0)->GetType()) {
     case Primitive::kPrimLong: {
       locations->SetInAt(0, Location::RequiresRegister());
-      locations->SetInAt(1, Location::RequiresRegister());
+      locations->SetInAt(1, Location::RegisterOrInt32LongConstant(compare->InputAt(1)));
       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
       break;
     }
@@ -982,7 +982,18 @@
   Primitive::Type type = compare->InputAt(0)->GetType();
   switch (type) {
     case Primitive::kPrimLong: {
-      __ cmpq(left.AsRegister<CpuRegister>(), right.AsRegister<CpuRegister>());
+      CpuRegister left_reg = left.AsRegister<CpuRegister>();
+      if (right.IsConstant()) {
+        int64_t value = right.GetConstant()->AsLongConstant()->GetValue();
+        DCHECK(IsInt<32>(value));
+        if (value == 0) {
+          __ testq(left_reg, left_reg);
+        } else {
+          __ cmpq(left_reg, Immediate(static_cast<int32_t>(value)));
+        }
+      } else {
+        __ cmpq(left_reg, right.AsRegister<CpuRegister>());
+      }
       break;
     }
     case Primitive::kPrimFloat: {
@@ -1398,9 +1409,15 @@
   Primitive::Type result_type = conversion->GetResultType();
   Primitive::Type input_type = conversion->GetInputType();
   DCHECK_NE(result_type, input_type);
+
+  // The Java language does not allow treating boolean as an integral type,
+  // but our bit representation makes it safe.
+
   switch (result_type) {
     case Primitive::kPrimByte:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
         case Primitive::kPrimChar:
@@ -1417,6 +1434,8 @@
 
     case Primitive::kPrimShort:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
         case Primitive::kPrimInt:
         case Primitive::kPrimChar:
@@ -1461,6 +1480,8 @@
 
     case Primitive::kPrimLong:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
@@ -1494,6 +1515,8 @@
 
     case Primitive::kPrimChar:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
@@ -1510,6 +1533,8 @@
 
     case Primitive::kPrimFloat:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
@@ -1539,6 +1564,8 @@
 
     case Primitive::kPrimDouble:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
@@ -1582,6 +1609,8 @@
   switch (result_type) {
     case Primitive::kPrimByte:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
         case Primitive::kPrimChar:
@@ -1606,6 +1635,8 @@
 
     case Primitive::kPrimShort:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
         case Primitive::kPrimInt:
         case Primitive::kPrimChar:
@@ -1704,6 +1735,8 @@
     case Primitive::kPrimLong:
       switch (input_type) {
         DCHECK(out.IsRegister());
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
@@ -1771,6 +1804,8 @@
 
     case Primitive::kPrimChar:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
@@ -1795,6 +1830,8 @@
 
     case Primitive::kPrimFloat:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
@@ -1821,6 +1858,8 @@
 
     case Primitive::kPrimDouble:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
@@ -1865,17 +1904,7 @@
     case Primitive::kPrimLong: {
       locations->SetInAt(0, Location::RequiresRegister());
       // We can use a leaq or addq if the constant can fit in an immediate.
-      HInstruction* rhs = add->InputAt(1);
-      bool is_int32_constant = false;
-      if (rhs->IsLongConstant()) {
-        int64_t value = rhs->AsLongConstant()->GetValue();
-        if (static_cast<int32_t>(value) == value) {
-          is_int32_constant = true;
-        }
-      }
-      locations->SetInAt(1,
-          is_int32_constant ? Location::RegisterOrConstant(rhs) :
-                              Location::RequiresRegister());
+      locations->SetInAt(1, Location::RegisterOrInt32LongConstant(add->InputAt(1)));
       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
       break;
     }
@@ -1973,7 +2002,7 @@
     }
     case Primitive::kPrimLong: {
       locations->SetInAt(0, Location::RequiresRegister());
-      locations->SetInAt(1, Location::RequiresRegister());
+      locations->SetInAt(1, Location::RegisterOrInt32LongConstant(sub->InputAt(1)));
       locations->SetOut(Location::SameAsFirstInput());
       break;
     }
@@ -2007,7 +2036,13 @@
       break;
     }
     case Primitive::kPrimLong: {
-      __ subq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
+      if (second.IsConstant()) {
+        int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
+        DCHECK(IsInt<32>(value));
+        __ subq(first.AsRegister<CpuRegister>(), Immediate(static_cast<int32_t>(value)));
+      } else {
+        __ subq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
+      }
       break;
     }
 
@@ -2038,8 +2073,13 @@
     }
     case Primitive::kPrimLong: {
       locations->SetInAt(0, Location::RequiresRegister());
-      locations->SetInAt(1, Location::RequiresRegister());
-      locations->SetOut(Location::SameAsFirstInput());
+      locations->SetInAt(1, Location::RegisterOrInt32LongConstant(mul->InputAt(1)));
+      if (locations->InAt(1).IsConstant()) {
+        // Can use a 3-operand multiply.
+        locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+      } else {
+        locations->SetOut(Location::SameAsFirstInput());
+      }
       break;
     }
     case Primitive::kPrimFloat:
@@ -2059,9 +2099,9 @@
   LocationSummary* locations = mul->GetLocations();
   Location first = locations->InAt(0);
   Location second = locations->InAt(1);
-  DCHECK(first.Equals(locations->Out()));
   switch (mul->GetResultType()) {
     case Primitive::kPrimInt: {
+      DCHECK(first.Equals(locations->Out()));
       if (second.IsRegister()) {
         __ imull(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
       } else if (second.IsConstant()) {
@@ -2075,16 +2115,27 @@
       break;
     }
     case Primitive::kPrimLong: {
-      __ imulq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
+      if (second.IsConstant()) {
+        int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
+        DCHECK(IsInt<32>(value));
+        __ imulq(locations->Out().AsRegister<CpuRegister>(),
+                 first.AsRegister<CpuRegister>(),
+                 Immediate(static_cast<int32_t>(value)));
+      } else {
+        DCHECK(first.Equals(locations->Out()));
+        __ imulq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
+      }
       break;
     }
 
     case Primitive::kPrimFloat: {
+      DCHECK(first.Equals(locations->Out()));
       __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
       break;
     }
 
     case Primitive::kPrimDouble: {
+      DCHECK(first.Equals(locations->Out()));
       __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
       break;
     }
@@ -3320,20 +3371,35 @@
         __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
       }
     } else if (constant->IsFloatConstant()) {
-      Immediate imm(bit_cast<float, int32_t>(constant->AsFloatConstant()->GetValue()));
+      float fp_value = constant->AsFloatConstant()->GetValue();
+      int32_t value = bit_cast<int32_t, float>(fp_value);
+      Immediate imm(value);
       if (destination.IsFpuRegister()) {
-        __ movl(CpuRegister(TMP), imm);
-        __ movd(destination.AsFpuRegister<XmmRegister>(), CpuRegister(TMP));
+        XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
+        if (value == 0) {
+          // Easy handling of 0.0.
+          __ xorps(dest, dest);
+        } else {
+          __ movl(CpuRegister(TMP), imm);
+          __ movd(dest, CpuRegister(TMP));
+        }
       } else {
         DCHECK(destination.IsStackSlot()) << destination;
         __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), imm);
       }
     } else {
       DCHECK(constant->IsDoubleConstant()) << constant->DebugName();
-      Immediate imm(bit_cast<double, int64_t>(constant->AsDoubleConstant()->GetValue()));
+      double fp_value = constant->AsDoubleConstant()->GetValue();
+      int64_t value = bit_cast<int64_t, double>(fp_value);
+      Immediate imm(value);
       if (destination.IsFpuRegister()) {
-        __ movq(CpuRegister(TMP), imm);
-        __ movd(destination.AsFpuRegister<XmmRegister>(), CpuRegister(TMP));
+        XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
+        if (value == 0) {
+          __ xorpd(dest, dest);
+        } else {
+          __ movq(CpuRegister(TMP), imm);
+          __ movd(dest, CpuRegister(TMP));
+        }
       } else {
         DCHECK(destination.IsDoubleStackSlot()) << destination;
         __ movq(CpuRegister(TMP), imm);
@@ -3673,8 +3739,8 @@
   if (instruction->GetType() == Primitive::kPrimInt) {
     locations->SetInAt(1, Location::Any());
   } else {
-    // Request a register to avoid loading a 64bits constant.
-    locations->SetInAt(1, Location::RequiresRegister());
+    // We can handle 32-bit constants.
+    locations->SetInAt(1, Location::RegisterOrInt32LongConstant(instruction->InputAt(1)));
   }
   locations->SetOut(Location::SameAsFirstInput());
 }
@@ -3730,13 +3797,34 @@
     }
   } else {
     DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong);
+    CpuRegister first_reg = first.AsRegister<CpuRegister>();
+    bool second_is_constant = false;
+    int64_t value = 0;
+    if (second.IsConstant()) {
+      second_is_constant = true;
+      value = second.GetConstant()->AsLongConstant()->GetValue();
+      DCHECK(IsInt<32>(value));
+    }
+
     if (instruction->IsAnd()) {
-      __ andq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
+      if (second_is_constant) {
+        __ andq(first_reg, Immediate(static_cast<int32_t>(value)));
+      } else {
+        __ andq(first_reg, second.AsRegister<CpuRegister>());
+      }
     } else if (instruction->IsOr()) {
-      __ orq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
+      if (second_is_constant) {
+        __ orq(first_reg, Immediate(static_cast<int32_t>(value)));
+      } else {
+        __ orq(first_reg, second.AsRegister<CpuRegister>());
+      }
     } else {
       DCHECK(instruction->IsXor());
-      __ xorq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
+      if (second_is_constant) {
+        __ xorq(first_reg, Immediate(static_cast<int32_t>(value)));
+      } else {
+        __ xorq(first_reg, second.AsRegister<CpuRegister>());
+      }
     }
   }
 }
diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc
index 868fc5b..40f0adc 100644
--- a/compiler/optimizing/codegen_test.cc
+++ b/compiler/optimizing/codegen_test.cc
@@ -145,6 +145,7 @@
                              std::function<void(HGraph*)> hook_before_codegen,
                              bool has_result,
                              Expected expected) {
+  graph->BuildDominatorTree();
   SsaLivenessAnalysis liveness(*graph, codegen);
   liveness.Analyze();
 
diff --git a/compiler/optimizing/common_arm64.h b/compiler/optimizing/common_arm64.h
index 9447d3b..fd8c0c6 100644
--- a/compiler/optimizing/common_arm64.h
+++ b/compiler/optimizing/common_arm64.h
@@ -183,6 +183,40 @@
   }
 }
 
+static bool CanEncodeConstantAsImmediate(HConstant* constant, HInstruction* instr) {
+  DCHECK(constant->IsIntConstant() || constant->IsLongConstant() || constant->IsNullConstant());
+
+  // For single uses we let VIXL handle the constant generation since it will
+  // use registers that are not managed by the register allocator (wip0, wip1).
+  if (constant->GetUses().HasOnlyOneUse()) {
+    return true;
+  }
+
+  int64_t value = CodeGenerator::GetInt64ValueOf(constant);
+
+  if (instr->IsAdd() || instr->IsSub() || instr->IsCondition() || instr->IsCompare()) {
+    // Uses aliases of ADD/SUB instructions.
+    return vixl::Assembler::IsImmAddSub(value);
+  } else if (instr->IsAnd() || instr->IsOr() || instr->IsXor()) {
+    // Uses logical operations.
+    return vixl::Assembler::IsImmLogical(value, vixl::kXRegSize);
+  } else {
+    DCHECK(instr->IsNeg());
+    // Uses mov -immediate.
+    return vixl::Assembler::IsImmMovn(value, vixl::kXRegSize);
+  }
+}
+
+static inline Location ARM64EncodableConstantOrRegister(HInstruction* constant,
+                                                        HInstruction* instr) {
+  if (constant->IsConstant()
+      && CanEncodeConstantAsImmediate(constant->AsConstant(), instr)) {
+    return Location::ConstantLocation(constant->AsConstant());
+  }
+
+  return Location::RequiresRegister();
+}
+
 }  // namespace helpers
 }  // namespace arm64
 }  // namespace art
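CanEncodeConstantAsImmediate defers the exact encodability rules to VIXL. For ADD/SUB the A64 rule is a 12-bit unsigned immediate, optionally shifted left by 12; a hedged standalone sketch, assuming it matches vixl::Assembler::IsImmAddSub for non-negative values:

    #include <cstdint>

    // A64 ADD/SUB immediates: uint12, optionally shifted left by 12.
    static bool IsAddSubImmediate(uint64_t value) {
      return (value >> 12) == 0 ||
             ((value >> 24) == 0 && (value & 0xfff) == 0);
    }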
diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc
index 76b9f4f..09a3ae4 100644
--- a/compiler/optimizing/graph_checker.cc
+++ b/compiler/optimizing/graph_checker.cc
@@ -227,13 +227,13 @@
   } else {
     HLoopInformation* loop_information = loop_header->GetLoopInformation();
     HBasicBlock* first_predecessor = loop_header->GetPredecessors().Get(0);
-    if (loop_information->IsBackEdge(first_predecessor)) {
+    if (loop_information->IsBackEdge(*first_predecessor)) {
       AddError(StringPrintf(
           "First predecessor of loop header %d is a back edge.",
           id));
     }
     HBasicBlock* second_predecessor = loop_header->GetPredecessors().Get(1);
-    if (!loop_information->IsBackEdge(second_predecessor)) {
+    if (!loop_information->IsBackEdge(*second_predecessor)) {
       AddError(StringPrintf(
           "Second predecessor of loop header %d is not a back edge.",
           id));
diff --git a/compiler/optimizing/locations.cc b/compiler/optimizing/locations.cc
index 4ac1fe8..a1ae670 100644
--- a/compiler/optimizing/locations.cc
+++ b/compiler/optimizing/locations.cc
@@ -56,6 +56,19 @@
       : Location::RequiresRegister();
 }
 
+Location Location::RegisterOrInt32LongConstant(HInstruction* instruction) {
+  if (!instruction->IsConstant() || !instruction->AsConstant()->IsLongConstant()) {
+    return Location::RequiresRegister();
+  }
+
+  // Does the long constant fit in a 32-bit int?
+  int64_t value = instruction->AsConstant()->AsLongConstant()->GetValue();
+
+  return IsInt<32>(value)
+      ? Location::ConstantLocation(instruction->AsConstant())
+      : Location::RequiresRegister();
+}
+
 Location Location::ByteRegisterOrConstant(int reg, HInstruction* instruction) {
   return instruction->IsConstant()
       ? Location::ConstantLocation(instruction->AsConstant())
diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h
index 566c0da..de876be 100644
--- a/compiler/optimizing/locations.h
+++ b/compiler/optimizing/locations.h
@@ -345,6 +345,7 @@
   }
 
   static Location RegisterOrConstant(HInstruction* instruction);
+  static Location RegisterOrInt32LongConstant(HInstruction* instruction);
   static Location ByteRegisterOrConstant(int reg, HInstruction* instruction);
 
   // The location of the first input to the instruction will be
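RegisterOrInt32LongConstant exists because x86-64 ALU instructions sign-extend a 32-bit immediate to 64 bits, so a long constant is usable as an immediate only if it survives that round trip; the IsInt<32>(value) check in locations.cc establishes exactly this. An equivalent standalone check:

    #include <cstdint>

    // A long constant fits a sign-extended 32-bit immediate iff the
    // narrow-then-widen round trip preserves its value.
    static bool FitsInInt32(int64_t value) {
      return static_cast<int64_t>(static_cast<int32_t>(value)) == value;
    }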
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index a90ebce..4f6565d 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -185,7 +185,7 @@
   if (successor->IsLoopHeader()) {
     // If we split at a back edge boundary, make the new block the back edge.
     HLoopInformation* info = successor->GetLoopInformation();
-    if (info->IsBackEdge(block)) {
+    if (info->IsBackEdge(*block)) {
       info->RemoveBackEdge(block);
       info->AddBackEdge(new_block);
     }
@@ -287,19 +287,49 @@
   return true;
 }
 
+void HGraph::AddConstant(HConstant* instruction) {
+  HInstruction* last_instruction = entry_block_->GetLastInstruction();
+  if (last_instruction == nullptr || !last_instruction->IsControlFlow()) {
+    // Called from the builder. Insert at the end of the block.
+    entry_block_->AddInstruction(instruction);
+  } else {
+    // Entry block ends with control-flow. Insert before the last instruction.
+    entry_block_->InsertInstructionBefore(instruction, last_instruction);
+  }
+}
+
 HNullConstant* HGraph::GetNullConstant() {
   if (cached_null_constant_ == nullptr) {
     cached_null_constant_ = new (arena_) HNullConstant();
-    entry_block_->InsertInstructionBefore(cached_null_constant_,
-                                          entry_block_->GetLastInstruction());
+    AddConstant(cached_null_constant_);
   }
   return cached_null_constant_;
 }
 
+HIntConstant* HGraph::GetIntConstant0() {
+  if (cached_int_constant0_ == nullptr) {
+    cached_int_constant0_ = new (arena_) HIntConstant(0);
+    AddConstant(cached_int_constant0_);
+  }
+  return cached_int_constant0_;
+}
+
+HIntConstant* HGraph::GetIntConstant1() {
+  if (cached_int_constant1_ == nullptr) {
+    cached_int_constant1_ = new (arena_) HIntConstant(1);
+    AddConstant(cached_int_constant1_);
+  }
+  return cached_int_constant1_;
+}
+
 void HLoopInformation::Add(HBasicBlock* block) {
   blocks_.SetBit(block->GetBlockId());
 }
 
+void HLoopInformation::Remove(HBasicBlock* block) {
+  blocks_.ClearBit(block->GetBlockId());
+}
+
 void HLoopInformation::PopulateRecursive(HBasicBlock* block) {
   if (blocks_.IsBitSet(block->GetBlockId())) {
     return;
@@ -621,7 +651,10 @@
 void HGraphVisitor::VisitInsertionOrder() {
   const GrowableArray<HBasicBlock*>& blocks = graph_->GetBlocks();
   for (size_t i = 0 ; i < blocks.Size(); i++) {
-    VisitBasicBlock(blocks.Get(i));
+    HBasicBlock* block = blocks.Get(i);
+    if (block != nullptr) {
+      VisitBasicBlock(block);
+    }
   }
 }
 
@@ -788,6 +821,25 @@
   return new_block;
 }
 
+bool HBasicBlock::IsSingleGoto() const {
+  HLoopInformation* loop_info = GetLoopInformation();
+  // TODO: Remove the null check b/19084197.
+  return GetFirstInstruction() != nullptr
+         && GetPhis().IsEmpty()
+         && GetFirstInstruction() == GetLastInstruction()
+         && GetLastInstruction()->IsGoto()
+         // Back edges generate the suspend check.
+         && (loop_info == nullptr || !loop_info->IsBackEdge(*this));
+}
+
+bool HBasicBlock::EndsWithIf() const {
+  return !GetInstructions().IsEmpty() && GetLastInstruction()->IsIf();
+}
+
+bool HBasicBlock::HasSinglePhi() const {
+  return !GetPhis().IsEmpty() && GetFirstPhi()->GetNext() == nullptr;
+}
+
 void HInstructionList::SetBlockOfInstructions(HBasicBlock* block) const {
   for (HInstruction* current = first_instruction_;
        current != nullptr;
@@ -811,14 +863,35 @@
 }
 
 void HInstructionList::Add(const HInstructionList& instruction_list) {
-  DCHECK(!IsEmpty());
-  AddAfter(last_instruction_, instruction_list);
+  if (IsEmpty()) {
+    first_instruction_ = instruction_list.first_instruction_;
+    last_instruction_ = instruction_list.last_instruction_;
+  } else {
+    AddAfter(last_instruction_, instruction_list);
+  }
+}
+
+void HBasicBlock::DisconnectFromAll() {
+  DCHECK(dominated_blocks_.IsEmpty()) << "Unimplemented scenario";
+
+  for (size_t i = 0, e = predecessors_.Size(); i < e; ++i) {
+    predecessors_.Get(i)->successors_.Delete(this);
+  }
+  for (size_t i = 0, e = successors_.Size(); i < e; ++i) {
+    successors_.Get(i)->predecessors_.Delete(this);
+  }
+  dominator_->dominated_blocks_.Delete(this);
+
+  predecessors_.Reset();
+  successors_.Reset();
+  dominator_ = nullptr;
+  graph_ = nullptr;
 }
 
 void HBasicBlock::MergeWith(HBasicBlock* other) {
   DCHECK(successors_.IsEmpty()) << "Unimplemented block merge scenario";
-  DCHECK(dominated_blocks_.IsEmpty()) << "Unimplemented block merge scenario";
-  DCHECK(other->GetDominator()->IsEntryBlock() && other->GetGraph() != graph_)
+  DCHECK(dominated_blocks_.IsEmpty()
+         || (dominated_blocks_.Size() == 1 && dominated_blocks_.Get(0) == other))
       << "Unimplemented block merge scenario";
   DCHECK(other->GetPhis().IsEmpty());
 
@@ -1006,7 +1079,7 @@
     if (info != nullptr) {
       info->Add(to);
       to->SetLoopInformation(info);
-      if (info->IsBackEdge(at)) {
+      if (info->IsBackEdge(*at)) {
         // Only `at` can become a back edge, as the inlined blocks
         // are predecessors of `at`.
         DCHECK_EQ(1u, info->NumberOfBackEdges());
@@ -1020,6 +1093,53 @@
   invoke->GetBlock()->RemoveInstruction(invoke);
 }
 
+void HGraph::MergeEmptyBranches(HBasicBlock* start_block, HBasicBlock* end_block) {
+  // Find the two branches of an If.
+  DCHECK_EQ(start_block->GetSuccessors().Size(), 2u);
+  HBasicBlock* left_branch = start_block->GetSuccessors().Get(0);
+  HBasicBlock* right_branch = start_block->GetSuccessors().Get(1);
+
+  // Make sure this is a diamond control-flow path.
+  DCHECK_EQ(left_branch->GetSuccessors().Get(0), end_block);
+  DCHECK_EQ(right_branch->GetSuccessors().Get(0), end_block);
+  DCHECK_EQ(end_block->GetPredecessors().Size(), 2u);
+  DCHECK_EQ(start_block, end_block->GetDominator());
+
+  // Disconnect the branches and merge the two blocks. This will move
+  // all instructions from 'end_block' to 'start_block'.
+  DCHECK(left_branch->IsSingleGoto());
+  DCHECK(right_branch->IsSingleGoto());
+  left_branch->DisconnectFromAll();
+  right_branch->DisconnectFromAll();
+  start_block->RemoveInstruction(start_block->GetLastInstruction());
+  start_block->MergeWith(end_block);
+
+  // Delete the now redundant blocks from the graph.
+  blocks_.Put(left_branch->GetBlockId(), nullptr);
+  blocks_.Put(right_branch->GetBlockId(), nullptr);
+  blocks_.Put(end_block->GetBlockId(), nullptr);
+
+  // Update reverse post order.
+  reverse_post_order_.Delete(left_branch);
+  reverse_post_order_.Delete(right_branch);
+  reverse_post_order_.Delete(end_block);
+
+  // Update loops which contain the code.
+  for (HLoopInformationOutwardIterator it(*start_block); !it.Done(); it.Advance()) {
+    HLoopInformation* loop_info = it.Current();
+    DCHECK(loop_info->Contains(*left_branch));
+    DCHECK(loop_info->Contains(*right_branch));
+    DCHECK(loop_info->Contains(*end_block));
+    loop_info->Remove(left_branch);
+    loop_info->Remove(right_branch);
+    loop_info->Remove(end_block);
+    if (loop_info->IsBackEdge(*end_block)) {
+      loop_info->RemoveBackEdge(end_block);
+      loop_info->AddBackEdge(start_block);
+    }
+  }
+}
+
 std::ostream& operator<<(std::ostream& os, const ReferenceTypeInfo& rhs) {
   ScopedObjectAccess soa(Thread::Current());
   os << "["
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index a35fa1d..664cf18 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -128,6 +128,7 @@
   void SetExitBlock(HBasicBlock* block) { exit_block_ = block; }
 
   void AddBlock(HBasicBlock* block);
+  void AddConstant(HConstant* instruction);
 
   // Try building the SSA form of this graph, with dominance computation and loop
   // recognition. Returns whether it was successful in doing all these steps.
@@ -154,6 +155,8 @@
   // Inline this graph in `outer_graph`, replacing the given `invoke` instruction.
   void InlineInto(HGraph* outer_graph, HInvoke* invoke);
 
+  void MergeEmptyBranches(HBasicBlock* start_block, HBasicBlock* end_block);
+
   void SplitCriticalEdge(HBasicBlock* block, HBasicBlock* successor);
   void SimplifyLoop(HBasicBlock* header);
 
@@ -217,6 +220,8 @@
   bool IsDebuggable() const { return debuggable_; }
 
   HNullConstant* GetNullConstant();
+  HIntConstant* GetIntConstant0();
+  HIntConstant* GetIntConstant1();
 
  private:
   HBasicBlock* FindCommonDominator(HBasicBlock* first, HBasicBlock* second) const;
@@ -267,6 +272,10 @@
   // Cached null constant that might be created when building SSA form.
   HNullConstant* cached_null_constant_;
 
+  // Cached common constants often needed by optimization passes.
+  HIntConstant* cached_int_constant0_;
+  HIntConstant* cached_int_constant1_;
+
   ART_FRIEND_TEST(GraphTest, IfSuccessorSimpleJoinBlock1);
   DISALLOW_COPY_AND_ASSIGN(HGraph);
 };
@@ -300,9 +309,9 @@
     back_edges_.Delete(back_edge);
   }
 
-  bool IsBackEdge(HBasicBlock* block) {
+  bool IsBackEdge(const HBasicBlock& block) const {
     for (size_t i = 0, e = back_edges_.Size(); i < e; ++i) {
-      if (back_edges_.Get(i) == block) return true;
+      if (back_edges_.Get(i) == &block) return true;
     }
     return false;
   }
@@ -336,6 +345,7 @@
   const ArenaBitVector& GetBlocks() const { return blocks_; }
 
   void Add(HBasicBlock* block);
+  void Remove(HBasicBlock* block);
 
  private:
   // Internal recursive implementation of `Populate`.
@@ -391,6 +401,8 @@
     return graph_->GetExitBlock() == this;
   }
 
+  bool IsSingleGoto() const;
+
   void AddBackEdge(HBasicBlock* back_edge) {
     if (loop_information_ == nullptr) {
       loop_information_ = new (graph_->GetArena()) HLoopInformation(this, graph_);
@@ -512,8 +524,16 @@
   // of `this` are moved to `other`.
   // Note that this method does not update the graph, reverse post order, loop
   // information, nor make sure the blocks are consistent (for example ending
+  // with a control flow instruction).
   void ReplaceWith(HBasicBlock* other);
 
+  // Disconnects `this` from all its predecessors, successors and the dominator.
+  // It assumes that `this` does not dominate any blocks.
+  // Note that this method does not update the graph, reverse post order, loop
+  // information, nor make sure the blocks are consistent (for example ending
+  // with a control flow instruction).
+  void DisconnectFromAll();
+
   void AddInstruction(HInstruction* instruction);
   void InsertInstructionBefore(HInstruction* instruction, HInstruction* cursor);
   // Replace instruction `initial` with `replacement` within this block.
@@ -582,6 +602,9 @@
   bool IsCatchBlock() const { return is_catch_block_; }
   void SetIsCatchBlock() { is_catch_block_ = true; }
 
+  bool EndsWithIf() const;
+  bool HasSinglePhi() const;
+
  private:
   HGraph* graph_;
   GrowableArray<HBasicBlock*> predecessors_;
@@ -604,6 +627,31 @@
   DISALLOW_COPY_AND_ASSIGN(HBasicBlock);
 };
 
+// Iterates over the LoopInformation of all loops which contain 'block'
+// from the innermost to the outermost.
+class HLoopInformationOutwardIterator : public ValueObject {
+ public:
+  explicit HLoopInformationOutwardIterator(const HBasicBlock& block)
+      : current_(block.GetLoopInformation()) {}
+
+  bool Done() const { return current_ == nullptr; }
+
+  void Advance() {
+    DCHECK(!Done());
+    current_ = current_->GetHeader()->GetDominator()->GetLoopInformation();
+  }
+
+  HLoopInformation* Current() const {
+    DCHECK(!Done());
+    return current_;
+  }
+
+ private:
+  HLoopInformation* current_;
+
+  DISALLOW_COPY_AND_ASSIGN(HLoopInformationOutwardIterator);
+};
+
 #define FOR_EACH_CONCRETE_INSTRUCTION(M)                                \
   M(Add, BinaryOperation)                                               \
   M(And, BinaryOperation)                                               \
@@ -1873,20 +1921,22 @@
   float GetValue() const { return value_; }
 
   bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
-    return bit_cast<float, int32_t>(other->AsFloatConstant()->value_) ==
-        bit_cast<float, int32_t>(value_);
+    return bit_cast<uint32_t, float>(other->AsFloatConstant()->value_) ==
+        bit_cast<uint32_t, float>(value_);
   }
 
   size_t ComputeHashCode() const OVERRIDE { return static_cast<size_t>(GetValue()); }
 
   bool IsMinusOne() const OVERRIDE {
-    return bit_cast<uint32_t>(AsFloatConstant()->GetValue()) == bit_cast<uint32_t>((-1.0f));
+    return bit_cast<uint32_t, float>(AsFloatConstant()->GetValue()) ==
+        bit_cast<uint32_t, float>((-1.0f));
   }
   bool IsZero() const OVERRIDE {
     return AsFloatConstant()->GetValue() == 0.0f;
   }
   bool IsOne() const OVERRIDE {
-    return bit_cast<uint32_t>(AsFloatConstant()->GetValue()) == bit_cast<uint32_t>(1.0f);
+    return bit_cast<uint32_t, float>(AsFloatConstant()->GetValue()) ==
+        bit_cast<uint32_t, float>(1.0f);
   }
 
   DECLARE_INSTRUCTION(FloatConstant);
@@ -1904,20 +1954,22 @@
   double GetValue() const { return value_; }
 
   bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
-    return bit_cast<double, int64_t>(other->AsDoubleConstant()->value_) ==
-        bit_cast<double, int64_t>(value_);
+    return bit_cast<uint64_t, double>(other->AsDoubleConstant()->value_) ==
+        bit_cast<uint64_t, double>(value_);
   }
 
   size_t ComputeHashCode() const OVERRIDE { return static_cast<size_t>(GetValue()); }
 
   bool IsMinusOne() const OVERRIDE {
-    return bit_cast<uint64_t>(AsDoubleConstant()->GetValue()) == bit_cast<uint64_t>((-1.0));
+    return bit_cast<uint64_t, double>(AsDoubleConstant()->GetValue()) ==
+        bit_cast<uint64_t, double>((-1.0));
   }
   bool IsZero() const OVERRIDE {
     return AsDoubleConstant()->GetValue() == 0.0;
   }
   bool IsOne() const OVERRIDE {
-    return bit_cast<uint64_t>(AsDoubleConstant()->GetValue()) == bit_cast<uint64_t>(1.0);
+    return bit_cast<uint64_t, double>(AsDoubleConstant()->GetValue()) ==
+        bit_cast<uint64_t, double>(1.0);
   }
 
   DECLARE_INSTRUCTION(DoubleConstant);
@@ -3459,7 +3511,10 @@
 
 class HReversePostOrderIterator : public ValueObject {
  public:
-  explicit HReversePostOrderIterator(const HGraph& graph) : graph_(graph), index_(0) {}
+  explicit HReversePostOrderIterator(const HGraph& graph) : graph_(graph), index_(0) {
+    // Check that reverse post order of the graph has been built.
+    DCHECK(!graph.GetReversePostOrder().IsEmpty());
+  }
 
   bool Done() const { return index_ == graph_.GetReversePostOrder().Size(); }
   HBasicBlock* Current() const { return graph_.GetReversePostOrder().Get(index_); }
@@ -3475,7 +3530,10 @@
 class HPostOrderIterator : public ValueObject {
  public:
   explicit HPostOrderIterator(const HGraph& graph)
-      : graph_(graph), index_(graph_.GetReversePostOrder().Size()) {}
+      : graph_(graph), index_(graph_.GetReversePostOrder().Size()) {
+    // Check that reverse post order of the graph has been built.
+    DCHECK(!graph.GetReversePostOrder().IsEmpty());
+  }
 
   bool Done() const { return index_ == 0; }
   HBasicBlock* Current() const { return graph_.GetReversePostOrder().Get(index_ - 1); }
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index b70f925..eaa30df 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -22,6 +22,7 @@
 #include "base/arena_allocator.h"
 #include "base/dumpable.h"
 #include "base/timing_logger.h"
+#include "boolean_simplifier.h"
 #include "bounds_check_elimination.h"
 #include "builder.h"
 #include "code_generator.h"
@@ -313,6 +314,7 @@
   HDeadCodeElimination dce(graph);
   HConstantFolding fold1(graph);
   InstructionSimplifier simplify1(graph, stats);
+  HBooleanSimplifier boolean_not(graph);
 
   HInliner inliner(graph, dex_compilation_unit, driver, stats);
 
@@ -331,6 +333,9 @@
     &dce,
     &fold1,
     &simplify1,
+    // BooleanSimplifier depends on the InstructionSimplifier removing redundant
+    // suspend checks to recognize empty blocks.
+    &boolean_not,
     &inliner,
     &fold2,
     &side_effects,
@@ -462,13 +467,22 @@
     return nullptr;
   }
 
+  // Implementation of the space filter: do not compile a code item whose size in
+  // code units is bigger than 256.
+  static constexpr size_t kSpaceFilterOptimizingThreshold = 256;
+  const CompilerOptions& compiler_options = compiler_driver->GetCompilerOptions();
+  if ((compiler_options.GetCompilerFilter() == CompilerOptions::kSpace)
+      && (code_item->insns_size_in_code_units_ > kSpaceFilterOptimizingThreshold)) {
+    compilation_stats_.RecordStat(MethodCompilationStat::kNotCompiledSpaceFilter);
+    return nullptr;
+  }
+
   DexCompilationUnit dex_compilation_unit(
     nullptr, class_loader, art::Runtime::Current()->GetClassLinker(), dex_file, code_item,
     class_def_idx, method_idx, access_flags,
     compiler_driver->GetVerifiedMethod(&dex_file, method_idx));
 
-  ArenaPool pool;
-  ArenaAllocator arena(&pool);
+  ArenaAllocator arena(Runtime::Current()->GetArenaPool());
   HGraph* graph = new (&arena) HGraph(
       &arena, compiler_driver->GetCompilerOptions().GetDebuggable());
 
diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h
index 3ebf0f8..22ec2a5 100644
--- a/compiler/optimizing/optimizing_compiler_stats.h
+++ b/compiler/optimizing/optimizing_compiler_stats.h
@@ -38,6 +38,7 @@
   kNotCompiledUnresolvedMethod,
   kNotCompiledUnresolvedField,
   kNotCompiledNonSequentialRegPair,
+  kNotCompiledSpaceFilter,
   kNotOptimizedTryCatch,
   kNotOptimizedDisabled,
   kNotCompiledCantAccesType,
@@ -96,6 +97,7 @@
       case kNotOptimizedDisabled : return "kNotOptimizedDisabled";
       case kNotOptimizedTryCatch : return "kNotOptimizedTryCatch";
       case kNotCompiledCantAccesType : return "kNotCompiledCantAccesType";
+      case kNotCompiledSpaceFilter : return "kNotCompiledSpaceFilter";
       case kNotOptimizedRegisterAllocator : return "kNotOptimizedRegisterAllocator";
       case kNotCompiledUnhandledInstruction : return "kNotCompiledUnhandledInstruction";
       case kRemovedCheckedCast: return "kRemovedCheckedCast";
diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc
index b757a3b..7a2d84b 100644
--- a/compiler/optimizing/register_allocator_test.cc
+++ b/compiler/optimizing/register_allocator_test.cc
@@ -596,6 +596,8 @@
   graph->AddBlock(exit);
   block->AddSuccessor(exit);
   exit->AddInstruction(new (allocator) HExit());
+
+  graph->BuildDominatorTree();
   return graph;
 }
 
@@ -658,6 +660,8 @@
   block->AddInstruction(*second_sub);
 
   block->AddInstruction(new (allocator) HExit());
+
+  graph->BuildDominatorTree();
   return graph;
 }
 
@@ -719,6 +723,8 @@
   block->AddInstruction(*div);
 
   block->AddInstruction(new (allocator) HExit());
+
+  graph->BuildDominatorTree();
   return graph;
 }
 
diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc
index ba11e90..ae6bf16 100644
--- a/compiler/optimizing/ssa_builder.cc
+++ b/compiler/optimizing/ssa_builder.cc
@@ -359,12 +359,12 @@
   if (result == nullptr) {
     HGraph* graph = constant->GetBlock()->GetGraph();
     ArenaAllocator* allocator = graph->GetArena();
-    result = new (allocator) HFloatConstant(bit_cast<int32_t, float>(constant->GetValue()));
+    result = new (allocator) HFloatConstant(bit_cast<float, int32_t>(constant->GetValue()));
     constant->GetBlock()->InsertInstructionBefore(result, constant->GetNext());
   } else {
     // If there is already a constant with the expected type, we know it is
     // the floating point equivalent of this constant.
-    DCHECK_EQ((bit_cast<float, int32_t>(result->GetValue())), constant->GetValue());
+    DCHECK_EQ((bit_cast<int32_t, float>(result->GetValue())), constant->GetValue());
   }
   return result;
 }
@@ -381,12 +381,12 @@
   if (result == nullptr) {
     HGraph* graph = constant->GetBlock()->GetGraph();
     ArenaAllocator* allocator = graph->GetArena();
-    result = new (allocator) HDoubleConstant(bit_cast<int64_t, double>(constant->GetValue()));
+    result = new (allocator) HDoubleConstant(bit_cast<double, int64_t>(constant->GetValue()));
     constant->GetBlock()->InsertInstructionBefore(result, constant->GetNext());
   } else {
     // If there is already a constant with the expected type, we know it is
     // the floating point equivalent of this constant.
-    DCHECK_EQ((bit_cast<double, int64_t>(result->GetValue())), constant->GetValue());
+    DCHECK_EQ((bit_cast<int64_t, double>(result->GetValue())), constant->GetValue());
   }
   return result;
 }
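
Note on the bit_cast fixes in this file (and the similar ones in the trampoline and unstarted
runtime code below): art::bit_cast from base/casts.h takes its template arguments as
<Destination, Source>, while the old utils.h variant took <Source, Destination>; the calls here
are flipped to the casts.h order. A minimal sketch of that convention, assuming a memcpy-based
implementation (the real base/casts.h may differ in details):

    #include <cstring>

    template <typename Destination, typename Source>
    Destination BitCastSketch(const Source& source) {
      static_assert(sizeof(Destination) == sizeof(Source),
                    "bit_cast requires same-sized types");
      Destination destination;
      std::memcpy(&destination, &source, sizeof(destination));
      return destination;
    }

    // Reading float bits as an integer: BitCastSketch<int32_t, float>(1.0f) == 0x3f800000.
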
diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc
index c0d6f42..56ccd71 100644
--- a/compiler/optimizing/ssa_liveness_analysis.cc
+++ b/compiler/optimizing/ssa_liveness_analysis.cc
@@ -71,8 +71,8 @@
   //      for it.
   GrowableArray<uint32_t> forward_predecessors(graph_.GetArena(), graph_.GetBlocks().Size());
   forward_predecessors.SetSize(graph_.GetBlocks().Size());
-  for (size_t i = 0, e = graph_.GetBlocks().Size(); i < e; ++i) {
-    HBasicBlock* block = graph_.GetBlocks().Get(i);
+  for (HReversePostOrderIterator it(graph_); !it.Done(); it.Advance()) {
+    HBasicBlock* block = it.Current();
     size_t number_of_forward_predecessors = block->GetPredecessors().Size();
     if (block->IsLoopHeader()) {
       // We rely on having simplified the CFG.
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index f2704b7..bd155ed 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -1277,6 +1277,14 @@
 }
 
 
+void X86_64Assembler::orq(CpuRegister dst, const Immediate& imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  CHECK(imm.is_int32());  // orq only supports 32b immediate.
+  EmitRex64(dst);
+  EmitComplex(1, Operand(dst), imm);
+}
+
+
 void X86_64Assembler::orq(CpuRegister dst, CpuRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitRex64(dst, src);
@@ -1548,27 +1556,30 @@
 
 
 void X86_64Assembler::imulq(CpuRegister reg, const Immediate& imm) {
+  imulq(reg, reg, imm);
+}
+
+void X86_64Assembler::imulq(CpuRegister dst, CpuRegister reg, const Immediate& imm) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   CHECK(imm.is_int32());  // imulq only supports 32b immediate.
 
-  EmitRex64(reg, reg);
+  EmitRex64(dst, reg);
 
   // See whether imm can be represented as a sign-extended 8bit value.
   int64_t v64 = imm.value();
   if (IsInt<8>(v64)) {
     // Sign-extension works.
     EmitUint8(0x6B);
-    EmitOperand(reg.LowBits(), Operand(reg));
+    EmitOperand(dst.LowBits(), Operand(reg));
     EmitUint8(static_cast<uint8_t>(v64 & 0xFF));
   } else {
     // Not representable, use full immediate.
     EmitUint8(0x69);
-    EmitOperand(reg.LowBits(), Operand(reg));
+    EmitOperand(dst.LowBits(), Operand(reg));
     EmitImmediate(imm);
   }
 }
 
-
 void X86_64Assembler::imulq(CpuRegister reg, const Address& address) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitRex64(reg, address);
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index 5dfcf45..495f74f 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -429,6 +429,7 @@
   void orl(CpuRegister dst, CpuRegister src);
   void orl(CpuRegister reg, const Address& address);
   void orq(CpuRegister dst, CpuRegister src);
+  void orq(CpuRegister dst, const Immediate& imm);
 
   void xorl(CpuRegister dst, CpuRegister src);
   void xorl(CpuRegister dst, const Immediate& imm);
@@ -467,6 +468,7 @@
   void imulq(CpuRegister dst, CpuRegister src);
   void imulq(CpuRegister reg, const Immediate& imm);
   void imulq(CpuRegister reg, const Address& address);
+  void imulq(CpuRegister dst, CpuRegister reg, const Immediate& imm);
 
   void imull(CpuRegister reg);
   void imull(const Address& address);
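
Usage note for the new assembler entry points: the three-operand imulq computes dst = reg * imm,
choosing opcode 0x6B when the immediate fits in a sign-extended byte and 0x69 otherwise, and the
old two-operand imulq(reg, imm) is now the dst == reg special case. A sketch (register and
Immediate construction assumed from the surrounding assembler API):

    X86_64Assembler assembler;
    assembler.imulq(CpuRegister(RAX), CpuRegister(RBX), Immediate(100));   // imm8 encoding (0x6B).
    assembler.imulq(CpuRegister(RAX), CpuRegister(RBX), Immediate(1000));  // imm32 encoding (0x69).
    assembler.imulq(CpuRegister(RDI), Immediate(-5));                      // rdi = rdi * -5.
    assembler.orq(CpuRegister(RDX), Immediate(0x7));                       // New orq with immediate.
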
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index a372179..dfea783 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -109,9 +109,14 @@
 
   UsageError("Usage: dex2oat [options]...");
   UsageError("");
-  UsageError("  --dex-file=<dex-file>: specifies a .dex file to compile.");
+  UsageError("  --dex-file=<dex-file>: specifies a .dex, .jar, or .apk file to compile.");
   UsageError("      Example: --dex-file=/system/framework/core.jar");
   UsageError("");
+  UsageError("  --dex-location=<dex-location>: specifies an alternative dex location to");
+  UsageError("      encode in the oat file for the corresponding --dex-file argument.");
+  UsageError("      Example: --dex-file=/home/build/out/system/framework/core.jar");
+  UsageError("               --dex-location=/system/framework/core.jar");
+  UsageError("");
   UsageError("  --zip-fd=<file-descriptor>: specifies a file descriptor of a zip file");
   UsageError("      containing a classes.dex file to compile.");
   UsageError("      Example: --zip-fd=5");
@@ -614,7 +619,6 @@
           Usage("Unknown compiler backend: %s", backend_str.data());
         }
       } else if (option.starts_with("--compiler-filter=")) {
-        requested_specific_compiler = true;
         compiler_filter_string = option.substr(strlen("--compiler-filter=")).data();
       } else if (option == "--compile-pic") {
         compile_pic = true;
@@ -877,15 +881,7 @@
     }
 
     if (compiler_filter_string == nullptr) {
-      if (instruction_set_ == kMips &&
-          reinterpret_cast<const MipsInstructionSetFeatures*>(instruction_set_features_.get())->
-          IsR6()) {
-        // For R6, only interpreter mode is working.
-        // TODO: fix compiler for Mips32r6.
-        compiler_filter_string = "interpret-only";
-      } else {
-        compiler_filter_string = "speed";
-      }
+      compiler_filter_string = "speed";
     }
 
     CHECK(compiler_filter_string != nullptr);
@@ -894,6 +890,8 @@
       compiler_filter = CompilerOptions::kVerifyNone;
     } else if (strcmp(compiler_filter_string, "interpret-only") == 0) {
       compiler_filter = CompilerOptions::kInterpretOnly;
+    } else if (strcmp(compiler_filter_string, "verify-at-runtime") == 0) {
+      compiler_filter = CompilerOptions::kVerifyAtRuntime;
     } else if (strcmp(compiler_filter_string, "space") == 0) {
       compiler_filter = CompilerOptions::kSpace;
     } else if (strcmp(compiler_filter_string, "balanced") == 0) {
diff --git a/disassembler/disassembler_mips.cc b/disassembler/disassembler_mips.cc
index 3d8a567..b27b555 100644
--- a/disassembler/disassembler_mips.cc
+++ b/disassembler/disassembler_mips.cc
@@ -71,10 +71,14 @@
   { kRTypeMask, 17, "mthi", "S", },
   { kRTypeMask, 18, "mflo", "D", },
   { kRTypeMask, 19, "mtlo", "S", },
-  { kRTypeMask, 24, "mult", "ST", },
-  { kRTypeMask, 25, "multu", "ST", },
-  { kRTypeMask, 26, "div", "ST", },
-  { kRTypeMask, 27, "divu", "ST", },
+  { kRTypeMask | (0x1f << 6), 24, "mult", "ST", },
+  { kRTypeMask | (0x1f << 6), 25, "multu", "ST", },
+  { kRTypeMask | (0x1f << 6), 26, "div", "ST", },
+  { kRTypeMask | (0x1f << 6), 27, "divu", "ST", },
+  { kRTypeMask | (0x1f << 6), 24 + (2 << 6), "mul", "DST", },
+  { kRTypeMask | (0x1f << 6), 24 + (3 << 6), "muh", "DST", },
+  { kRTypeMask | (0x1f << 6), 26 + (2 << 6), "div", "DST", },
+  { kRTypeMask | (0x1f << 6), 26 + (3 << 6), "mod", "DST", },
   { kRTypeMask, 32, "add", "DST", },
   { kRTypeMask, 33, "addu", "DST", },
   { kRTypeMask, 34, "sub", "DST", },
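
Note: widening the mask to kRTypeMask | (0x1f << 6) makes bits [10:6] (the shamt field)
significant, so the legacy hi/lo forms only match when that field is zero, while the MIPS32r6
three-register forms (same funct values, with 2 or 3 in bits [8:6]) get their own rows. A sketch
of the match rule these table entries assume:

    // An instruction matches a row when (instruction & mask) == value.
    // R6 mul: funct = 24 with bits [8:6] = 2, hence value = 24 + (2 << 6).
    bool Matches(uint32_t instruction, uint32_t mask, uint32_t value) {
      return (instruction & mask) == value;
    }
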
diff --git a/runtime/arch/instruction_set_features.cc b/runtime/arch/instruction_set_features.cc
index 1fd1dea..db4b0b1 100644
--- a/runtime/arch/instruction_set_features.cc
+++ b/runtime/arch/instruction_set_features.cc
@@ -250,7 +250,11 @@
     }
     first = true;
   }
-  DCHECK_EQ(use_default, features.empty());
+  // Expectation: "default" is standalone, with no other flags. However, an empty features
+  // vector after processing can also arise when the flags handled here (at the moment only
+  // smp) are the only ones in the list. So logically, we check the implication
+  // "default -> features.empty."
+  DCHECK(!use_default || features.empty());
+
   return AddFeaturesFromSplitString(smp, features, error_msg);
 }
 
diff --git a/runtime/arch/mips/instruction_set_features_mips.cc b/runtime/arch/mips/instruction_set_features_mips.cc
index 00ab613..93d79b7 100644
--- a/runtime/arch/mips/instruction_set_features_mips.cc
+++ b/runtime/arch/mips/instruction_set_features_mips.cc
@@ -24,13 +24,56 @@
 
 namespace art {
 
+// An enum for the Mips revision.
+enum class MipsLevel {
+  kBase,
+  kR2,
+  kR5,
+  kR6
+};
+
+#if defined(_MIPS_ARCH_MIPS32R6)
+static constexpr MipsLevel kRuntimeMipsLevel = MipsLevel::kR6;
+#elif defined(_MIPS_ARCH_MIPS32R5)
+static constexpr MipsLevel kRuntimeMipsLevel = MipsLevel::kR5;
+#elif defined(_MIPS_ARCH_MIPS32R2)
+static constexpr MipsLevel kRuntimeMipsLevel = MipsLevel::kR2;
+#else
+static constexpr MipsLevel kRuntimeMipsLevel = MipsLevel::kBase;
+#endif
+
+static void GetFlagsFromCppDefined(bool* mips_isa_gte2, bool* r6, bool* fpu_32bit) {
+  // Override defaults based on compiler flags.
+  if (kRuntimeMipsLevel >= MipsLevel::kR2) {
+    *mips_isa_gte2 = true;
+  } else {
+    *mips_isa_gte2 = false;
+  }
+
+  if (kRuntimeMipsLevel >= MipsLevel::kR5) {
+    *fpu_32bit = false;
+  } else {
+    *fpu_32bit = true;
+  }
+
+  if (kRuntimeMipsLevel >= MipsLevel::kR6) {
+    *r6 = true;
+  } else {
+    *r6 = false;
+  }
+}
+
 const MipsInstructionSetFeatures* MipsInstructionSetFeatures::FromVariant(
     const std::string& variant, std::string* error_msg ATTRIBUTE_UNUSED) {
 
   bool smp = true;  // Conservative default.
-  bool fpu_32bit = true;
-  bool mips_isa_gte2 = false;
-  bool r6 = false;
+
+  // Override defaults based on compiler flags.
+  // This is needed when running ART tests where the variant is not defined.
+  bool fpu_32bit;
+  bool mips_isa_gte2;
+  bool r6;
+  GetFlagsFromCppDefined(&mips_isa_gte2, &r6, &fpu_32bit);
 
   // Override defaults based on variant string.
   // Only care if it is R1, R2 or R6 and we assume all CPUs will have a FP unit.
@@ -67,19 +110,11 @@
 const MipsInstructionSetFeatures* MipsInstructionSetFeatures::FromCppDefines() {
   // Assume conservative defaults.
   const bool smp = true;
-  bool fpu_32bit = true;
-  bool mips_isa_gte2 = false;
-  bool r6 = false;
 
-  // Override defaults based on compiler flags.
-#if (_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS32R5) || defined(_MIPS_ARCH_MIPS32R6)
-  mips_isa_gte2 = true;
-#endif
-
-#if defined(_MIPS_ARCH_MIPS32R6)
-  r6 = true;
-  fpu_32bit = false;
-#endif
+  bool fpu_32bit;
+  bool mips_isa_gte2;
+  bool r6;
+  GetFlagsFromCppDefined(&mips_isa_gte2, &r6, &fpu_32bit);
 
   return new MipsInstructionSetFeatures(smp, fpu_32bit, mips_isa_gte2, r6);
 }
@@ -89,19 +124,11 @@
   // the kernel puts the appropriate feature flags in here.  Sometimes it doesn't.
   // Assume conservative defaults.
   bool smp = false;
-  bool fpu_32bit = true;
-  bool mips_isa_gte2 = false;
-  bool r6 = false;
 
-  // Override defaults based on compiler flags.
-#if (_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS32R5) || defined(_MIPS_ARCH_MIPS32R6)
-  mips_isa_gte2 = true;
-#endif
-
-#if defined(_MIPS_ARCH_MIPS32R6)
-  r6 = true;
-  fpu_32bit = false;
-#endif
+  bool fpu_32bit;
+  bool mips_isa_gte2;
+  bool r6;
+  GetFlagsFromCppDefined(&mips_isa_gte2, &r6, &fpu_32bit);
 
   std::ifstream in("/proc/cpuinfo");
   if (!in.fail()) {
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 0f874a4..5edcd96 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -883,7 +883,44 @@
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_bump_pointer_instrumented, BumpPointerInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_bump_pointer_instrumented, BumpPointerInstrumented)
 
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB)
+DEFINE_FUNCTION art_quick_alloc_object_tlab
+    // Fast path tlab allocation.
+    // RDI: uint32_t type_idx, RSI: ArtMethod*
+    // RDX, RCX, R8, R9: free. RAX: return val.
+    movl MIRROR_ART_METHOD_DEX_CACHE_TYPES_OFFSET(%rsi), %edx  // Load dex cache resolved types array
+                                                               // Load the class
+    movl MIRROR_OBJECT_ARRAY_DATA_OFFSET(%rdx, %rdi, MIRROR_OBJECT_ARRAY_COMPONENT_SIZE), %edx
+    testl %edx, %edx                                           // Check null class
+    jz   .Lart_quick_alloc_object_tlab_slow_path
+                                                               // Check class status.
+    cmpl LITERAL(MIRROR_CLASS_STATUS_INITIALIZED), MIRROR_CLASS_STATUS_OFFSET(%rdx)
+    jne  .Lart_quick_alloc_object_tlab_slow_path
+                                                               // Check whether the access flags include kAccClassIsFinalizable
+    testl LITERAL(ACCESS_FLAGS_CLASS_IS_FINALIZABLE), MIRROR_CLASS_ACCESS_FLAGS_OFFSET(%rdx)
+    jnz  .Lart_quick_alloc_object_tlab_slow_path
+    movl MIRROR_CLASS_OBJECT_SIZE_OFFSET(%rdx), %ecx           // Load the object size.
+    addl LITERAL(OBJECT_ALIGNMENT_MASK), %ecx                  // Align the size to 8: (size + 7) & ~7.
+    andl LITERAL(OBJECT_ALIGNMENT_MASK_TOGGLED), %ecx
+    movq %gs:THREAD_SELF_OFFSET, %r8                           // r8 = thread
+    movq THREAD_LOCAL_POS_OFFSET(%r8), %rax                    // Load thread_local_pos.
+    addq %rax, %rcx                                            // Add the object size.
+    cmpq THREAD_LOCAL_END_OFFSET(%r8), %rcx                    // Check if it fits.
+    ja   .Lart_quick_alloc_object_tlab_slow_path
+    movq %rcx, THREAD_LOCAL_POS_OFFSET(%r8)                    // Update thread_local_pos.
+    addq LITERAL(1), THREAD_LOCAL_OBJECTS_OFFSET(%r8)          // Increment thread_local_objects.
+                                                               // Store the class pointer in the header.
+                                                               // No fence needed for x86.
+    movl %edx, MIRROR_OBJECT_CLASS_OFFSET(%rax)
+    ret                                                        // Fast path succeeded.
+.Lart_quick_alloc_object_tlab_slow_path:
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    // save ref containing registers for GC
+    // Outgoing argument set up
+    movq %gs:THREAD_SELF_OFFSET, %rdx    // pass Thread::Current()
+    call SYMBOL(artAllocObjectFromCodeTLAB)      // cxx_name(arg0, arg1, Thread*)
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
+    RETURN_IF_RESULT_IS_NON_ZERO         // return or deliver exception
+END_FUNCTION art_quick_alloc_object_tlab
+
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab, TLAB)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_tlab, TLAB)
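
For reference, the fast path above is a bump-pointer TLAB allocation; any check that cannot be
satisfied inline falls through to artAllocObjectFromCodeTLAB. An equivalent C++-style sketch
(accessor and field names are illustrative, chosen to mirror the offsets used in the assembly,
not the runtime's actual API):

    mirror::Object* AllocObjectTlabFastPath(uint32_t type_idx, mirror::ArtMethod* method,
                                            Thread* self) {
      mirror::Class* klass = method->GetDexCacheResolvedTypes()->Get(type_idx);
      if (klass == nullptr) return nullptr;                        // Unresolved: slow path.
      if (klass->GetStatus() != mirror::Class::kStatusInitialized) return nullptr;
      if (klass->IsFinalizable()) return nullptr;                  // kAccClassIsFinalizable set.
      // Align the object size to 8 bytes: (size + 7) & ~7.
      size_t size = (klass->GetObjectSize() + kObjectAlignment - 1) & ~(kObjectAlignment - 1);
      uint8_t* pos = self->thread_local_pos;
      if (pos + size > self->thread_local_end) return nullptr;     // TLAB exhausted: slow path.
      self->thread_local_pos = pos + size;
      self->thread_local_objects += 1;
      mirror::Object* obj = reinterpret_cast<mirror::Object*>(pos);
      obj->SetClass(klass);  // Store the class pointer; no fence needed on x86.
      return obj;
    }
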
diff --git a/runtime/asm_support.h b/runtime/asm_support.h
index 92f4ebe..b1dbf6f 100644
--- a/runtime/asm_support.h
+++ b/runtime/asm_support.h
@@ -103,6 +103,16 @@
 ADD_TEST_EQ(THREAD_SELF_OFFSET,
             art::Thread::SelfOffset<__SIZEOF_POINTER__>().Int32Value())
 
+#define THREAD_LOCAL_POS_OFFSET (THREAD_CARD_TABLE_OFFSET + 125 * __SIZEOF_POINTER__)
+ADD_TEST_EQ(THREAD_LOCAL_POS_OFFSET,
+            art::Thread::ThreadLocalPosOffset<__SIZEOF_POINTER__>().Int32Value())
+#define THREAD_LOCAL_END_OFFSET (THREAD_LOCAL_POS_OFFSET + __SIZEOF_POINTER__)
+ADD_TEST_EQ(THREAD_LOCAL_END_OFFSET,
+            art::Thread::ThreadLocalEndOffset<__SIZEOF_POINTER__>().Int32Value())
+#define THREAD_LOCAL_OBJECTS_OFFSET (THREAD_LOCAL_POS_OFFSET + 2 * __SIZEOF_POINTER__)
+ADD_TEST_EQ(THREAD_LOCAL_OBJECTS_OFFSET,
+            art::Thread::ThreadLocalObjectsOffset<__SIZEOF_POINTER__>().Int32Value())
+
 // Offsets within java.lang.Object.
 #define MIRROR_OBJECT_CLASS_OFFSET 0
 ADD_TEST_EQ(MIRROR_OBJECT_CLASS_OFFSET, art::mirror::Object::ClassOffset().Int32Value())
@@ -120,6 +130,22 @@
 #define MIRROR_CLASS_COMPONENT_TYPE_OFFSET (4 + MIRROR_OBJECT_HEADER_SIZE)
 ADD_TEST_EQ(MIRROR_CLASS_COMPONENT_TYPE_OFFSET,
             art::mirror::Class::ComponentTypeOffset().Int32Value())
+#define MIRROR_CLASS_ACCESS_FLAGS_OFFSET (52 + MIRROR_OBJECT_HEADER_SIZE)
+ADD_TEST_EQ(MIRROR_CLASS_ACCESS_FLAGS_OFFSET,
+            art::mirror::Class::AccessFlagsOffset().Int32Value())
+#define MIRROR_CLASS_OBJECT_SIZE_OFFSET (80 + MIRROR_OBJECT_HEADER_SIZE)
+ADD_TEST_EQ(MIRROR_CLASS_OBJECT_SIZE_OFFSET,
+            art::mirror::Class::ObjectSizeOffset().Int32Value())
+#define MIRROR_CLASS_STATUS_OFFSET (92 + MIRROR_OBJECT_HEADER_SIZE)
+ADD_TEST_EQ(MIRROR_CLASS_STATUS_OFFSET,
+            art::mirror::Class::StatusOffset().Int32Value())
+
+#define MIRROR_CLASS_STATUS_INITIALIZED 10
+ADD_TEST_EQ(static_cast<uint32_t>(MIRROR_CLASS_STATUS_INITIALIZED),
+            static_cast<uint32_t>(art::mirror::Class::kStatusInitialized))
+#define ACCESS_FLAGS_CLASS_IS_FINALIZABLE 0x80000000
+ADD_TEST_EQ(static_cast<uint32_t>(ACCESS_FLAGS_CLASS_IS_FINALIZABLE),
+            static_cast<uint32_t>(kAccClassIsFinalizable))
 
 // Array offsets.
 #define MIRROR_ARRAY_LENGTH_OFFSET      MIRROR_OBJECT_HEADER_SIZE
@@ -134,6 +160,10 @@
     art::mirror::Array::DataOffset(
         sizeof(art::mirror::HeapReference<art::mirror::Object>)).Int32Value())
 
+#define MIRROR_OBJECT_ARRAY_COMPONENT_SIZE 4
+ADD_TEST_EQ(static_cast<size_t>(MIRROR_OBJECT_ARRAY_COMPONENT_SIZE),
+            sizeof(art::mirror::HeapReference<art::mirror::Object>))
+
 // Offsets within java.lang.String.
 #define MIRROR_STRING_VALUE_OFFSET  MIRROR_OBJECT_HEADER_SIZE
 ADD_TEST_EQ(MIRROR_STRING_VALUE_OFFSET, art::mirror::String::ValueOffset().Int32Value())
@@ -149,6 +179,10 @@
 ADD_TEST_EQ(MIRROR_ART_METHOD_DEX_CACHE_METHODS_OFFSET,
             art::mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value())
 
+#define MIRROR_ART_METHOD_DEX_CACHE_TYPES_OFFSET (8 + MIRROR_OBJECT_HEADER_SIZE)
+ADD_TEST_EQ(MIRROR_ART_METHOD_DEX_CACHE_TYPES_OFFSET,
+            art::mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value())
+
 #define MIRROR_ART_METHOD_QUICK_CODE_OFFSET_32        (36 + MIRROR_OBJECT_HEADER_SIZE)
 ADD_TEST_EQ(MIRROR_ART_METHOD_QUICK_CODE_OFFSET_32,
             art::mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(4).Int32Value())
@@ -178,6 +212,13 @@
 #define LOCK_WORD_THIN_LOCK_COUNT_ONE 65536
 ADD_TEST_EQ(LOCK_WORD_THIN_LOCK_COUNT_ONE, static_cast<int32_t>(art::LockWord::kThinLockCountOne))
 
+#define OBJECT_ALIGNMENT_MASK 7
+ADD_TEST_EQ(static_cast<size_t>(OBJECT_ALIGNMENT_MASK), art::kObjectAlignment - 1)
+
+#define OBJECT_ALIGNMENT_MASK_TOGGLED 0xFFFFFFF8
+ADD_TEST_EQ(static_cast<uint32_t>(OBJECT_ALIGNMENT_MASK_TOGGLED),
+            ~static_cast<uint32_t>(art::kObjectAlignment - 1))
+
 #if defined(__cplusplus)
 }  // End of CheckAsmSupportOffsets.
 #endif
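
Each #define added above is paired with an ADD_TEST_EQ entry inside CheckAsmSupportOffsets (note
the closing brace just above), which verifies the hand-maintained constant against the real C++
offset so the assembly fast paths cannot silently drift from the runtime's object layouts. The
idea, sketched (the actual macro may differ):

    // Sketch: tie an asm-visible constant to its C++ source of truth.
    #define MIRROR_OBJECT_CLASS_OFFSET 0
    void CheckAsmSupportOffsetsSketch() {
      CHECK_EQ(MIRROR_OBJECT_CLASS_OFFSET,
               art::mirror::Object::ClassOffset().Int32Value());
    }
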
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index 6296cf5..7144577 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -307,7 +307,6 @@
 // Runtime JDWP state.
 static JDWP::JdwpState* gJdwpState = nullptr;
 static bool gDebuggerConnected;  // debugger or DDMS is connected.
-static bool gDebuggerActive;     // debugger is making requests.
 static bool gDisposed;           // debugger called VirtualMachine.Dispose, so we should drop the connection.
 
 static bool gDdmThreadNotification = false;
@@ -319,6 +318,7 @@
 static Dbg::HpsgWhen gDdmNhsgWhen = Dbg::HPSG_WHEN_NEVER;
 static Dbg::HpsgWhat gDdmNhsgWhat;
 
+bool Dbg::gDebuggerActive = false;
 ObjectRegistry* Dbg::gRegistry = nullptr;
 
 // Recent allocation tracking.
@@ -331,7 +331,6 @@
 // Deoptimization support.
 std::vector<DeoptimizationRequest> Dbg::deoptimization_requests_;
 size_t Dbg::full_deoptimization_event_count_ = 0;
-size_t Dbg::delayed_full_undeoptimization_count_ = 0;
 
 // Instrumentation event reference counters.
 size_t Dbg::dex_pc_change_event_ref_count_ = 0;
@@ -620,7 +619,7 @@
   // Enable all debugging features, including scans for breakpoints.
   // This is a no-op if we're already active.
   // Only called from the JDWP handler thread.
-  if (gDebuggerActive) {
+  if (IsDebuggerActive()) {
     return;
   }
 
@@ -634,7 +633,6 @@
     MutexLock mu(Thread::Current(), *Locks::deoptimization_lock_);
     CHECK_EQ(deoptimization_requests_.size(), 0U);
     CHECK_EQ(full_deoptimization_event_count_, 0U);
-    CHECK_EQ(delayed_full_undeoptimization_count_, 0U);
     CHECK_EQ(dex_pc_change_event_ref_count_, 0U);
     CHECK_EQ(method_enter_event_ref_count_, 0U);
     CHECK_EQ(method_exit_event_ref_count_, 0U);
@@ -673,7 +671,7 @@
   ThreadState old_state = self->SetStateUnsafe(kRunnable);
 
   // Debugger may not be active at this point.
-  if (gDebuggerActive) {
+  if (IsDebuggerActive()) {
     {
       // Since we're going to disable deoptimization, we clear the deoptimization requests queue.
       // This prevents us from having any pending deoptimization request when the debugger attaches
@@ -681,7 +679,6 @@
       MutexLock mu(Thread::Current(), *Locks::deoptimization_lock_);
       deoptimization_requests_.clear();
       full_deoptimization_event_count_ = 0U;
-      delayed_full_undeoptimization_count_ = 0U;
     }
     if (instrumentation_events_ != 0) {
       runtime->GetInstrumentation()->RemoveListener(&gDebugInstrumentationListener,
@@ -704,10 +701,6 @@
   gDebuggerConnected = false;
 }
 
-bool Dbg::IsDebuggerActive() {
-  return gDebuggerActive;
-}
-
 void Dbg::ConfigureJdwp(const JDWP::JdwpOptions& jdwp_options) {
   CHECK_NE(jdwp_options.transport, JDWP::kJdwpTransportUnknown);
   gJdwpOptions = jdwp_options;
@@ -3020,29 +3013,6 @@
   }
 }
 
-void Dbg::DelayFullUndeoptimization() {
-  if (RequiresDeoptimization()) {
-    MutexLock mu(Thread::Current(), *Locks::deoptimization_lock_);
-    ++delayed_full_undeoptimization_count_;
-    DCHECK_LE(delayed_full_undeoptimization_count_, full_deoptimization_event_count_);
-  }
-}
-
-void Dbg::ProcessDelayedFullUndeoptimizations() {
-  // TODO: avoid taking the lock twice (once here and once in ManageDeoptimization).
-  {
-    MutexLock mu(Thread::Current(), *Locks::deoptimization_lock_);
-    while (delayed_full_undeoptimization_count_ > 0) {
-      DeoptimizationRequest req;
-      req.SetKind(DeoptimizationRequest::kFullUndeoptimization);
-      req.SetMethod(nullptr);
-      RequestDeoptimizationLocked(req);
-      --delayed_full_undeoptimization_count_;
-    }
-  }
-  ManageDeoptimization();
-}
-
 void Dbg::RequestDeoptimization(const DeoptimizationRequest& req) {
   if (req.GetKind() == DeoptimizationRequest::kNothing) {
     // Nothing to do.
@@ -3352,6 +3322,125 @@
   }
 }
 
+bool Dbg::IsForcedInterpreterNeededForCallingImpl(Thread* thread, mirror::ArtMethod* m) {
+  const SingleStepControl* const ssc = thread->GetSingleStepControl();
+  if (ssc == nullptr) {
+    // If we are not single-stepping, then we don't have to force interpreter.
+    return false;
+  }
+  if (Runtime::Current()->GetInstrumentation()->InterpretOnly()) {
+    // If we are in interpreter only mode, then we don't have to force interpreter.
+    return false;
+  }
+
+  if (!m->IsNative() && !m->IsProxyMethod()) {
+    // If we want to step into a method, then we have to force interpreter on that call.
+    if (ssc->GetStepDepth() == JDWP::SD_INTO) {
+      return true;
+    }
+  }
+  return false;
+}
+
+bool Dbg::IsForcedInterpreterNeededForResolutionImpl(Thread* thread, mirror::ArtMethod* m) {
+  instrumentation::Instrumentation* const instrumentation =
+      Runtime::Current()->GetInstrumentation();
+  // If we are in interpreter only mode, then we don't have to force interpreter.
+  if (instrumentation->InterpretOnly()) {
+    return false;
+  }
+  // We can only interpret pure Java methods.
+  if (m->IsNative() || m->IsProxyMethod()) {
+    return false;
+  }
+  const SingleStepControl* const ssc = thread->GetSingleStepControl();
+  if (ssc != nullptr) {
+    // If we want to step into a method, then we have to force interpreter on that call.
+    if (ssc->GetStepDepth() == JDWP::SD_INTO) {
+      return true;
+    }
+    // If we are stepping out of a static initializer that was implicitly
+    // invoked by calling a static method, by issuing a step in or step over,
+    // then we need to step into that method. Having a lower stack depth than
+    // the one recorded in the single step control indicates that the step
+    // originates from the static initializer.
+    if (ssc->GetStepDepth() != JDWP::SD_OUT &&
+        ssc->GetStackDepth() > GetStackDepth(thread)) {
+      return true;
+    }
+  }
+  // There are cases where we have to force interpreter on deoptimized methods,
+  // because in some cases the call will not be performed by invoking an entry
+  // point that has been replaced by the deoptimization, but instead by directly
+  // invoking the compiled code of the method, for example.
+  return instrumentation->IsDeoptimized(m);
+}
+
+bool Dbg::IsForcedInstrumentationNeededForResolutionImpl(Thread* thread, mirror::ArtMethod* m) {
+  // The upcall can be nullptr and in that case we don't need to do anything.
+  if (m == nullptr) {
+    return false;
+  }
+  instrumentation::Instrumentation* const instrumentation =
+      Runtime::Current()->GetInstrumentation();
+  // If we are in interpreter only mode, then we don't have to force interpreter.
+  if (instrumentation->InterpretOnly()) {
+    return false;
+  }
+  // We can only interpret pure Java methods.
+  if (m->IsNative() || m->IsProxyMethod()) {
+    return false;
+  }
+  const SingleStepControl* const ssc = thread->GetSingleStepControl();
+  if (ssc != nullptr) {
+    // If we are stepping out of a static initializer that was implicitly
+    // invoked by calling a static method, by issuing a step out, then we
+    // need to step into the caller of that method. Having a lower stack
+    // depth than the one recorded in the single step control indicates
+    // that the step originates from the static initializer.
+    if (ssc->GetStepDepth() == JDWP::SD_OUT &&
+        ssc->GetStackDepth() > GetStackDepth(thread)) {
+      return true;
+    }
+  }
+  // If we are returning from a static initializer that was implicitly
+  // invoked by calling a static method and the caller is deoptimized,
+  // then we have to deoptimize the stack without forcing interpreter
+  // on the static method that was called originally. This problem can
+  // be solved easily by forcing instrumentation on the called method,
+  // because the instrumentation exit hook will recognize the need for
+  // stack deoptimization by calling IsForcedInterpreterNeededForUpcall.
+  return instrumentation->IsDeoptimized(m);
+}
+
+bool Dbg::IsForcedInterpreterNeededForUpcallImpl(Thread* thread, mirror::ArtMethod* m) {
+  // The upcall can be nullptr and in that case we don't need to do anything.
+  if (m == nullptr) {
+    return false;
+  }
+  instrumentation::Instrumentation* const instrumentation =
+      Runtime::Current()->GetInstrumentation();
+  // If we are in interpreter only mode, then we don't have to force interpreter.
+  if (instrumentation->InterpretOnly()) {
+    return false;
+  }
+  // We can only interpret pure Java methods.
+  if (m->IsNative() || m->IsProxyMethod()) {
+    return false;
+  }
+  const SingleStepControl* const ssc = thread->GetSingleStepControl();
+  if (ssc != nullptr) {
+    // The debugger is not interested in what is happening below the level
+    // of the step, so we only force the interpreter when we are not below
+    // the step.
+    if (ssc->GetStackDepth() >= GetStackDepth(thread)) {
+      return true;
+    }
+  }
+  // We have to require stack deoptimization if the upcall is deoptimized.
+  return instrumentation->IsDeoptimized(m);
+}
+
 // Scoped utility class to suspend a thread so that we may do tasks such as walk its stack. Doesn't
 // cause suspension if the thread is the current thread.
 class ScopedThreadSuspension {
diff --git a/runtime/debugger.h b/runtime/debugger.h
index 01c9d5d..d015294 100644
--- a/runtime/debugger.h
+++ b/runtime/debugger.h
@@ -243,7 +243,9 @@
 
   // Returns true if we're actually debugging with a real debugger, false if it's
   // just DDMS (or nothing at all).
-  static bool IsDebuggerActive();
+  static bool IsDebuggerActive() {
+    return gDebuggerActive;
+  }
 
   // Configures JDWP with parsed command-line options.
   static void ConfigureJdwp(const JDWP::JdwpOptions& jdwp_options);
@@ -543,13 +545,6 @@
       LOCKS_EXCLUDED(Locks::deoptimization_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  // Support delayed full undeoptimization requests. This is currently only used for single-step
-  // events.
-  static void DelayFullUndeoptimization() LOCKS_EXCLUDED(Locks::deoptimization_lock_);
-  static void ProcessDelayedFullUndeoptimizations()
-      LOCKS_EXCLUDED(Locks::deoptimization_lock_)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
   // Manage deoptimization after updating JDWP events list. Suspends all threads, processes each
   // request and finally resumes all threads.
   static void ManageDeoptimization()
@@ -564,6 +559,53 @@
       LOCKS_EXCLUDED(Locks::breakpoint_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  /*
+   * Forced interpreter checkers for single-step and continue support.
+   */
+
+  // Indicates whether we need to force the use of the interpreter to invoke a method.
+  // This allows single-stepping or continuing into the called method.
+  static bool IsForcedInterpreterNeededForCalling(Thread* thread, mirror::ArtMethod* m)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    if (!IsDebuggerActive()) {
+      return false;
+    }
+    return IsForcedInterpreterNeededForCallingImpl(thread, m);
+  }
+
+  // Indicates whether we need to force the use of the interpreter entrypoint when calling a
+  // method through the resolution trampoline. This allows single-stepping or continuing into
+  // the called method.
+  static bool IsForcedInterpreterNeededForResolution(Thread* thread, mirror::ArtMethod* m)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    if (!IsDebuggerActive()) {
+      return false;
+    }
+    return IsForcedInterpreterNeededForResolutionImpl(thread, m);
+  }
+
+  // Indicates whether we need to force the use of the instrumentation entrypoint when calling
+  // a method through the resolution trampoline. This allows the stack to be deoptimized for
+  // debugging when we return from the called method.
+  static bool IsForcedInstrumentationNeededForResolution(Thread* thread, mirror::ArtMethod* m)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    if (!IsDebuggerActive()) {
+      return false;
+    }
+    return IsForcedInstrumentationNeededForResolutionImpl(thread, m);
+  }
+
+  // Indicates whether we need to force the use of the interpreter when returning from the
+  // interpreter into the runtime. This allows the stack to be deoptimized and execution to
+  // continue in the interpreter for debugging.
+  static bool IsForcedInterpreterNeededForUpcall(Thread* thread, mirror::ArtMethod* m)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    if (!IsDebuggerActive()) {
+      return false;
+    }
+    return IsForcedInterpreterNeededForUpcallImpl(thread, m);
+  }
+
   // Single-stepping.
   static JDWP::JdwpError ConfigureStep(JDWP::ObjectId thread_id, JDWP::JdwpStepSize size,
                                        JDWP::JdwpStepDepth depth)
@@ -690,11 +732,27 @@
       EXCLUSIVE_LOCKS_REQUIRED(Locks::deoptimization_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  static bool IsForcedInterpreterNeededForCallingImpl(Thread* thread, mirror::ArtMethod* m)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  static bool IsForcedInterpreterNeededForResolutionImpl(Thread* thread, mirror::ArtMethod* m)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  static bool IsForcedInstrumentationNeededForResolutionImpl(Thread* thread, mirror::ArtMethod* m)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  static bool IsForcedInterpreterNeededForUpcallImpl(Thread* thread, mirror::ArtMethod* m)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   static AllocRecord* recent_allocation_records_ PT_GUARDED_BY(Locks::alloc_tracker_lock_);
   static size_t alloc_record_max_ GUARDED_BY(Locks::alloc_tracker_lock_);
   static size_t alloc_record_head_ GUARDED_BY(Locks::alloc_tracker_lock_);
   static size_t alloc_record_count_ GUARDED_BY(Locks::alloc_tracker_lock_);
 
+  // Indicates whether the debugger is making requests.
+  static bool gDebuggerActive;
+
+  // The registry mapping objects to JDWP ids.
   static ObjectRegistry* gRegistry;
 
   // Deoptimization requests to be processed each time the event list is updated. This is used when
@@ -709,10 +767,6 @@
   // undeoptimize when the last event is unregistered (when the counter is set to 0).
   static size_t full_deoptimization_event_count_ GUARDED_BY(Locks::deoptimization_lock_);
 
-  // Count the number of full undeoptimization requests delayed to next resume or end of debug
-  // session.
-  static size_t delayed_full_undeoptimization_count_ GUARDED_BY(Locks::deoptimization_lock_);
-
   static size_t* GetReferenceCounterForEvent(uint32_t instrumentation_event);
 
   // Weak global type cache, TODO improve this.
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index 70ee042..8351e22 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -30,6 +30,7 @@
 #include "mirror/object_array-inl.h"
 #include "runtime.h"
 #include "scoped_thread_state_change.h"
+#include "debugger.h"
 
 namespace art {
 
@@ -639,6 +640,14 @@
     JValue result = interpreter::EnterInterpreterFromEntryPoint(self, code_item, shadow_frame);
     // Pop transition.
     self->PopManagedStackFragment(fragment);
+
+    // Request a stack deoptimization if needed.
+    mirror::ArtMethod* caller = QuickArgumentVisitor::GetCallingMethod(sp);
+    if (UNLIKELY(Dbg::IsForcedInterpreterNeededForUpcall(self, caller))) {
+      self->SetException(Thread::GetDeoptimizationException());
+      self->SetDeoptimizationReturnValue(result);
+    }
+
     // No need to restore the args since the method has already been run by the interpreter.
     return result.GetJ();
   }
@@ -950,14 +959,37 @@
         called->GetDexCache()->SetResolvedMethod(called_dex_method_idx, called);
       }
     }
+
     // Ensure that the called method's class is initialized.
     StackHandleScope<1> hs(soa.Self());
     Handle<mirror::Class> called_class(hs.NewHandle(called->GetDeclaringClass()));
     linker->EnsureInitialized(soa.Self(), called_class, true, true);
     if (LIKELY(called_class->IsInitialized())) {
-      code = called->GetEntryPointFromQuickCompiledCode();
+      if (UNLIKELY(Dbg::IsForcedInterpreterNeededForResolution(self, called))) {
+        // If we are single-stepping or the called method is deoptimized (by a
+        // breakpoint, for example), then we have to execute the called method
+        // with the interpreter.
+        code = GetQuickToInterpreterBridge();
+      } else if (UNLIKELY(Dbg::IsForcedInstrumentationNeededForResolution(self, caller))) {
+        // If the caller is deoptimized (by a breakpoint, for example), we have to
+        // continue its execution with interpreter when returning from the called
+        // method. Because we do not want to execute the called method with the
+        // interpreter, we wrap its execution into the instrumentation stubs.
+        // When the called method returns, it will execute the instrumentation
+        // exit hook that will determine the need of the interpreter with a call
+        // to Dbg::IsForcedInterpreterNeededForUpcall and deoptimize the stack if
+        // it is needed.
+        code = GetQuickInstrumentationEntryPoint();
+      } else {
+        code = called->GetEntryPointFromQuickCompiledCode();
+      }
     } else if (called_class->IsInitializing()) {
-      if (invoke_type == kStatic) {
+      if (UNLIKELY(Dbg::IsForcedInterpreterNeededForResolution(self, called))) {
+        // If we are single-stepping or the called method is deoptimized (by a
+        // breakpoint, for example), then we have to execute the called method
+        // with the interpreter.
+        code = GetQuickToInterpreterBridge();
+      } else if (invoke_type == kStatic) {
         // Class is still initializing, go to oat and grab code (trampoline must be left in place
         // until class is initialized to stop races between threads).
         code = linker->GetQuickOatCodeFor(called);
@@ -1152,7 +1184,7 @@
       gpr_index_--;
       if (kMultiGPRegistersWidened) {
         DCHECK_EQ(sizeof(uintptr_t), sizeof(int64_t));
-        PushGpr(static_cast<int64_t>(bit_cast<uint32_t, int32_t>(val)));
+        PushGpr(static_cast<int64_t>(bit_cast<int32_t, uint32_t>(val)));
       } else {
         PushGpr(val);
       }
@@ -1160,7 +1192,7 @@
       stack_entries_++;
       if (kMultiGPRegistersWidened) {
         DCHECK_EQ(sizeof(uintptr_t), sizeof(int64_t));
-        PushStack(static_cast<int64_t>(bit_cast<uint32_t, int32_t>(val)));
+        PushStack(static_cast<int64_t>(bit_cast<int32_t, uint32_t>(val)));
       } else {
         PushStack(val);
       }
@@ -1220,16 +1252,16 @@
 
   void AdvanceFloat(float val) {
     if (kNativeSoftFloatAbi) {
-      AdvanceInt(bit_cast<float, uint32_t>(val));
+      AdvanceInt(bit_cast<uint32_t, float>(val));
     } else {
       if (HaveFloatFpr()) {
         fpr_index_--;
         if (kRegistersNeededForDouble == 1) {
           if (kMultiFPRegistersWidened) {
-            PushFpr8(bit_cast<double, uint64_t>(val));
+            PushFpr8(bit_cast<uint64_t, double>(val));
           } else {
             // No widening, just use the bits.
-            PushFpr8(bit_cast<float, uint64_t>(val));
+            PushFpr8(static_cast<uint64_t>(bit_cast<uint32_t, float>(val)));
           }
         } else {
           PushFpr4(val);
@@ -1240,9 +1272,9 @@
           // Need to widen before storing: Note the "double" in the template instantiation.
           // Note: We need to jump through those hoops to make the compiler happy.
           DCHECK_EQ(sizeof(uintptr_t), sizeof(uint64_t));
-          PushStack(static_cast<uintptr_t>(bit_cast<double, uint64_t>(val)));
+          PushStack(static_cast<uintptr_t>(bit_cast<uint64_t, double>(val)));
         } else {
-          PushStack(bit_cast<float, uintptr_t>(val));
+          PushStack(static_cast<uintptr_t>(bit_cast<uint32_t, float>(val)));
         }
         fpr_index_ = 0;
       }
@@ -1876,8 +1908,8 @@
       case 'F': {
         if (kRuntimeISA == kX86) {
           // Convert back the result to float.
-          double d = bit_cast<uint64_t, double>(result_f);
-          return bit_cast<float, uint32_t>(static_cast<float>(d));
+          double d = bit_cast<double, uint64_t>(result_f);
+          return bit_cast<uint32_t, float>(static_cast<float>(d));
         } else {
           return result_f;
         }
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index dff8f4d..51cf558 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -2061,7 +2061,6 @@
   MutexLock mu(self, zygote_creation_lock_);
   // Try to see if we have any Zygote spaces.
   if (HasZygoteSpace()) {
-    LOG(WARNING) << __FUNCTION__ << " called when we already have a zygote space.";
     return;
   }
   Runtime::Current()->GetInternTable()->SwapPostZygoteWithPreZygote();
diff --git a/runtime/globals.h b/runtime/globals.h
index 0845475..ac8751c 100644
--- a/runtime/globals.h
+++ b/runtime/globals.h
@@ -109,6 +109,13 @@
 static constexpr bool kPoisonHeapReferences = false;
 #endif
 
+// If true, enable the TLAB allocator by default.
+#ifdef ART_USE_TLAB
+static constexpr bool kUseTlab = true;
+#else
+static constexpr bool kUseTlab = false;
+#endif
+
 // Kinds of tracing clocks.
 enum class TraceClockSource {
   kThreadCpu,
diff --git a/runtime/instrumentation.cc b/runtime/instrumentation.cc
index b53b8cd..9adb4ac 100644
--- a/runtime/instrumentation.cc
+++ b/runtime/instrumentation.cc
@@ -1030,7 +1030,8 @@
   NthCallerVisitor visitor(self, 1, true);
   visitor.WalkStack(true);
   bool deoptimize = (visitor.caller != nullptr) &&
-                    (interpreter_stubs_installed_ || IsDeoptimized(visitor.caller));
+                    (interpreter_stubs_installed_ || IsDeoptimized(visitor.caller) ||
+                    Dbg::IsForcedInterpreterNeededForUpcall(self, visitor.caller));
   if (deoptimize) {
     if (kVerboseInstrumentation) {
       LOG(INFO) << StringPrintf("Deoptimizing %s by returning from %s with result %#" PRIx64 " in ",
diff --git a/runtime/interpreter/interpreter_common.cc b/runtime/interpreter/interpreter_common.cc
index 26ab602..a3ab026 100644
--- a/runtime/interpreter/interpreter_common.cc
+++ b/runtime/interpreter/interpreter_common.cc
@@ -18,6 +18,7 @@
 
 #include <cmath>
 
+#include "debugger.h"
 #include "mirror/array-inl.h"
 #include "unstarted_runtime.h"
 
@@ -616,8 +617,14 @@
           << PrettyMethod(new_shadow_frame->GetMethod());
       UNREACHABLE();
     }
-    (new_shadow_frame->GetMethod()->GetEntryPointFromInterpreter())(self, code_item,
-                                                                    new_shadow_frame, result);
+    // Force the use of interpreter when it is required by the debugger.
+    mirror::EntryPointFromInterpreter* entry;
+    if (UNLIKELY(Dbg::IsForcedInterpreterNeededForCalling(self, new_shadow_frame->GetMethod()))) {
+      entry = &art::artInterpreterToInterpreterBridge;
+    } else {
+      entry = new_shadow_frame->GetMethod()->GetEntryPointFromInterpreter();
+    }
+    entry(self, code_item, new_shadow_frame, result);
   } else {
     UnstartedRuntimeInvoke(self, code_item, new_shadow_frame, result, first_dest_reg);
   }
diff --git a/runtime/interpreter/interpreter_common.h b/runtime/interpreter/interpreter_common.h
index 15396d6..7d413c5 100644
--- a/runtime/interpreter/interpreter_common.h
+++ b/runtime/interpreter/interpreter_common.h
@@ -317,7 +317,10 @@
   int32_t test_val = shadow_frame.GetVReg(inst->VRegA_31t(inst_data));
   DCHECK_EQ(switch_data[0], static_cast<uint16_t>(Instruction::kSparseSwitchSignature));
   uint16_t size = switch_data[1];
-  DCHECK_GT(size, 0);
+  // Return the length of the SPARSE_SWITCH instruction (3 code units) if the size is 0.
+  if (size == 0) {
+    return 3;
+  }
   const int32_t* keys = reinterpret_cast<const int32_t*>(&switch_data[2]);
   DCHECK(IsAligned<4>(keys));
   const int32_t* entries = keys + size;
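
Context for the size == 0 change: SPARSE_SWITCH is a 31t-format instruction occupying three
16-bit code units, so returning 3 simply steps the interpreter past the switch when its payload
declares zero entries, rather than tripping a DCHECK on well-formed but empty switch data. The
payload layout the surrounding code walks, as a sketch:

    // sparse-switch-payload, in 16-bit code units:
    //   switch_data[0]   ident: kSparseSwitchSignature (0x0200)
    //   switch_data[1]   size:  number of entries
    //   switch_data[2..] int32_t keys[size], 4-byte aligned, ascending
    //   ...followed by   int32_t targets[size]
    const int32_t* keys = reinterpret_cast<const int32_t*>(&switch_data[2]);
    const int32_t* entries = keys + size;  // Branch targets, relative to the switch opcode.
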
diff --git a/runtime/interpreter/unstarted_runtime.cc b/runtime/interpreter/unstarted_runtime.cc
index fbbc863..98dfdbd 100644
--- a/runtime/interpreter/unstarted_runtime.cc
+++ b/runtime/interpreter/unstarted_runtime.cc
@@ -460,7 +460,7 @@
 static void UnstartedDoubleDoubleToRawLongBits(
     Thread* self ATTRIBUTE_UNUSED, ShadowFrame* shadow_frame, JValue* result, size_t arg_offset) {
   double in = shadow_frame->GetVRegDouble(arg_offset);
-  result->SetJ(bit_cast<int64_t>(in));
+  result->SetJ(bit_cast<int64_t, double>(in));
 }
 
 static mirror::Object* GetDexFromDexCache(Thread* self, mirror::DexCache* dex_cache)
diff --git a/runtime/java_vm_ext.cc b/runtime/java_vm_ext.cc
index e68616f..09bfbf3 100644
--- a/runtime/java_vm_ext.cc
+++ b/runtime/java_vm_ext.cc
@@ -631,20 +631,20 @@
 
   Locks::mutator_lock_->AssertNotHeld(self);
   const char* path_str = path.empty() ? nullptr : path.c_str();
-  void* handle = dlopen(path_str, RTLD_LAZY);
+  void* handle = dlopen(path_str, RTLD_NOW);
   bool needs_native_bridge = false;
   if (handle == nullptr) {
     if (android::NativeBridgeIsSupported(path_str)) {
-      handle = android::NativeBridgeLoadLibrary(path_str, RTLD_LAZY);
+      handle = android::NativeBridgeLoadLibrary(path_str, RTLD_NOW);
       needs_native_bridge = true;
     }
   }
 
-  VLOG(jni) << "[Call to dlopen(\"" << path << "\", RTLD_LAZY) returned " << handle << "]";
+  VLOG(jni) << "[Call to dlopen(\"" << path << "\", RTLD_NOW) returned " << handle << "]";
 
   if (handle == nullptr) {
     *error_msg = dlerror();
-    LOG(ERROR) << "dlopen(\"" << path << "\", RTLD_LAZY) failed: " << *error_msg;
+    VLOG(jni) << "dlopen(\"" << path << "\", RTLD_NOW) failed: " << *error_msg;
     return false;
   }
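
The switch from RTLD_LAZY to RTLD_NOW makes dlopen resolve all undefined symbols at load time, so
a library with missing dependencies fails immediately with a meaningful dlerror instead of
aborting later at the first use of an unresolved symbol; accordingly, the failure log drops from
LOG(ERROR) to VLOG(jni) since *error_msg is surfaced to the caller anyway. A minimal standalone
sketch of the pattern:

    #include <dlfcn.h>
    #include <string>

    // Eager resolution with error capture (mirrors the change above).
    bool LoadNativeLibrarySketch(const std::string& path, std::string* error_msg) {
      void* handle = dlopen(path.empty() ? nullptr : path.c_str(), RTLD_NOW);
      if (handle == nullptr) {
        *error_msg = dlerror();  // E.g. "undefined symbol: ..." reported at load time.
        return false;
      }
      return true;
    }
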
 
diff --git a/runtime/jdwp/jdwp_event.cc b/runtime/jdwp/jdwp_event.cc
index 4bf7142..c9a4483 100644
--- a/runtime/jdwp/jdwp_event.cc
+++ b/runtime/jdwp/jdwp_event.cc
@@ -133,7 +133,6 @@
       case EK_METHOD_ENTRY:
       case EK_METHOD_EXIT:
       case EK_METHOD_EXIT_WITH_RETURN_VALUE:
-      case EK_SINGLE_STEP:
       case EK_FIELD_ACCESS:
       case EK_FIELD_MODIFICATION:
         return true;
@@ -278,16 +277,7 @@
         Dbg::UnconfigureStep(pMod->step.threadId);
       }
     }
-    if (pEvent->eventKind == EK_SINGLE_STEP) {
-      // Special case for single-steps where we want to avoid the slow pattern deoptimize/undeoptimize
-      // loop between each single-step. In a IDE, this would happens each time the user click on the
-      // "single-step" button. Here we delay the full undeoptimization to the next resume
-      // (VM.Resume or ThreadReference.Resume) or the end of the debugging session (VM.Dispose or
-      // runtime shutdown).
-      // Therefore, in a singles-stepping sequence, only the first single-step will trigger a full
-      // deoptimization and only the last single-step will trigger a full undeoptimization.
-      Dbg::DelayFullUndeoptimization();
-    } else if (NeedsFullDeoptimization(pEvent->eventKind)) {
+    if (NeedsFullDeoptimization(pEvent->eventKind)) {
       CHECK_EQ(req.GetKind(), DeoptimizationRequest::kNothing);
       CHECK(req.Method() == nullptr);
       req.SetKind(DeoptimizationRequest::kFullUndeoptimization);
diff --git a/runtime/jdwp/jdwp_handler.cc b/runtime/jdwp/jdwp_handler.cc
index c7083dc..add1394 100644
--- a/runtime/jdwp/jdwp_handler.cc
+++ b/runtime/jdwp/jdwp_handler.cc
@@ -295,7 +295,6 @@
  */
 static JdwpError VM_Resume(JdwpState*, Request*, ExpandBuf*)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  Dbg::ProcessDelayedFullUndeoptimizations();
   Dbg::ResumeVM();
   return ERR_NONE;
 }
@@ -989,8 +988,6 @@
     return ERR_NONE;
   }
 
-  Dbg::ProcessDelayedFullUndeoptimizations();
-
   Dbg::ResumeThread(thread_id);
   return ERR_NONE;
 }
diff --git a/runtime/jdwp/jdwp_main.cc b/runtime/jdwp/jdwp_main.cc
index 3d69796..e2b88a5 100644
--- a/runtime/jdwp/jdwp_main.cc
+++ b/runtime/jdwp/jdwp_main.cc
@@ -322,8 +322,6 @@
     CHECK(event_list_ == nullptr);
   }
 
-  Dbg::ProcessDelayedFullUndeoptimizations();
-
   /*
    * Should not have one of these in progress.  If the debugger went away
    * mid-request, though, we could see this.
diff --git a/runtime/jit/jit_code_cache.h b/runtime/jit/jit_code_cache.h
index aa8c717..8a20e39 100644
--- a/runtime/jit/jit_code_cache.h
+++ b/runtime/jit/jit_code_cache.h
@@ -47,33 +47,50 @@
   static constexpr size_t kMaxCapacity = 1 * GB;
   static constexpr size_t kDefaultCapacity = 2 * MB;
 
+  // Create the code cache with a code + data capacity equal to "capacity"; on failure, an
+  // error message is returned in the out argument error_msg.
   static JitCodeCache* Create(size_t capacity, std::string* error_msg);
 
   const uint8_t* CodeCachePtr() const {
     return code_cache_ptr_;
   }
+
   size_t CodeCacheSize() const {
     return code_cache_ptr_ - code_cache_begin_;
   }
+
   size_t CodeCacheRemain() const {
     return code_cache_end_ - code_cache_ptr_;
   }
+
+  const uint8_t* DataCachePtr() const {
+    return data_cache_ptr_;
+  }
+
   size_t DataCacheSize() const {
     return data_cache_ptr_ - data_cache_begin_;
   }
+
   size_t DataCacheRemain() const {
     return data_cache_end_ - data_cache_ptr_;
   }
+
   size_t NumMethods() const {
     return num_methods_;
   }
 
+  // Return true if the code cache contains the code pointer which is the entrypoint of the method.
   bool ContainsMethod(mirror::ArtMethod* method) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  // Return true if the code cache contains a code ptr.
   bool ContainsCodePtr(const void* ptr) const;
 
+  // Reserve a region of code of size at least "size". Returns nullptr if there is no more room.
   uint8_t* ReserveCode(Thread* self, size_t size) LOCKS_EXCLUDED(lock_);
 
+  // Add a data array of size (end - begin) with the associated contents; returns nullptr if
+  // there is no more room.
   uint8_t* AddDataArray(Thread* self, const uint8_t* begin, const uint8_t* end)
       LOCKS_EXCLUDED(lock_);
 
@@ -81,14 +98,19 @@
   const void* GetCodeFor(mirror::ArtMethod* method)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) LOCKS_EXCLUDED(lock_);
 
+  // Save the compiled code for a method so that GetCodeFor(method) will return old_code_ptr if the
+  // entrypoint isn't within the cache.
   void SaveCompiledCode(mirror::ArtMethod* method, const void* old_code_ptr)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) LOCKS_EXCLUDED(lock_);
 
  private:
   // Takes ownership of code_mem_map.
   explicit JitCodeCache(MemMap* code_mem_map);
+
+  // Unimplemented; TODO: determine if it is necessary.
   void FlushInstructionCache();
 
+  // Lock which guards the cache contents (e.g. method_code_map_).
   Mutex lock_;
   // Mem map which holds code and data. We do this since we need to have 32 bit offsets from method
   // headers in code cache which point to things in the data cache. If the maps are more than 4GB
@@ -106,7 +128,7 @@
   // TODO: This relies on methods not moving.
   // This map holds code for methods if they were deoptimized by the instrumentation stubs. This is
   // required since we have to implement ClassLinker::GetQuickOatCodeFor for walking stacks.
-  SafeMap<mirror::ArtMethod*, const void*> method_code_map_;
+  SafeMap<mirror::ArtMethod*, const void*> method_code_map_ GUARDED_BY(lock_);
 
   DISALLOW_COPY_AND_ASSIGN(JitCodeCache);
 };
diff --git a/runtime/jit/jit_code_cache_test.cc b/runtime/jit/jit_code_cache_test.cc
new file mode 100644
index 0000000..2155552
--- /dev/null
+++ b/runtime/jit/jit_code_cache_test.cc
@@ -0,0 +1,103 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "common_runtime_test.h"
+
+#include "class_linker.h"
+#include "jit_code_cache.h"
+#include "mirror/art_method-inl.h"
+#include "scoped_thread_state_change.h"
+#include "thread-inl.h"
+#include "utils.h"
+
+namespace art {
+namespace jit {
+
+class JitCodeCacheTest : public CommonRuntimeTest {
+ public:
+};
+
+TEST_F(JitCodeCacheTest, TestCoverage) {
+  std::string error_msg;
+  constexpr size_t kSize = 1 * MB;
+  std::unique_ptr<JitCodeCache> code_cache(
+      JitCodeCache::Create(kSize, &error_msg));
+  ASSERT_TRUE(code_cache.get() != nullptr) << error_msg;
+  ASSERT_TRUE(code_cache->CodeCachePtr() != nullptr);
+  ASSERT_EQ(code_cache->CodeCacheSize(), 0u);
+  ASSERT_GT(code_cache->CodeCacheRemain(), 0u);
+  ASSERT_TRUE(code_cache->DataCachePtr() != nullptr);
+  ASSERT_EQ(code_cache->DataCacheSize(), 0u);
+  ASSERT_GT(code_cache->DataCacheRemain(), 0u);
+  ASSERT_EQ(code_cache->CodeCacheRemain() + code_cache->DataCacheRemain(), kSize);
+  ASSERT_EQ(code_cache->NumMethods(), 0u);
+  ScopedObjectAccess soa(Thread::Current());
+  StackHandleScope<1> hs(soa.Self());
+  uint8_t* const reserved_code = code_cache->ReserveCode(soa.Self(), 4 * KB);
+  ASSERT_TRUE(reserved_code != nullptr);
+  ASSERT_TRUE(code_cache->ContainsCodePtr(reserved_code));
+  ASSERT_EQ(code_cache->NumMethods(), 1u);
+  ClassLinker* const cl = Runtime::Current()->GetClassLinker();
+  auto h_method = hs.NewHandle(cl->AllocArtMethod(soa.Self()));
+  ASSERT_FALSE(code_cache->ContainsMethod(h_method.Get()));
+  h_method->SetEntryPointFromQuickCompiledCode(reserved_code);
+  ASSERT_TRUE(code_cache->ContainsMethod(h_method.Get()));
+  ASSERT_EQ(code_cache->GetCodeFor(h_method.Get()), reserved_code);
+  // Save the code and then change it.
+  code_cache->SaveCompiledCode(h_method.Get(), reserved_code);
+  h_method->SetEntryPointFromQuickCompiledCode(nullptr);
+  ASSERT_EQ(code_cache->GetCodeFor(h_method.Get()), reserved_code);
+  const uint8_t data_arr[] = {1, 2, 3, 4, 5};
+  uint8_t* data_ptr = code_cache->AddDataArray(soa.Self(), data_arr, data_arr + sizeof(data_arr));
+  ASSERT_TRUE(data_ptr != nullptr);
+  ASSERT_EQ(memcmp(data_ptr, data_arr, sizeof(data_arr)), 0);
+}
+
+TEST_F(JitCodeCacheTest, TestOverflow) {
+  std::string error_msg;
+  constexpr size_t kSize = 1 * MB;
+  std::unique_ptr<JitCodeCache> code_cache(
+      JitCodeCache::Create(kSize, &error_msg));
+  ASSERT_TRUE(code_cache.get() != nullptr) << error_msg;
+  ASSERT_TRUE(code_cache->CodeCachePtr() != nullptr);
+  size_t code_bytes = 0;
+  size_t data_bytes = 0;
+  constexpr size_t kCodeArrSize = 4 * KB;
+  constexpr size_t kDataArrSize = 4 * KB;
+  uint8_t data_arr[kDataArrSize] = {53};
+  // Add code and data until we are full.
+  uint8_t* code_ptr = nullptr;
+  uint8_t* data_ptr = nullptr;
+  do {
+    code_ptr = code_cache->ReserveCode(Thread::Current(), kCodeArrSize);
+    data_ptr = code_cache->AddDataArray(Thread::Current(), data_arr, data_arr + kDataArrSize);
+    if (code_ptr != nullptr) {
+      code_bytes += kCodeArrSize;
+    }
+    if (data_ptr != nullptr) {
+      data_bytes += kDataArrSize;
+    }
+  } while (code_ptr != nullptr || data_ptr != nullptr);
+  // Make sure we added a reasonable amount.
+  CHECK_GT(code_bytes, 0u);
+  CHECK_LE(code_bytes, kSize);
+  CHECK_GT(data_bytes, 0u);
+  CHECK_LE(data_bytes, kSize);
+  CHECK_GE(code_bytes + data_bytes, kSize * 4 / 5);
+}
+
+}  // namespace jit
+}  // namespace art
diff --git a/runtime/jni_internal_test.cc b/runtime/jni_internal_test.cc
index 1048214..5516eab 100644
--- a/runtime/jni_internal_test.cc
+++ b/runtime/jni_internal_test.cc
@@ -1355,24 +1355,38 @@
   s = env_->NewStringUTF("\xed\xa0\x81\xed\xb0\x80");
   EXPECT_NE(s, nullptr);
   EXPECT_EQ(2, env_->GetStringLength(s));
-  // Note that this uses 2 x 3 byte UTF sequences, one
-  // for each half of the surrogate pair.
-  EXPECT_EQ(6, env_->GetStringUTFLength(s));
+
+  // The surrogate pair gets encoded into a 4 byte UTF sequence ...
+  EXPECT_EQ(4, env_->GetStringUTFLength(s));
   const char* chars = env_->GetStringUTFChars(s, nullptr);
-  EXPECT_STREQ("\xed\xa0\x81\xed\xb0\x80", chars);
+  EXPECT_STREQ("\xf0\x90\x90\x80", chars);
   env_->ReleaseStringUTFChars(s, chars);
 
+  // ... but is stored as-is in the UTF-16 representation.
+  const jchar* jchars = env_->GetStringChars(s, nullptr);
+  EXPECT_EQ(0xd801, jchars[0]);
+  EXPECT_EQ(0xdc00, jchars[1]);
+  env_->ReleaseStringChars(s, jchars);
+
   // 4 byte UTF sequence appended to an encoded surrogate pair.
   s = env_->NewStringUTF("\xed\xa0\x81\xed\xb0\x80 \xf0\x9f\x8f\xa0");
   EXPECT_NE(s, nullptr);
-  EXPECT_EQ(5, env_->GetStringLength(s));
-  EXPECT_EQ(13, env_->GetStringUTFLength(s));
-  chars = env_->GetStringUTFChars(s, nullptr);
+
   // The 4 byte sequence {0xf0, 0x9f, 0x8f, 0xa0} is converted into a surrogate
-  // pair {0xd83c, 0xdfe0} which is then converted into a two three byte
-  // sequences {0xed 0xa0, 0xbc} and {0xed, 0xbf, 0xa0}, one for each half of
-  // the surrogate pair.
-  EXPECT_STREQ("\xed\xa0\x81\xed\xb0\x80 \xed\xa0\xbc\xed\xbf\xa0", chars);
+  // pair {0xd83c, 0xdfe0}.
+  EXPECT_EQ(5, env_->GetStringLength(s));
+  jchars = env_->GetStringChars(s, nullptr);
+  // The first surrogate pair, encoded as such in the input.
+  EXPECT_EQ(0xd801, jchars[0]);
+  EXPECT_EQ(0xdc00, jchars[1]);
+  // The second surrogate pair, from the 4 byte UTF sequence in the input.
+  EXPECT_EQ(0xd83c, jchars[3]);
+  EXPECT_EQ(0xdfe0, jchars[4]);
+  env_->ReleaseStringChars(s, jchars);
+
+  EXPECT_EQ(9, env_->GetStringUTFLength(s));
+  chars = env_->GetStringUTFChars(s, nullptr);
+  EXPECT_STREQ("\xf0\x90\x90\x80 \xf0\x9f\x8f\xa0", chars);
   env_->ReleaseStringUTFChars(s, chars);
 
   // A string with 1, 2, 3 and 4 byte UTF sequences with spaces
@@ -1380,7 +1394,7 @@
   s = env_->NewStringUTF("\x24 \xc2\xa2 \xe2\x82\xac \xf0\x9f\x8f\xa0");
   EXPECT_NE(s, nullptr);
   EXPECT_EQ(8, env_->GetStringLength(s));
-  EXPECT_EQ(15, env_->GetStringUTFLength(s));
+  EXPECT_EQ(13, env_->GetStringUTFLength(s));
 }
 
 TEST_F(JniInternalTest, NewString) {
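The updated expectations follow from plain UTF-8/UTF-16 length arithmetic. As a worked check for the last string ('$', ' ', U+00A2, ' ', U+20AC, ' ', U+1F3E0):

// UTF-16 code units: 1+1+1+1+1+1+2     = 8   (GetStringLength)
// UTF-8 bytes:       1+1+2+1+3+1+4     = 13  (GetStringUTFLength)
// Old behaviour:     1+1+2+1+3+1+(3+3) = 15, because U+1F3E0 was emitted
// as two 3-byte surrogate halves rather than a single 4-byte sequence.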
diff --git a/runtime/memory_region.h b/runtime/memory_region.h
index 939a1a9..f867f6a 100644
--- a/runtime/memory_region.h
+++ b/runtime/memory_region.h
@@ -19,6 +19,7 @@
 
 #include <stdint.h>
 
+#include "base/casts.h"
 #include "base/logging.h"
 #include "base/macros.h"
 #include "base/value_object.h"
@@ -60,23 +61,6 @@
     *ComputeInternalPointer<T>(offset) = value;
   }
 
-  // TODO: Local hack to prevent name clashes between two conflicting
-  // implementations of bit_cast:
-  // - art::bit_cast<Destination, Source> runtime/base/casts.h, and
-  // - art::bit_cast<Source, Destination> from runtime/utils.h.
-  // Remove this when these routines have been merged.
-  template<typename Source, typename Destination>
-  static Destination local_bit_cast(Source in) {
-    static_assert(sizeof(Source) <= sizeof(Destination),
-                  "Size of Source not <= size of Destination");
-    union {
-      Source u;
-      Destination v;
-    } tmp;
-    tmp.u = in;
-    return tmp.v;
-  }
-
   // Load value of type `T` at `offset`.  The memory address corresponding
   // to `offset` does not need to be word-aligned.
   template<typename T> T LoadUnaligned(uintptr_t offset) const {
@@ -88,7 +72,7 @@
       equivalent_unsigned_integer_value +=
           *ComputeInternalPointer<uint8_t>(offset + i) << (i * kBitsPerByte);
     }
-    return local_bit_cast<U, T>(equivalent_unsigned_integer_value);
+    return bit_cast<T, U>(equivalent_unsigned_integer_value);
   }
 
   // Store `value` (of type `T`) at `offset`.  The memory address
@@ -96,7 +80,7 @@
   template<typename T> void StoreUnaligned(uintptr_t offset, T value) const {
     // Equivalent unsigned integer type corresponding to T.
     typedef typename UnsignedIntegerType<sizeof(T)>::type U;
-    U equivalent_unsigned_integer_value = local_bit_cast<T, U>(value);
+    U equivalent_unsigned_integer_value = bit_cast<U, T>(value);
     // Write the value byte by byte in a little-endian fashion.
     for (size_t i = 0; i < sizeof(U); ++i) {
       *ComputeInternalPointer<uint8_t>(offset + i) =
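The flipped template arguments at the call sites later in this change follow from this consolidation: the removed helpers took <Source, Destination>, while the surviving art::bit_cast from base/casts.h is destination-first. A self-contained stand-in with the same shape (the name bit_cast_sketch and the equal-size constraint are illustrative, not the ART source):

#include <cstdint>
#include <cstring>

template <typename Dest, typename Source>
inline Dest bit_cast_sketch(const Source& source) {
  static_assert(sizeof(Dest) == sizeof(Source), "sizes must match");
  Dest dest;
  std::memcpy(&dest, &source, sizeof(dest));  // copy the bits, not the value
  return dest;
}

// Call sites in this change now read destination-first:
//   uint32_t bits = bit_cast<uint32_t, float>(1.0f);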
diff --git a/runtime/mirror/art_method.cc b/runtime/mirror/art_method.cc
index ffee59e..c1f7594 100644
--- a/runtime/mirror/art_method.cc
+++ b/runtime/mirror/art_method.cc
@@ -401,7 +401,9 @@
 
   Runtime* runtime = Runtime::Current();
   // Call the invoke stub, passing everything as arguments.
-  if (UNLIKELY(!runtime->IsStarted())) {
+  // If the runtime is not yet started, or the debugger requires it, perform the
+  // invocation through the interpreter.
+  if (UNLIKELY(!runtime->IsStarted() || Dbg::IsForcedInterpreterNeededForCalling(self, this))) {
     if (IsStatic()) {
       art::interpreter::EnterInterpreterFromInvoke(self, this, nullptr, args, result);
     } else {
diff --git a/runtime/mirror/class-inl.h b/runtime/mirror/class-inl.h
index 97052f1..c368dc6 100644
--- a/runtime/mirror/class-inl.h
+++ b/runtime/mirror/class-inl.h
@@ -45,7 +45,7 @@
     bool is_variable_size = IsVariableSize<kVerifyFlags, kReadBarrierOption>();
     CHECK(!is_variable_size) << " class=" << PrettyTypeOf(this);
   }
-  return GetField32(OFFSET_OF_OBJECT_MEMBER(Class, object_size_));
+  return GetField32(ObjectSizeOffset());
 }
 
 inline Class* Class::GetSuperClass() {
@@ -523,7 +523,7 @@
       << " IsArtField=" << (this == ArtField::GetJavaLangReflectArtField())
       << " IsArtMethod=" << (this == ArtMethod::GetJavaLangReflectArtMethod())
       << " descriptor=" << PrettyDescriptor(this);
-  return GetField32<kVerifyFlags>(OFFSET_OF_OBJECT_MEMBER(Class, access_flags_));
+  return GetField32<kVerifyFlags>(AccessFlagsOffset());
 }
 
 inline String* Class::GetName() {
diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h
index e7f7c6e..2dff383 100644
--- a/runtime/mirror/class.h
+++ b/runtime/mirror/class.h
@@ -204,6 +204,9 @@
 
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
   ALWAYS_INLINE uint32_t GetAccessFlags() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  static MemberOffset AccessFlagsOffset() {
+    return OFFSET_OF_OBJECT_MEMBER(Class, access_flags_);
+  }
 
   void SetAccessFlags(uint32_t new_access_flags) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -527,6 +530,9 @@
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
            ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   uint32_t GetObjectSize() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  static MemberOffset ObjectSizeOffset() {
+    return OFFSET_OF_OBJECT_MEMBER(Class, object_size_);
+  }
 
   void SetObjectSize(uint32_t new_object_size) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     DCHECK(!IsVariableSize());
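These accessors mirror the ThreadOffset-style getters elsewhere in the runtime: they let code generators name the field offsets without spelling OFFSET_OF_OBJECT_MEMBER at every site. A hypothetical consumer (the accessors are real, the codegen context is not):

MemberOffset size_off = mirror::Class::ObjectSizeOffset();
MemberOffset flags_off = mirror::Class::AccessFlagsOffset();
// e.g. emit a 32-bit load of [class_reg + size_off.Int32Value()]
// in a JIT allocation fast path.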
diff --git a/runtime/native/java_lang_Runtime.cc b/runtime/native/java_lang_Runtime.cc
index 84b18ab..bd043a8 100644
--- a/runtime/native/java_lang_Runtime.cc
+++ b/runtime/native/java_lang_Runtime.cc
@@ -30,6 +30,12 @@
 #include "ScopedUtfChars.h"
 #include "verify_object-inl.h"
 
+#include <sstream>
+#ifdef HAVE_ANDROID_OS
+// This function is provided by android linker.
+extern "C" void android_update_LD_LIBRARY_PATH(const char* ld_library_path);
+#endif  // HAVE_ANDROID_OS
+
 namespace art {
 
 static void Runtime_gc(JNIEnv*, jclass) {
@@ -46,30 +52,53 @@
   exit(status);
 }
 
-static jstring Runtime_nativeLoad(JNIEnv* env, jclass, jstring javaFilename, jobject javaLoader,
-                                  jstring javaLdLibraryPath) {
-  // TODO: returns NULL on success or an error message describing the failure on failure. This
-  // should be refactored in terms of suppressed exceptions.
-  ScopedUtfChars filename(env, javaFilename);
-  if (filename.c_str() == NULL) {
-    return NULL;
+static void SetLdLibraryPath(JNIEnv* env, jstring javaLdLibraryPathJstr, jstring javaDexPathJstr) {
+#ifdef HAVE_ANDROID_OS
+  std::stringstream ss;
+  if (javaLdLibraryPathJstr != nullptr) {
+    ScopedUtfChars javaLdLibraryPath(env, javaLdLibraryPathJstr);
+    if (javaLdLibraryPath.c_str() != nullptr) {
+      ss << javaLdLibraryPath.c_str();
+    }
   }
 
-  if (javaLdLibraryPath != NULL) {
-    ScopedUtfChars ldLibraryPath(env, javaLdLibraryPath);
-    if (ldLibraryPath.c_str() == NULL) {
-      return NULL;
-    }
-    void* sym = dlsym(RTLD_DEFAULT, "android_update_LD_LIBRARY_PATH");
-    if (sym != NULL) {
-      typedef void (*Fn)(const char*);
-      Fn android_update_LD_LIBRARY_PATH = reinterpret_cast<Fn>(sym);
-      (*android_update_LD_LIBRARY_PATH)(ldLibraryPath.c_str());
-    } else {
-      LOG(WARNING) << "android_update_LD_LIBRARY_PATH not found; .so dependencies will not work!";
+  if (javaDexPathJstr != nullptr) {
+    ScopedUtfChars javaDexPath(env, javaDexPathJstr);
+    if (javaDexPath.c_str() != nullptr) {
+      std::vector<std::string> dexPathVector;
+      Split(javaDexPath.c_str(), ':', &dexPathVector);
+
+      for (auto abi : art::Runtime::Current()->GetCpuAbilist()) {
+        for (auto zip_path : dexPathVector) {
+          // Native libraries live under lib/<abi>/ inside the .apk file.
+          ss << ":" << zip_path << "!" << "lib/" << abi;
+        }
+      }
     }
   }
 
+  std::string ldLibraryPathStr = ss.str();
+  const char* ldLibraryPath = ldLibraryPathStr.c_str();
+  if (*ldLibraryPath == ':') {
+    ++ldLibraryPath;
+  }
+
+  android_update_LD_LIBRARY_PATH(ldLibraryPath);
+#else
+  LOG(WARNING) << "android_update_LD_LIBRARY_PATH not found; .so dependencies will not work!";
+  UNUSED(javaLdLibraryPathJstr, javaDexPathJstr, env);
+#endif
+}
+
+static jstring Runtime_nativeLoad(JNIEnv* env, jclass, jstring javaFilename, jobject javaLoader,
+                                  jstring javaLdLibraryPathJstr, jstring javaDexPathJstr) {
+  ScopedUtfChars filename(env, javaFilename);
+  if (filename.c_str() == nullptr) {
+    return nullptr;
+  }
+
+  SetLdLibraryPath(env, javaLdLibraryPathJstr, javaDexPathJstr);
+
   std::string error_msg;
   {
     JavaVMExt* vm = Runtime::Current()->GetJavaVM();
@@ -101,7 +130,7 @@
   NATIVE_METHOD(Runtime, gc, "()V"),
   NATIVE_METHOD(Runtime, maxMemory, "!()J"),
   NATIVE_METHOD(Runtime, nativeExit, "(I)V"),
-  NATIVE_METHOD(Runtime, nativeLoad, "(Ljava/lang/String;Ljava/lang/ClassLoader;Ljava/lang/String;)Ljava/lang/String;"),
+  NATIVE_METHOD(Runtime, nativeLoad, "(Ljava/lang/String;Ljava/lang/ClassLoader;Ljava/lang/String;Ljava/lang/String;)Ljava/lang/String;"),
   NATIVE_METHOD(Runtime, totalMemory, "!()J"),
 };
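To make the string building concrete (all values hypothetical): with an LD_LIBRARY_PATH of "/data/app/example/lib/arm64", a dex path of "/data/app/example/base.apk", and a CPU abilist of {"arm64-v8a", "armeabi-v7a"}, the nested loop yields:

/data/app/example/lib/arm64:/data/app/example/base.apk!lib/arm64-v8a:/data/app/example/base.apk!lib/armeabi-v7a

The leading-':' trim only fires when no LD_LIBRARY_PATH component was appended first, and the "zip!dir" form is the linker's notation for a directory inside an archive.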
 
diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc
index a53aeaa..337c5df 100644
--- a/runtime/parsed_options.cc
+++ b/runtime/parsed_options.cc
@@ -255,6 +255,9 @@
           .IntoKey(M::ZygoteMaxFailedBoots)
       .Define("-Xno-dex-file-fallback")
           .IntoKey(M::NoDexFileFallback)
+      .Define("--cpu-abilist=_")
+          .WithType<std::string>()
+          .IntoKey(M::CpuAbiList)
       .Ignore({
           "-ea", "-da", "-enableassertions", "-disableassertions", "--runtime-arg", "-esa",
           "-dsa", "-enablesystemassertions", "-disablesystemassertions", "-Xrs", "-Xint:_",
diff --git a/runtime/primitive.h b/runtime/primitive.h
index 2d6b6b3..32bfdaf 100644
--- a/runtime/primitive.h
+++ b/runtime/primitive.h
@@ -153,7 +153,10 @@
   }
 
   static bool IsIntegralType(Type type) {
+    // The Java language does not allow treating boolean as an integral type, but
+    // our bit representation makes it safe.
     switch (type) {
+      case kPrimBoolean:
       case kPrimByte:
       case kPrimChar:
       case kPrimShort:
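The observable effect, as a two-line sanity sketch against the API shown above:

// Booleans now count as integral; their 0/1 bit pattern makes this safe.
CHECK(Primitive::IsIntegralType(Primitive::kPrimBoolean));           // was false before
CHECK(!Primitive::IsIntegralType(Primitive::kPrimFloat));            // unchanged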
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 0f0c327..2dacfe2 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -792,6 +792,8 @@
   verify_ = runtime_options.GetOrDefault(Opt::Verify);
   allow_dex_file_fallback_ = !runtime_options.Exists(Opt::NoDexFileFallback);
 
+  Split(runtime_options.GetOrDefault(Opt::CpuAbiList), ',', &cpu_abilist_);
+
   if (runtime_options.GetOrDefault(Opt::Interpret)) {
     GetInstrumentation()->ForceInterpretOnly();
   }
diff --git a/runtime/runtime.h b/runtime/runtime.h
index 7f33547..9a04835 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -524,6 +524,10 @@
     return allow_dex_file_fallback_;
   }
 
+  const std::vector<std::string>& GetCpuAbilist() const {
+    return cpu_abilist_;
+  }
+
   bool RunningOnValgrind() const {
     return running_on_valgrind_;
   }
@@ -706,6 +710,9 @@
   // available/usable.
   bool allow_dex_file_fallback_;
 
+  // List of supported cpu abis.
+  std::vector<std::string> cpu_abilist_;
+
   // Specifies target SDK version to allow workarounds for certain API levels.
   int32_t target_sdk_version_;
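Tying the new pieces together: parsed_options.cc accepts --cpu-abilist=<comma-separated list>, runtime.cc splits it into this vector, and SetLdLibraryPath() above consumes it. A minimal illustration of the Split() step (flag value hypothetical):

std::vector<std::string> cpu_abilist;
Split("arm64-v8a,armeabi-v7a", ',', &cpu_abilist);
// cpu_abilist == {"arm64-v8a", "armeabi-v7a"}, later exposed through
// Runtime::Current()->GetCpuAbilist().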
 
diff --git a/runtime/runtime_options.def b/runtime/runtime_options.def
index 8775f8d..1f273cf 100644
--- a/runtime/runtime_options.def
+++ b/runtime/runtime_options.def
@@ -62,7 +62,7 @@
 RUNTIME_OPTIONS_KEY (Unit,                DumpJITInfoOnShutdown)
 RUNTIME_OPTIONS_KEY (Unit,                IgnoreMaxFootprint)
 RUNTIME_OPTIONS_KEY (Unit,                LowMemoryMode)
-RUNTIME_OPTIONS_KEY (bool,                UseTLAB,                        false)
+RUNTIME_OPTIONS_KEY (bool,                UseTLAB,                        kUseTlab)
 RUNTIME_OPTIONS_KEY (bool,                EnableHSpaceCompactForOOM,      true)
 RUNTIME_OPTIONS_KEY (bool,                UseJIT,      false)
 RUNTIME_OPTIONS_KEY (unsigned int,        JITCompileThreshold, jit::Jit::kDefaultCompileThreshold)
@@ -104,6 +104,7 @@
                                           ImageCompilerOptions)  // -Ximage-compiler-option ...
 RUNTIME_OPTIONS_KEY (bool,                Verify,                         true)
 RUNTIME_OPTIONS_KEY (std::string,         NativeBridge)
+RUNTIME_OPTIONS_KEY (std::string,         CpuAbiList)
 
 // Not parse-able from command line, but can be provided explicitly.
 RUNTIME_OPTIONS_KEY (const std::vector<const DexFile*>*, \
diff --git a/runtime/thread.h b/runtime/thread.h
index da7af83..9d4d89d 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -573,6 +573,21 @@
         OFFSETOF_MEMBER(tls_ptr_sized_values, suspend_trigger));
   }
 
+  template<size_t pointer_size>
+  static ThreadOffset<pointer_size> ThreadLocalPosOffset() {
+    return ThreadOffsetFromTlsPtr<pointer_size>(OFFSETOF_MEMBER(tls_ptr_sized_values, thread_local_pos));
+  }
+
+  template<size_t pointer_size>
+  static ThreadOffset<pointer_size> ThreadLocalEndOffset() {
+    return ThreadOffsetFromTlsPtr<pointer_size>(OFFSETOF_MEMBER(tls_ptr_sized_values, thread_local_end));
+  }
+
+  template<size_t pointer_size>
+  static ThreadOffset<pointer_size> ThreadLocalObjectsOffset() {
+    return ThreadOffsetFromTlsPtr<pointer_size>(OFFSETOF_MEMBER(tls_ptr_sized_values, thread_local_objects));
+  }
+
   // Size of stack less any space reserved for stack overflow
   size_t GetStackSize() const {
     return tlsPtr_.stack_size - (tlsPtr_.stack_end - tlsPtr_.stack_begin);
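Exporting these three offsets is what lets generated code inline a TLAB bump-pointer allocation instead of calling into the runtime. A standalone schematic of that fast path in C++ rather than the assembly a backend would emit (the struct mirrors the tls_ptr_sized_values fields named above; real generated code loads them through the exported ThreadOffsets):

#include <cstddef>
#include <cstdint>

struct TlabFields {
  uint8_t* thread_local_pos;    // next free byte in the TLAB
  uint8_t* thread_local_end;    // one past the last usable byte
  size_t thread_local_objects;  // objects allocated from this TLAB
};

inline void* TlabTryAlloc(TlabFields* tls, size_t byte_size) {
  uint8_t* pos = tls->thread_local_pos;
  if (pos + byte_size <= tls->thread_local_end) {  // room left?
    tls->thread_local_pos = pos + byte_size;       // bump the pointer
    tls->thread_local_objects++;
    return pos;                                    // new object's address
  }
  return nullptr;  // caller falls back to the runtime's slow path
}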
diff --git a/runtime/utf.cc b/runtime/utf.cc
index 39c8d15..3d13c3e 100644
--- a/runtime/utf.cc
+++ b/runtime/utf.cc
@@ -67,15 +67,39 @@
 
 void ConvertUtf16ToModifiedUtf8(char* utf8_out, const uint16_t* utf16_in, size_t char_count) {
   while (char_count--) {
-    uint16_t ch = *utf16_in++;
+    const uint16_t ch = *utf16_in++;
     if (ch > 0 && ch <= 0x7f) {
       *utf8_out++ = ch;
     } else {
+      // char_count == 0 here implies we've encountered an unpaired
+      // surrogate and we have no choice but to encode it as a 3-byte UTF
+      // sequence. Note that unpaired surrogates can occur as a part of
+      // "normal" operation.
+      if ((ch >= 0xd800 && ch <= 0xdbff) && (char_count > 0)) {
+        const uint16_t ch2 = *utf16_in;
+
+        // Check if the other half of the pair is within the expected
+        // range. If it isn't, we will have to emit both "halves" as
+        // separate 3 byte sequences.
+        if (ch2 >= 0xdc00 && ch2 <= 0xdfff) {
+          utf16_in++;
+          char_count--;
+          const uint32_t code_point = (ch << 10) + ch2 - 0x035fdc00;
+          *utf8_out++ = (code_point >> 18) | 0xf0;
+          *utf8_out++ = ((code_point >> 12) & 0x3f) | 0x80;
+          *utf8_out++ = ((code_point >> 6) & 0x3f) | 0x80;
+          *utf8_out++ = (code_point & 0x3f) | 0x80;
+          continue;
+        }
+      }
+
       if (ch > 0x07ff) {
+        // Three byte encoding.
         *utf8_out++ = (ch >> 12) | 0xe0;
         *utf8_out++ = ((ch >> 6) & 0x3f) | 0x80;
         *utf8_out++ = (ch & 0x3f) | 0x80;
       } else /*(ch > 0x7f || ch == 0)*/ {
+        // Two byte encoding.
         *utf8_out++ = (ch >> 6) | 0xc0;
         *utf8_out++ = (ch & 0x3f) | 0x80;
       }
@@ -147,15 +171,32 @@
 size_t CountUtf8Bytes(const uint16_t* chars, size_t char_count) {
   size_t result = 0;
   while (char_count--) {
-    uint16_t ch = *chars++;
+    const uint16_t ch = *chars++;
     if (ch > 0 && ch <= 0x7f) {
       ++result;
-    } else {
-      if (ch > 0x7ff) {
-        result += 3;
+    } else if (ch >= 0xd800 && ch <= 0xdbff) {
+      if (char_count > 0) {
+        const uint16_t ch2 = *chars;
+        // If we find a properly paired surrogate, we emit it as a 4 byte
+        // UTF sequence. If we find an unpaired leading or trailing surrogate,
+        // we emit it as a 3 byte sequence, as we would have done earlier.
+        if (ch2 >= 0xdc00 && ch2 <= 0xdfff) {
+          chars++;
+          char_count--;
+
+          result += 4;
+        } else {
+          result += 3;
+        }
       } else {
-        result += 2;
+        // This implies we found an unpaired leading surrogate at the very
+        // end of the string.
+        result += 3;
       }
+    } else if (ch > 0x7ff) {
+      result += 3;
+    } else {
+      result += 2;
     }
   }
   return result;
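A worked instance of the pairing arithmetic above, for the pair used throughout the tests; the magic constant folds the 0x10000 bias together with both surrogate offsets ((0xd800 << 10) + 0xdc00 - 0x10000 == 0x035fdc00):

// ch = 0xd801, ch2 = 0xdc00:
//   code_point = (0xd801 << 10) + 0xdc00 - 0x035fdc00 = 0x10400   (U+10400)
// Emitted bytes:
//   0xf0 | (0x10400 >> 18)          = 0xf0
//   0x80 | ((0x10400 >> 12) & 0x3f) = 0x90
//   0x80 | ((0x10400 >> 6) & 0x3f)  = 0x90
//   0x80 | (0x10400 & 0x3f)         = 0x80
// i.e. "\xf0\x90\x90\x80", the expectation in jni_internal_test above.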
diff --git a/runtime/utf_test.cc b/runtime/utf_test.cc
index 8048bbd..94a6ea5 100644
--- a/runtime/utf_test.cc
+++ b/runtime/utf_test.cc
@@ -19,6 +19,8 @@
 #include "common_runtime_test.h"
 #include "utf-inl.h"
 
+#include <vector>
+
 namespace art {
 
 class UtfTest : public CommonRuntimeTest {};
@@ -110,4 +112,52 @@
   EXPECT_EQ(2u, CountModifiedUtf8Chars(reinterpret_cast<const char *>(kSurrogateEncoding)));
 }
 
+static void AssertConversion(const std::vector<uint16_t>& input,
+                             const std::vector<uint8_t>& expected) {
+  ASSERT_EQ(expected.size(), CountUtf8Bytes(&input[0], input.size()));
+
+  std::vector<uint8_t> output(expected.size());
+  ConvertUtf16ToModifiedUtf8(reinterpret_cast<char*>(&output[0]), &input[0], input.size());
+  EXPECT_EQ(expected, output);
+}
+
+TEST_F(UtfTest, CountAndConvertUtf8Bytes) {
+  // Surrogate pairs will be converted into 4 byte sequences.
+  AssertConversion({ 0xd801, 0xdc00 }, { 0xf0, 0x90, 0x90, 0x80 });
+
+  // Three byte encodings that are below & above the leading surrogate
+  // range respectively.
+  AssertConversion({ 0xdef0 }, { 0xed, 0xbb, 0xb0 });
+  AssertConversion({ 0xdcff }, { 0xed, 0xb3, 0xbf });
+  // Two byte encoding.
+  AssertConversion({ 0x0101 }, { 0xc4, 0x81 });
+
+  // Two byte special case: 0 must use an overlong encoding.
+  AssertConversion({ 0x0101, 0x0000 }, { 0xc4, 0x81, 0xc0, 0x80 });
+
+  // One byte encoding.
+  AssertConversion({ 'h', 'e', 'l', 'l', 'o' }, { 0x68, 0x65, 0x6c, 0x6c, 0x6f });
+
+  AssertConversion({
+      0xd802, 0xdc02,  // Surrogate pair
+      0xdef0, 0xdcff,  // Three byte encodings
+      0x0101, 0x0000,  // Two byte encodings
+      'p'   , 'p'      // One byte encoding
+    }, {
+      0xf0, 0x90, 0xa0, 0x82,
+      0xed, 0xbb, 0xb0, 0xed, 0xb3, 0xbf,
+      0xc4, 0x81, 0xc0, 0x80,
+      0x70, 0x70
+    });
+}
+
+TEST_F(UtfTest, CountAndConvertUtf8Bytes_UnpairedSurrogate) {
+  // Unpaired trailing surrogate at the end of input.
+  AssertConversion({ 'h', 'e', 0xd801 }, { 'h', 'e', 0xed, 0xa0, 0x81 });
+  // Unpaired (or incorrectly paired) surrogates in the middle of the input.
+  AssertConversion({ 'h', 0xd801, 'e' }, { 'h', 0xed, 0xa0, 0x81, 'e' });
+  AssertConversion({ 'h', 0xd801, 0xd801, 'e' }, { 'h', 0xed, 0xa0, 0x81, 0xed, 0xa0, 0x81, 'e' });
+  AssertConversion({ 'h', 0xdc00, 0xdc00, 'e' }, { 'h', 0xed, 0xb0, 0x80, 0xed, 0xb0, 0x80, 'e' });
+}
+
 }  // namespace art
diff --git a/runtime/utils.h b/runtime/utils.h
index 9a9f51a..e20412e 100644
--- a/runtime/utils.h
+++ b/runtime/utils.h
@@ -311,19 +311,6 @@
   return (ch < ' ' || ch > '~');
 }
 
-// Interpret the bit pattern of input (type U) as type V. Requires the size
-// of V >= size of U (compile-time checked).
-template<typename U, typename V>
-static inline V bit_cast(U in) {
-  static_assert(sizeof(U) <= sizeof(V), "Size of U not <= size of V");
-  union {
-    U u;
-    V v;
-  } tmp;
-  tmp.u = in;
-  return tmp.v;
-}
-
 std::string PrintableChar(uint16_t ch);
 
 // Returns an ASCII string corresponding to the given UTF-8 string.
diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc
index c67a58a..47e9bf5 100644
--- a/runtime/verifier/method_verifier.cc
+++ b/runtime/verifier/method_verifier.cc
@@ -1086,7 +1086,7 @@
   const uint16_t* insns = code_item_->insns_ + cur_offset;
   /* make sure the start of the switch is in range */
   int32_t switch_offset = insns[1] | ((int32_t) insns[2]) << 16;
-  if ((int32_t) cur_offset + switch_offset < 0 || cur_offset + switch_offset + 2 >= insn_count) {
+  if ((int32_t) cur_offset + switch_offset < 0 || cur_offset + switch_offset + 2 > insn_count) {
     Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "invalid switch start: at " << cur_offset
                                       << ", switch offset " << switch_offset
                                       << ", count " << insn_count;
@@ -2748,7 +2748,8 @@
         auto* klass = declaring_class.GetClass();
         for (uint32_t i = 0, num_fields = klass->NumInstanceFields(); i < num_fields; ++i) {
           if (klass->GetInstanceField(i)->IsFinal()) {
-            Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "return-void-no-barrier not expected";
+            Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "return-void-no-barrier not expected for "
+                << PrettyField(klass->GetInstanceField(i));
             break;
           }
         }
diff --git a/runtime/well_known_classes.cc b/runtime/well_known_classes.cc
index 78185bf..00b4cef 100644
--- a/runtime/well_known_classes.cc
+++ b/runtime/well_known_classes.cc
@@ -269,7 +269,7 @@
 
 void WellKnownClasses::LateInit(JNIEnv* env) {
   ScopedLocalRef<jclass> java_lang_Runtime(env, env->FindClass("java/lang/Runtime"));
-  java_lang_Runtime_nativeLoad = CacheMethod(env, java_lang_Runtime.get(), true, "nativeLoad", "(Ljava/lang/String;Ljava/lang/ClassLoader;Ljava/lang/String;)Ljava/lang/String;");
+  java_lang_Runtime_nativeLoad = CacheMethod(env, java_lang_Runtime.get(), true, "nativeLoad", "(Ljava/lang/String;Ljava/lang/ClassLoader;Ljava/lang/String;Ljava/lang/String;)Ljava/lang/String;");
 }
 
 mirror::Class* WellKnownClasses::ToClass(jclass global_jclass) {
diff --git a/test/004-ReferenceMap/stack_walk_refmap_jni.cc b/test/004-ReferenceMap/stack_walk_refmap_jni.cc
index 40be56c..76ef4a9 100644
--- a/test/004-ReferenceMap/stack_walk_refmap_jni.cc
+++ b/test/004-ReferenceMap/stack_walk_refmap_jni.cc
@@ -57,14 +57,15 @@
       // We eliminate the non-live registers at a return, so only v3 is live.
       // Note that it is OK for a compiler to not have a dex map at this dex PC because
       // a return is not necessarily a safepoint.
-      CHECK_REGS_CONTAIN_REFS(0x13U, false);  // v3: y
-      CHECK_REGS_CONTAIN_REFS(0x18U, true, 8, 2, 1, 0);  // v8: this, v2: y, v1: x, v0: ex
-      CHECK_REGS_CONTAIN_REFS(0x1aU, true, 8, 5, 2, 1, 0);  // v8: this, v5: x[1], v2: y, v1: x, v0: ex
-      CHECK_REGS_CONTAIN_REFS(0x1dU, true, 8, 5, 2, 1, 0);  // v8: this, v5: x[1], v2: y, v1: x, v0: ex
+      CHECK_REGS_CONTAIN_REFS(0x13U, false, 3);  // v3: y
+      // Note that v0: ex can be eliminated because it's a dead merge of two different exceptions.
+      CHECK_REGS_CONTAIN_REFS(0x18U, true, 8, 2, 1);  // v8: this, v2: y, v1: x (dead v0: ex)
+      CHECK_REGS_CONTAIN_REFS(0x1aU, true, 8, 5, 2, 1);  // v8: this, v5: x[1], v2: y, v1: x (dead v0: ex)
+      CHECK_REGS_CONTAIN_REFS(0x1dU, true, 8, 5, 2, 1);  // v8: this, v5: x[1], v2: y, v1: x (dead v0: ex)
       // v5 is removed from the root set because there is a "merge" operation.
       // See 0015: if-nez v2, 001f.
-      CHECK_REGS_CONTAIN_REFS(0x1fU, true, 8, 2, 1, 0);  // v8: this, v2: y, v1: x, v0: ex
-      CHECK_REGS_CONTAIN_REFS(0x21U, true, 8, 2, 1, 0);  // v8: this, v2: y, v1: x, v0: ex
+      CHECK_REGS_CONTAIN_REFS(0x1fU, true, 8, 2, 1);  // v8: this, v2: y, v1: x (dead v0: ex)
+      CHECK_REGS_CONTAIN_REFS(0x21U, true, 8, 2, 1);  // v8: this, v2: y, v1: x (dead v0: ex)
       CHECK_REGS_CONTAIN_REFS(0x27U, true, 8, 4, 2, 1);  // v8: this, v4: ex, v2: y, v1: x
       CHECK_REGS_CONTAIN_REFS(0x29U, true, 8, 4, 2, 1);  // v8: this, v4: ex, v2: y, v1: x
       CHECK_REGS_CONTAIN_REFS(0x2cU, true, 8, 4, 2, 1);  // v8: this, v4: ex, v2: y, v1: x
diff --git a/test/134-reg-promotion/smali/Test.smali b/test/134-reg-promotion/smali/Test.smali
index 6a35c45..68d29dd 100644
--- a/test/134-reg-promotion/smali/Test.smali
+++ b/test/134-reg-promotion/smali/Test.smali
@@ -36,3 +36,28 @@
    :end
    return-void
 .end method
+
+.method public static run2()V
+   .registers 4
+   new-instance v2, Ljava/lang/String;
+   invoke-direct {v2}, Ljava/lang/String;-><init>()V
+   const/4 v0, 0
+   move v1, v0
+   :start
+   invoke-static {}, LMain;->blowup()V
+   if-ne v1, v0, :end
+   const/4 v2, 1
+   invoke-static {v2}, Ljava/lang/Integer;->toString(I)Ljava/lang/String;
+   move-result-object v3
+   if-nez v3, :skip
+   const/4 v0, 0
+   :skip
+   # The Phi merging 0 with 0 hides the constant from the Quick compiler.
+   move v2, v0
+   # The call makes v2 float type.
+   invoke-static {v2}, Ljava/lang/Float;->isNaN(F)Z
+   const/4 v1, 1
+   goto :start
+   :end
+   return-void
+.end method
diff --git a/test/134-reg-promotion/src/Main.java b/test/134-reg-promotion/src/Main.java
index d45ec66..008ac58 100644
--- a/test/134-reg-promotion/src/Main.java
+++ b/test/134-reg-promotion/src/Main.java
@@ -38,5 +38,11 @@
             m.invoke(null, (Object[]) null);
             holder = null;
         }
+        m = c.getMethod("run2", (Class[]) null);
+        for (int i = 0; i < 10; i++) {
+            holder = new char[128 * 1024][];
+            m.invoke(null, (Object[]) null);
+            holder = null;
+        }
     }
 }
diff --git a/test/454-get-vreg/get_vreg_jni.cc b/test/454-get-vreg/get_vreg_jni.cc
index 937d2fe..6b4bc11 100644
--- a/test/454-get-vreg/get_vreg_jni.cc
+++ b/test/454-get-vreg/get_vreg_jni.cc
@@ -55,7 +55,7 @@
       CHECK_EQ(value, 1u);
 
       CHECK(GetVReg(m, 5, kFloatVReg, &value));
-      uint32_t cast = bit_cast<float, uint32_t>(1.0f);
+      uint32_t cast = bit_cast<uint32_t, float>(1.0f);
       CHECK_EQ(value, cast);
 
       CHECK(GetVReg(m, 6, kIntVReg, &value));
@@ -95,7 +95,7 @@
       CHECK_EQ(value, 0u);
 
       CHECK(GetVRegPair(m, 13, kDoubleLoVReg, kDoubleHiVReg, &value));
-      uint64_t cast = bit_cast<double, uint64_t>(2.0);
+      uint64_t cast = bit_cast<uint64_t, double>(2.0);
       CHECK_EQ(value, cast);
     }
 
diff --git a/test/455-set-vreg/set_vreg_jni.cc b/test/455-set-vreg/set_vreg_jni.cc
index 24d7832..0a83ac0 100644
--- a/test/455-set-vreg/set_vreg_jni.cc
+++ b/test/455-set-vreg/set_vreg_jni.cc
@@ -60,21 +60,21 @@
       CHECK(GetVReg(m, 1, kReferenceVReg, &value));
       CHECK_EQ(reinterpret_cast<mirror::Object*>(value), this_value_);
 
-      CHECK(SetVReg(m, 2, bit_cast<float, uint32_t>(5.0f), kFloatVReg));
-      CHECK(SetVReg(m, 3, bit_cast<float, uint32_t>(4.0f), kFloatVReg));
-      CHECK(SetVReg(m, 4, bit_cast<float, uint32_t>(3.0f), kFloatVReg));
-      CHECK(SetVReg(m, 5, bit_cast<float, uint32_t>(2.0f), kFloatVReg));
-      CHECK(SetVReg(m, 6, bit_cast<float, uint32_t>(1.0f), kFloatVReg));
+      CHECK(SetVReg(m, 2, bit_cast<uint32_t, float>(5.0f), kFloatVReg));
+      CHECK(SetVReg(m, 3, bit_cast<uint32_t, float>(4.0f), kFloatVReg));
+      CHECK(SetVReg(m, 4, bit_cast<uint32_t, float>(3.0f), kFloatVReg));
+      CHECK(SetVReg(m, 5, bit_cast<uint32_t, float>(2.0f), kFloatVReg));
+      CHECK(SetVReg(m, 6, bit_cast<uint32_t, float>(1.0f), kFloatVReg));
     } else if (m_name.compare("testDoubleVReg") == 0) {
       uint32_t value = 0;
       CHECK(GetVReg(m, 3, kReferenceVReg, &value));
       CHECK_EQ(reinterpret_cast<mirror::Object*>(value), this_value_);
 
-      CHECK(SetVRegPair(m, 4, bit_cast<double, uint64_t>(5.0), kDoubleLoVReg, kDoubleHiVReg));
-      CHECK(SetVRegPair(m, 6, bit_cast<double, uint64_t>(4.0), kDoubleLoVReg, kDoubleHiVReg));
-      CHECK(SetVRegPair(m, 8, bit_cast<double, uint64_t>(3.0), kDoubleLoVReg, kDoubleHiVReg));
-      CHECK(SetVRegPair(m, 10, bit_cast<double, uint64_t>(2.0), kDoubleLoVReg, kDoubleHiVReg));
-      CHECK(SetVRegPair(m, 12, bit_cast<double, uint64_t>(1.0), kDoubleLoVReg, kDoubleHiVReg));
+      CHECK(SetVRegPair(m, 4, bit_cast<uint64_t, double>(5.0), kDoubleLoVReg, kDoubleHiVReg));
+      CHECK(SetVRegPair(m, 6, bit_cast<uint64_t, double>(4.0), kDoubleLoVReg, kDoubleHiVReg));
+      CHECK(SetVRegPair(m, 8, bit_cast<uint64_t, double>(3.0), kDoubleLoVReg, kDoubleHiVReg));
+      CHECK(SetVRegPair(m, 10, bit_cast<uint64_t, double>(2.0), kDoubleLoVReg, kDoubleHiVReg));
+      CHECK(SetVRegPair(m, 12, bit_cast<uint64_t, double>(1.0), kDoubleLoVReg, kDoubleHiVReg));
     }
 
     return true;
diff --git a/test/457-regs/regs_jni.cc b/test/457-regs/regs_jni.cc
index ce701e8..1b32348 100644
--- a/test/457-regs/regs_jni.cc
+++ b/test/457-regs/regs_jni.cc
@@ -69,7 +69,7 @@
       CHECK_EQ(value, 1u);
 
       CHECK(GetVReg(m, 4, kFloatVReg, &value));
-      uint32_t cast = bit_cast<float, uint32_t>(4.0f);
+      uint32_t cast = bit_cast<uint32_t, float>(4.0f);
       CHECK_EQ(value, cast);
       did_check_ = true;
     } else if (m_name.compare("phiEquivalent") == 0) {
@@ -138,7 +138,7 @@
   std::unique_ptr<Context> context(Context::Create());
   CHECK(soa.Decode<mirror::Object*>(main) == nullptr);
   CHECK_EQ(int_value, 0);
-  int32_t cast = bit_cast<float, int32_t>(float_value);
+  int32_t cast = bit_cast<int32_t, float>(float_value);
   CHECK_EQ(cast, 0);
   TestVisitor visitor(soa.Self(), context.get());
   visitor.WalkStack();
diff --git a/test/463-checker-boolean-simplifier/expected.txt b/test/463-checker-boolean-simplifier/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/463-checker-boolean-simplifier/expected.txt
diff --git a/test/463-checker-boolean-simplifier/info.txt b/test/463-checker-boolean-simplifier/info.txt
new file mode 100644
index 0000000..9c0493a
--- /dev/null
+++ b/test/463-checker-boolean-simplifier/info.txt
@@ -0,0 +1 @@
+Tests simplification of boolean NOT in the optimizing compiler.
diff --git a/test/463-checker-boolean-simplifier/src/Main.java b/test/463-checker-boolean-simplifier/src/Main.java
new file mode 100644
index 0000000..efe0d3f
--- /dev/null
+++ b/test/463-checker-boolean-simplifier/src/Main.java
@@ -0,0 +1,174 @@
+/*
+* Copyright (C) 2015 The Android Open Source Project
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*      http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+public class Main {
+
+  // Note #1: `javac` flips the conditions of If statements.
+  // Note #2: In the optimizing compiler, the first input of Phi is always
+  //          the fall-through path, i.e. the false branch.
+
+  public static void assertBoolEquals(boolean expected, boolean result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  /*
+   * Elementary test negating a boolean. Verifies that the condition is replaced,
+   * blocks are merged, and empty branches are removed.
+   */
+
+  // CHECK-START: boolean Main.BooleanNot(boolean) boolean_simplifier (before)
+  // CHECK-DAG:     [[Param:z\d+]]    ParameterValue
+  // CHECK-DAG:     [[Const0:i\d+]]   IntConstant 0
+  // CHECK-DAG:     [[Const1:i\d+]]   IntConstant 1
+  // CHECK-DAG:     [[NotEq:z\d+]]    NotEqual [ [[Param]] [[Const0]] ]
+  // CHECK-DAG:                       If [ [[NotEq]] ]
+  // CHECK-DAG:     [[Phi:i\d+]]      Phi [ [[Const1]] [[Const0]] ]
+  // CHECK-DAG:                       Return [ [[Phi]] ]
+
+  // CHECK-START: boolean Main.BooleanNot(boolean) boolean_simplifier (before)
+  // CHECK:                           Goto
+  // CHECK:                           Goto
+  // CHECK:                           Goto
+  // CHECK-NOT:                       Goto
+
+  // CHECK-START: boolean Main.BooleanNot(boolean) boolean_simplifier (after)
+  // CHECK-DAG:     [[Param:z\d+]]    ParameterValue
+  // CHECK-DAG:     [[Const0:i\d+]]   IntConstant 0
+  // CHECK-DAG:     [[Eq:z\d+]]       Equal [ [[Param]] [[Const0]] ]
+  // CHECK-DAG:                       Return [ [[Eq]] ]
+
+  // CHECK-START: boolean Main.BooleanNot(boolean) boolean_simplifier (after)
+  // CHECK-NOT:                       NotEqual
+  // CHECK-NOT:                       If
+  // CHECK-NOT:                       Phi
+
+  // CHECK-START: boolean Main.BooleanNot(boolean) boolean_simplifier (after)
+  // CHECK:                           Goto
+  // CHECK-NOT:                       Goto
+
+  public static boolean BooleanNot(boolean x) {
+    return !x;
+  }
+
+  /*
+   * Program which only delegates the condition, i.e. returns 1 when True
+   * and 0 when False.
+   */
+
+  // CHECK-START: boolean Main.GreaterThan(int, int) boolean_simplifier (before)
+  // CHECK-DAG:     [[ParamX:i\d+]]   ParameterValue
+  // CHECK-DAG:     [[ParamY:i\d+]]   ParameterValue
+  // CHECK-DAG:     [[Const0:i\d+]]   IntConstant 0
+  // CHECK-DAG:     [[Const1:i\d+]]   IntConstant 1
+  // CHECK-DAG:     [[Cond:z\d+]]     GreaterThan [ [[ParamX]] [[ParamY]] ]
+  // CHECK-DAG:                       If [ [[Cond]] ]
+  // CHECK-DAG:     [[Phi:i\d+]]      Phi [ [[Const0]] [[Const1]] ]
+  // CHECK-DAG:                       Return [ [[Phi]] ]
+
+  // CHECK-START: boolean Main.GreaterThan(int, int) boolean_simplifier (after)
+  // CHECK-DAG:     [[ParamX:i\d+]]   ParameterValue
+  // CHECK-DAG:     [[ParamY:i\d+]]   ParameterValue
+  // CHECK-DAG:     [[Const0:i\d+]]   IntConstant 0
+  // CHECK-DAG:     [[Const1:i\d+]]   IntConstant 1
+  // CHECK-DAG:     [[Cond:z\d+]]     GreaterThan [ [[ParamX]] [[ParamY]] ]
+  // CHECK-DAG:                       Return [ [[Cond]] ]
+
+  public static boolean GreaterThan(int x, int y) {
+    return (x <= y) ? false : true;
+  }
+
+  /*
+   * Program which negates a condition, i.e. returns 0 when True
+   * and 1 when False.
+   */
+
+  // CHECK-START: boolean Main.LessThan(int, int) boolean_simplifier (before)
+  // CHECK-DAG:     [[ParamX:i\d+]]   ParameterValue
+  // CHECK-DAG:     [[ParamY:i\d+]]   ParameterValue
+  // CHECK-DAG:     [[Const0:i\d+]]   IntConstant 0
+  // CHECK-DAG:     [[Const1:i\d+]]   IntConstant 1
+  // CHECK-DAG:     [[Cond:z\d+]]     GreaterThanOrEqual [ [[ParamX]] [[ParamY]] ]
+  // CHECK-DAG:                       If [ [[Cond]] ]
+  // CHECK-DAG:     [[Phi:i\d+]]      Phi [ [[Const1]] [[Const0]] ]
+  // CHECK-DAG:                       Return [ [[Phi]] ]
+
+  // CHECK-START: boolean Main.LessThan(int, int) boolean_simplifier (after)
+  // CHECK-DAG:     [[ParamX:i\d+]]   ParameterValue
+  // CHECK-DAG:     [[ParamY:i\d+]]   ParameterValue
+  // CHECK-DAG:     [[Const0:i\d+]]   IntConstant 0
+  // CHECK-DAG:     [[Const1:i\d+]]   IntConstant 1
+  // CHECK-DAG:     [[Cond:z\d+]]     LessThan [ [[ParamX]] [[ParamY]] ]
+  // CHECK-DAG:                       Return [ [[Cond]] ]
+
+  public static boolean LessThan(int x, int y) {
+    return (x < y) ? true : false;
+  }
+
+  /*
+   * Program which further uses negated conditions.
+   * Note that Phis are discovered retrospectively.
+   */
+
+  // CHECK-START: boolean Main.ValuesOrdered(int, int, int) boolean_simplifier (before)
+  // CHECK-DAG:     [[ParamX:i\d+]]   ParameterValue
+  // CHECK-DAG:     [[ParamY:i\d+]]   ParameterValue
+  // CHECK-DAG:     [[ParamZ:i\d+]]   ParameterValue
+  // CHECK-DAG:     [[Const0:i\d+]]   IntConstant 0
+  // CHECK-DAG:     [[Const1:i\d+]]   IntConstant 1
+  // CHECK-DAG:     [[CondXY:z\d+]]   GreaterThan [ [[ParamX]] [[ParamY]] ]
+  // CHECK-DAG:                       If [ [[CondXY]] ]
+  // CHECK-DAG:     [[CondYZ:z\d+]]   GreaterThan [ [[ParamY]] [[ParamZ]] ]
+  // CHECK-DAG:                       If [ [[CondYZ]] ]
+  // CHECK-DAG:     [[CondXYZ:z\d+]]  NotEqual [ [[PhiXY:i\d+]] [[PhiYZ:i\d+]] ]
+  // CHECK-DAG:                       If [ [[CondXYZ]] ]
+  // CHECK-DAG:                       Return [ [[PhiXYZ:i\d+]] ]
+  // CHECK-DAG:     [[PhiXY]]         Phi [ [[Const1]] [[Const0]] ]
+  // CHECK-DAG:     [[PhiYZ]]         Phi [ [[Const1]] [[Const0]] ]
+  // CHECK-DAG:     [[PhiXYZ]]        Phi [ [[Const1]] [[Const0]] ]
+
+  // CHECK-START: boolean Main.ValuesOrdered(int, int, int) boolean_simplifier (after)
+  // CHECK-DAG:     [[ParamX:i\d+]]   ParameterValue
+  // CHECK-DAG:     [[ParamY:i\d+]]   ParameterValue
+  // CHECK-DAG:     [[ParamZ:i\d+]]   ParameterValue
+  // CHECK-DAG:     [[CmpXY:z\d+]]    LessThanOrEqual [ [[ParamX]] [[ParamY]] ]
+  // CHECK-DAG:     [[CmpYZ:z\d+]]    LessThanOrEqual [ [[ParamY]] [[ParamZ]] ]
+  // CHECK-DAG:     [[CmpXYZ:z\d+]]   Equal [ [[CmpXY]] [[CmpYZ]] ]
+  // CHECK-DAG:                       Return [ [[CmpXYZ]] ]
+
+  public static boolean ValuesOrdered(int x, int y, int z) {
+    return (x <= y) == (y <= z);
+  }
+
+  public static void main(String[] args) {
+    assertBoolEquals(false, BooleanNot(true));
+    assertBoolEquals(true, BooleanNot(false));
+    assertBoolEquals(true, GreaterThan(10, 5));
+    assertBoolEquals(false, GreaterThan(10, 10));
+    assertBoolEquals(false, GreaterThan(5, 10));
+    assertBoolEquals(true, LessThan(5, 10));
+    assertBoolEquals(false, LessThan(10, 10));
+    assertBoolEquals(false, LessThan(10, 5));
+    assertBoolEquals(true, ValuesOrdered(1, 3, 5));
+    assertBoolEquals(true, ValuesOrdered(5, 3, 1));
+    assertBoolEquals(false, ValuesOrdered(1, 3, 2));
+    assertBoolEquals(false, ValuesOrdered(2, 3, 1));
+    assertBoolEquals(true, ValuesOrdered(3, 3, 3));
+    assertBoolEquals(true, ValuesOrdered(3, 3, 5));
+    assertBoolEquals(false, ValuesOrdered(5, 5, 3));
+  }
+}
diff --git a/test/800-smali/expected.txt b/test/800-smali/expected.txt
index 019dc14..5922257 100644
--- a/test/800-smali/expected.txt
+++ b/test/800-smali/expected.txt
@@ -14,4 +14,5 @@
 b/18800943 (2)
 MoveExc
 MoveExceptionOnEntry
+EmptySparseSwitch
 Done!
diff --git a/test/800-smali/smali/EmptySparseSwitch.smali b/test/800-smali/smali/EmptySparseSwitch.smali
new file mode 100644
index 0000000..29592c1
--- /dev/null
+++ b/test/800-smali/smali/EmptySparseSwitch.smali
@@ -0,0 +1,17 @@
+.class public LEmptySparseSwitch;
+
+.super Ljava/lang/Object;
+
+.method public static run()V
+    .registers 2
+
+    const v0, 0
+
+    sparse-switch v0, :SparseSwitch
+
+    return-void
+
+    :SparseSwitch
+    .sparse-switch
+    .end sparse-switch
+.end method
diff --git a/test/800-smali/src/Main.java b/test/800-smali/src/Main.java
index b23896d..3e0b1f9 100644
--- a/test/800-smali/src/Main.java
+++ b/test/800-smali/src/Main.java
@@ -77,6 +77,8 @@
                 null));
         testCases.add(new TestCase("MoveExceptionOnEntry", "MoveExceptionOnEntry",
             "moveExceptionOnEntry", new Object[]{0}, new VerifyError(), null));
+        testCases.add(new TestCase("EmptySparseSwitch", "EmptySparseSwitch", "run", null, null,
+                null));
     }
 
     public void runTests() {
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index 9755efb..28fbc3e 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -95,9 +95,9 @@
   RELOCATE_TYPES += no-relocate
 endif
 ifeq ($(ART_TEST_RUN_TEST_RELOCATE_NO_PATCHOAT),true)
-  RELOCATE_TYPES := relocate-no-patchoat
+  RELOCATE_TYPES := relocate-npatchoat
 endif
-TRACE_TYPES := no-trace
+TRACE_TYPES := ntrace
 ifeq ($(ART_TEST_TRACE),true)
   TRACE_TYPES += trace
 endif
@@ -119,7 +119,7 @@
 ifeq ($(ART_TEST_PIC_IMAGE),true)
   IMAGE_TYPES += picimage
 endif
-PICTEST_TYPES := nopictest
+PICTEST_TYPES := npictest
 ifeq ($(ART_TEST_PIC_TEST),true)
   PICTEST_TYPES += pictest
 endif
@@ -130,7 +130,7 @@
 ifeq ($(ART_TEST_RUN_TEST_NDEBUG),true)
   RUN_TYPES += ndebug
 endif
-DEBUGGABLE_TYPES := nondebuggable
+DEBUGGABLE_TYPES := ndebuggable
 ifeq ($(ART_TEST_RUN_TEST_DEBUGGABLE),true)
 DEBUGGABLE_TYPES += debuggable
 endif
@@ -272,9 +272,9 @@
       $(PICTEST_TYPES), $(DEBUGGABLE_TYPES), $(TEST_ART_BROKEN_FALLBACK_RUN_TESTS),$(ALL_ADDRESS_SIZES))
 endif
 
-ifneq (,$(filter relocate-no-patchoat,$(RELOCATE_TYPES)))
+ifneq (,$(filter relocate-npatchoat,$(RELOCATE_TYPES)))
   ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
-      $(COMPILER_TYPES), relocate-no-patchoat,$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
+      $(COMPILER_TYPES), relocate-npatchoat,$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
       $(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES), $(TEST_ART_BROKEN_FALLBACK_RUN_TESTS),$(ALL_ADDRESS_SIZES))
 endif
 
@@ -375,7 +375,7 @@
 ifneq (,$(filter optimizing,$(COMPILER_TYPES)))
   ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
       optimizing,$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
-      $(IMAGE_TYPES),$(PICTEST_TYPES),nondebuggable,$(TEST_ART_BROKEN_OPTIMIZING_NONDEBUGGABLE_RUN_TESTS),$(ALL_ADDRESS_SIZES))
+      $(IMAGE_TYPES),$(PICTEST_TYPES),ndebuggable,$(TEST_ART_BROKEN_OPTIMIZING_NONDEBUGGABLE_RUN_TESTS),$(ALL_ADDRESS_SIZES))
 endif
 
 TEST_ART_BROKEN_OPTIMIZING_NONDEBUGGABLE_RUN_TESTS :=
@@ -461,10 +461,10 @@
 
 # Create a rule to build and run a tests following the form:
 # test-art-{1: host or target}-run-test-{2: debug ndebug}-{3: prebuild no-prebuild no-dex2oat}-
-#    {4: interpreter default optimizing jit}-{5: relocate no-relocate relocate-no-patchoat}-
-#    {6: trace or no-trace}-{7: gcstress gcverify cms}-{8: forcecopy checkjni jni}-
-#    {9: no-image image picimage}-{10: pictest nopictest}-
-#    {11: nondebuggable debuggable}-{12: test name}{13: 32 or 64}
+#    {4: interpreter default optimizing jit}-{5: relocate no-relocate relocate-npatchoat}-
+#    {6: trace or ntrace}-{7: gcstress gcverify cms}-{8: forcecopy checkjni jni}-
+#    {9: no-image image picimage}-{10: pictest npictest}-
+#    {11: ndebuggable debuggable}-{12: test name}{13: 32 or 64}
 define define-test-art-run-test
   run_test_options :=
   prereq_rule :=
@@ -543,7 +543,7 @@
       test_groups += ART_RUN_TEST_$$(uc_host_or_target)_NO_RELOCATE_RULES
       run_test_options += --no-relocate
     else
-      ifeq ($(5),relocate-no-patchoat)
+      ifeq ($(5),relocate-npatchoat)
         test_groups += ART_RUN_TEST_$$(uc_host_or_target)_RELOCATE_NO_PATCHOAT_RULES
         run_test_options += --relocate --no-patchoat
       else
@@ -555,7 +555,7 @@
     test_groups += ART_RUN_TEST_$$(uc_host_or_target)_TRACE_RULES
     run_test_options += --trace
   else
-    ifeq ($(6),no-trace)
+    ifeq ($(6),ntrace)
       test_groups += ART_RUN_TEST_$$(uc_host_or_target)_NO_TRACE_RULES
     else
       $$(error found $(6) expected $(TRACE_TYPES))
@@ -635,7 +635,7 @@
   ifeq ($(10),pictest)
     run_test_options += --pic-test
   else
-    ifeq ($(10),nopictest)
+    ifeq ($(10),npictest)
       # Nothing to be done.
     else
       $$(error found $(10) expected $(PICTEST_TYPES))
@@ -645,7 +645,7 @@
     test_groups += ART_RUN_TEST_$$(uc_host_or_target)_DEBUGGABLE_RULES
     run_test_options += --debuggable
   else
-    ifeq ($(11),nondebuggable)
+    ifeq ($(11),ndebuggable)
     test_groups += ART_RUN_TEST_$$(uc_host_or_target)_NONDEBUGGABLE_RULES
       # Nothing to be done.
     else
diff --git a/test/etc/run-test-jar b/test/etc/run-test-jar
index 840ff80..414e4df 100755
--- a/test/etc/run-test-jar
+++ b/test/etc/run-test-jar
@@ -267,7 +267,7 @@
 if [ "$JIT" = "y" ]; then
     INT_OPTS="-Xusejit:true"
     if [ "$VERIFY" = "y" ] ; then
-      COMPILE_FLAGS="${COMPILE_FLAGS} --compiler-filter=interpret-only"
+      COMPILE_FLAGS="${COMPILE_FLAGS} --compiler-filter=verify-at-runtime"
     else
       COMPILE_FLAGS="${COMPILE_FLAGS} --compiler-filter=verify-none"
       DEX_VERIFY="${DEX_VERIFY} -Xverify:none"
diff --git a/test/run-test b/test/run-test
index df0fce4..2873a35 100755
--- a/test/run-test
+++ b/test/run-test
@@ -441,8 +441,8 @@
         echo "    --build-only          Build test files only (off by default)."
         echo "    --interpreter         Enable interpreter only mode (off by default)."
         echo "    --jit                 Enable jit (off by default)."
-        echo "    --optimizing          Enable optimizing compiler (off by default)."
-        echo "    --quick               Use Quick compiler (default)."
+        echo "    --optimizing          Enable optimizing compiler (default)."
+        echo "    --quick               Use Quick compiler (off by default)."
         echo "    --no-verify           Turn off verification (on by default)."
         echo "    --no-optimize         Turn off optimization (on by default)."
         echo "    --no-precise          Turn off precise GC (on by default)."