Merge changes I4b3b4d90,I70e0d78f,I2848636f

* changes:
  Forbid the use of shifts in ShifterOperand in Thumb2
  Make subs and adds alter flags when rn is an immediate
  Inline long shift code
diff --git a/Android.mk b/Android.mk
index c740a0d..216e865 100644
--- a/Android.mk
+++ b/Android.mk
@@ -360,6 +360,7 @@
 		--boot-image=$(DEFAULT_DEX_PREOPT_BUILT_IMAGE) --dex-file=$(PRODUCT_OUT)/$(1) \
 		--dex-location=/$(1) --oat-file=$$@ \
 		--instruction-set=$(DEX2OAT_TARGET_ARCH) \
+		--instruction-set-variant=$(DEX2OAT_TARGET_CPU_VARIANT) \
 		--instruction-set-features=$(DEX2OAT_TARGET_INSTRUCTION_SET_FEATURES) \
 		--android-root=$(PRODUCT_OUT)/system --include-patch-information \
 		--runtime-arg -Xnorelocate
diff --git a/build/Android.common_build.mk b/build/Android.common_build.mk
index 08b4ec2..8f00298 100644
--- a/build/Android.common_build.mk
+++ b/build/Android.common_build.mk
@@ -219,6 +219,10 @@
   art_cflags += -DART_USE_READ_BARRIER=1
 endif
 
+ifeq ($(ART_USE_TLAB),true)
+  art_cflags += -DART_USE_TLAB=1
+endif
+
 # Cflags for non-debug ART and ART tools.
 art_non_debug_cflags := \
   -O3
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index 6b6a9e0..91998fa 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -159,6 +159,7 @@
   runtime/intern_table_test.cc \
   runtime/interpreter/safe_math_test.cc \
   runtime/java_vm_ext_test.cc \
+  runtime/jit/jit_code_cache_test.cc \
   runtime/leb128_test.cc \
   runtime/mem_map_test.cc \
   runtime/memory_region_test.cc \
@@ -166,6 +167,7 @@
   runtime/mirror/object_test.cc \
   runtime/monitor_pool_test.cc \
   runtime/monitor_test.cc \
+  runtime/oat_file_test.cc \
   runtime/oat_file_assistant_test.cc \
   runtime/parsed_options_test.cc \
   runtime/reference_table_test.cc \
diff --git a/build/Android.oat.mk b/build/Android.oat.mk
index 4d2fa41..710b130 100644
--- a/build/Android.oat.mk
+++ b/build/Android.oat.mk
@@ -230,6 +230,7 @@
 	  $$(addprefix --dex-location=,$$(TARGET_CORE_DEX_LOCATIONS)) --oat-file=$$(PRIVATE_CORE_OAT_NAME) \
 	  --oat-location=$$(PRIVATE_CORE_OAT_NAME) --image=$$(PRIVATE_CORE_IMG_NAME) \
 	  --base=$$(LIBART_IMG_TARGET_BASE_ADDRESS) --instruction-set=$$($(3)TARGET_ARCH) \
+	  --instruction-set-variant=$$($(3)DEX2OAT_TARGET_CPU_VARIANT) \
 	  --instruction-set-features=$$($(3)DEX2OAT_TARGET_INSTRUCTION_SET_FEATURES) \
 	  --android-root=$$(PRODUCT_OUT)/system --include-patch-information \
 	  $$(PRIVATE_CORE_COMPILE_OPTIONS) || (rm $$(PRIVATE_CORE_OAT_NAME); exit 1)
diff --git a/compiler/Android.mk b/compiler/Android.mk
index 0906753..6b0e6ff 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -94,6 +94,7 @@
 	jni/quick/x86_64/calling_convention_x86_64.cc \
 	jni/quick/calling_convention.cc \
 	jni/quick/jni_compiler.cc \
+	optimizing/boolean_simplifier.cc \
 	optimizing/builder.cc \
 	optimizing/bounds_check_elimination.cc \
 	optimizing/code_generator.cc \
diff --git a/compiler/dex/compiler_enums.h b/compiler/dex/compiler_enums.h
index 7edb490..39725de 100644
--- a/compiler/dex/compiler_enums.h
+++ b/compiler/dex/compiler_enums.h
@@ -345,6 +345,7 @@
 enum MIROptimizationFlagPositions {
   kMIRIgnoreNullCheck = 0,
   kMIRIgnoreRangeCheck,
+  kMIRIgnoreCheckCast,
   kMIRStoreNonNullValue,              // Storing non-null value, always mark GC card.
   kMIRClassIsInitialized,
   kMIRClassIsInDexCache,
diff --git a/compiler/dex/dex_to_dex_compiler.cc b/compiler/dex/dex_to_dex_compiler.cc
index fcefb6f..548b6f8 100644
--- a/compiler/dex/dex_to_dex_compiler.cc
+++ b/compiler/dex/dex_to_dex_compiler.cc
@@ -180,22 +180,21 @@
 }
 
 void DexCompiler::CompileReturnVoid(Instruction* inst, uint32_t dex_pc) {
-  DCHECK(inst->Opcode() == Instruction::RETURN_VOID);
-  // Are we compiling a non-clinit constructor?
-  if (!unit_.IsConstructor() || unit_.IsStatic()) {
-    return;
+  DCHECK_EQ(inst->Opcode(), Instruction::RETURN_VOID);
+  if (unit_.IsConstructor()) {
+    // Are we compiling a non-clinit constructor that needs a barrier?
+    if (!unit_.IsStatic() &&
+        driver_.RequiresConstructorBarrier(Thread::Current(), unit_.GetDexFile(),
+                                           unit_.GetClassDefIndex())) {
+      return;
+    }
   }
-  // Do we need a constructor barrier ?
-  if (!driver_.RequiresConstructorBarrier(Thread::Current(), unit_.GetDexFile(),
-                                         unit_.GetClassDefIndex())) {
-    return;
-  }
-  // Replace RETURN_VOID by RETURN_VOID_BARRIER.
+  // Replace RETURN_VOID by RETURN_VOID_NO_BARRIER.
   VLOG(compiler) << "Replacing " << Instruction::Name(inst->Opcode())
-                 << " by " << Instruction::Name(Instruction::RETURN_VOID_BARRIER)
+                 << " by " << Instruction::Name(Instruction::RETURN_VOID_NO_BARRIER)
                  << " at dex pc " << StringPrintf("0x%x", dex_pc) << " in method "
                  << PrettyMethod(unit_.GetDexMethodIndex(), GetDexFile(), true);
-  inst->SetOpcode(Instruction::RETURN_VOID_BARRIER);
+  inst->SetOpcode(Instruction::RETURN_VOID_NO_BARRIER);
 }
 
 Instruction* DexCompiler::CompileCheckCast(Instruction* inst, uint32_t dex_pc) {
diff --git a/compiler/dex/global_value_numbering.cc b/compiler/dex/global_value_numbering.cc
index ab3c946..30e3ce0 100644
--- a/compiler/dex/global_value_numbering.cc
+++ b/compiler/dex/global_value_numbering.cc
@@ -16,6 +16,7 @@
 
 #include "global_value_numbering.h"
 
+#include "base/bit_vector-inl.h"
 #include "base/stl_util.h"
 #include "local_value_numbering.h"
 
@@ -206,4 +207,41 @@
   return true;
 }
 
+bool GlobalValueNumbering::IsBlockEnteredOnTrue(uint16_t cond, BasicBlockId bb_id) {
+  DCHECK_NE(cond, kNoValue);
+  BasicBlock* bb = mir_graph_->GetBasicBlock(bb_id);
+  if (bb->predecessors.size() == 1u) {
+    BasicBlockId pred_id = bb->predecessors[0];
+    BasicBlock* pred_bb = mir_graph_->GetBasicBlock(pred_id);
+    if (pred_bb->last_mir_insn != nullptr) {
+      Instruction::Code opcode = pred_bb->last_mir_insn->dalvikInsn.opcode;
+      if ((opcode == Instruction::IF_NEZ && pred_bb->taken == bb_id) ||
+          (opcode == Instruction::IF_EQZ && pred_bb->fall_through == bb_id)) {
+        DCHECK(lvns_[pred_id] != nullptr);
+        uint16_t operand = lvns_[pred_id]->GetSregValue(pred_bb->last_mir_insn->ssa_rep->uses[0]);
+        if (operand == cond) {
+          return true;
+        }
+      }
+    }
+  }
+  return false;
+}
+
+bool GlobalValueNumbering::IsTrueInBlock(uint16_t cond, BasicBlockId bb_id) {
+  // We're not doing proper value propagation, so just see if the condition is used
+  // with if-nez/if-eqz to branch/fall-through to this bb or one of its dominators.
+  DCHECK_NE(cond, kNoValue);
+  if (IsBlockEnteredOnTrue(cond, bb_id)) {
+    return true;
+  }
+  BasicBlock* bb = mir_graph_->GetBasicBlock(bb_id);
+  for (uint32_t dom_id : bb->dominators->Indexes()) {
+    if (IsBlockEnteredOnTrue(cond, dom_id)) {
+      return true;
+    }
+  }
+  return false;
+}
+
 }  // namespace art
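Note: the IsBlockEnteredOnTrue/IsTrueInBlock helpers above let local value numbering (later in
this change) mark a check-cast as ignorable once control flow has already proven the matching
instance-of true. A minimal standalone sketch of the pattern being recognized, using
dynamic_cast as a stand-in for instance-of/check-cast (hypothetical illustration, not ART code):

#include <iostream>

struct Base { virtual ~Base() = default; };
struct Derived : Base { void Use() const { std::cout << "ok\n"; } };

void MaybeUse(Base* o) {
  if (dynamic_cast<Derived*>(o) != nullptr) {  // plays the role of instance-of + if-nez
    // On this path the test above already proved the downcast succeeds, so the equivalent
    // check-cast could never throw and its runtime check can be dropped.
    static_cast<Derived*>(o)->Use();
  }
}

int main() {
  Derived d;
  MaybeUse(&d);
  return 0;
}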
diff --git a/compiler/dex/global_value_numbering.h b/compiler/dex/global_value_numbering.h
index 6fa658c..bd2f187 100644
--- a/compiler/dex/global_value_numbering.h
+++ b/compiler/dex/global_value_numbering.h
@@ -200,6 +200,9 @@
 
   bool DivZeroCheckedInAllPredecessors(const ScopedArenaVector<uint16_t>& merge_names) const;
 
+  bool IsBlockEnteredOnTrue(uint16_t cond, BasicBlockId bb_id);
+  bool IsTrueInBlock(uint16_t cond, BasicBlockId bb_id);
+
   ScopedArenaAllocator* Allocator() const {
     return allocator_;
   }
diff --git a/compiler/dex/global_value_numbering_test.cc b/compiler/dex/global_value_numbering_test.cc
index b91c3ca..b4559ef 100644
--- a/compiler/dex/global_value_numbering_test.cc
+++ b/compiler/dex/global_value_numbering_test.cc
@@ -136,6 +136,7 @@
     { bb, static_cast<Instruction::Code>(kMirOpPhi), 0, 0u, 2u, { src1, src2 }, 1, { reg } }
 #define DEF_BINOP(bb, opcode, result, src1, src2) \
     { bb, opcode, 0u, 0u, 2, { src1, src2 }, 1, { result } }
+#define DEF_UNOP(bb, opcode, result, src) DEF_MOVE(bb, opcode, result, src)
 
   void DoPrepareIFields(const IFieldDef* defs, size_t count) {
     cu_.mir_graph->ifield_lowering_infos_.clear();
@@ -2315,4 +2316,95 @@
   }
 }
 
+TEST_F(GlobalValueNumberingTestDiamond, CheckCastDiamond) {
+  static const MIRDef mirs[] = {
+      DEF_UNOP(3u, Instruction::INSTANCE_OF, 0u, 100u),
+      DEF_UNOP(3u, Instruction::INSTANCE_OF, 1u, 200u),
+      DEF_IFZ(3u, Instruction::IF_NEZ, 0u),
+      DEF_INVOKE1(4u, Instruction::CHECK_CAST, 100u),
+      DEF_INVOKE1(5u, Instruction::CHECK_CAST, 100u),
+      DEF_INVOKE1(5u, Instruction::CHECK_CAST, 200u),
+      DEF_INVOKE1(5u, Instruction::CHECK_CAST, 100u),
+      DEF_INVOKE1(6u, Instruction::CHECK_CAST, 100u),
+  };
+
+  static const bool expected_ignore_check_cast[] = {
+      false,  // instance-of
+      false,  // instance-of
+      false,  // if-nez
+      false,  // Not eliminated, fall-through branch.
+      true,   // Eliminated.
+      false,  // Not eliminated, different value.
+      false,  // Not eliminated, different type.
+      false,  // Not eliminated, bottom block.
+  };
+
+  PrepareMIRs(mirs);
+  mirs_[0].dalvikInsn.vC = 1234;  // type for instance-of
+  mirs_[1].dalvikInsn.vC = 1234;  // type for instance-of
+  mirs_[3].dalvikInsn.vB = 1234;  // type for check-cast
+  mirs_[4].dalvikInsn.vB = 1234;  // type for check-cast
+  mirs_[5].dalvikInsn.vB = 1234;  // type for check-cast
+  mirs_[6].dalvikInsn.vB = 4321;  // type for check-cast
+  mirs_[7].dalvikInsn.vB = 1234;  // type for check-cast
+  PerformGVN();
+  PerformGVNCodeModifications();
+  ASSERT_EQ(arraysize(expected_ignore_check_cast), mir_count_);
+  for (size_t i = 0u; i != mir_count_; ++i) {
+    int expected = expected_ignore_check_cast[i] ? MIR_IGNORE_CHECK_CAST : 0u;
+    EXPECT_EQ(expected, mirs_[i].optimization_flags) << i;
+  }
+}
+
+TEST_F(GlobalValueNumberingTest, CheckCastDominators) {
+  const BBDef bbs[] = {
+      DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()),
+      DEF_BB(kEntryBlock, DEF_SUCC1(3), DEF_PRED0()),
+      DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(7)),
+      DEF_BB(kDalvikByteCode, DEF_SUCC2(4, 5), DEF_PRED1(1)),  // Block #3, top of the diamond.
+      DEF_BB(kDalvikByteCode, DEF_SUCC1(7), DEF_PRED1(3)),     // Block #4, left side.
+      DEF_BB(kDalvikByteCode, DEF_SUCC1(6), DEF_PRED1(3)),     // Block #5, right side.
+      DEF_BB(kDalvikByteCode, DEF_SUCC1(7), DEF_PRED1(5)),     // Block #6, right side.
+      DEF_BB(kDalvikByteCode, DEF_SUCC1(2), DEF_PRED2(4, 6)),  // Block #7, bottom.
+  };
+  static const MIRDef mirs[] = {
+      DEF_UNOP(3u, Instruction::INSTANCE_OF, 0u, 100u),
+      DEF_UNOP(3u, Instruction::INSTANCE_OF, 1u, 200u),
+      DEF_IFZ(3u, Instruction::IF_NEZ, 0u),
+      DEF_INVOKE1(4u, Instruction::CHECK_CAST, 100u),
+      DEF_INVOKE1(6u, Instruction::CHECK_CAST, 100u),
+      DEF_INVOKE1(6u, Instruction::CHECK_CAST, 200u),
+      DEF_INVOKE1(6u, Instruction::CHECK_CAST, 100u),
+      DEF_INVOKE1(7u, Instruction::CHECK_CAST, 100u),
+  };
+
+  static const bool expected_ignore_check_cast[] = {
+      false,  // instance-of
+      false,  // instance-of
+      false,  // if-nez
+      false,  // Not eliminated, fall-through branch.
+      true,   // Eliminated.
+      false,  // Not eliminated, different value.
+      false,  // Not eliminated, different type.
+      false,  // Not eliminated, bottom block.
+  };
+
+  PrepareBasicBlocks(bbs);
+  PrepareMIRs(mirs);
+  mirs_[0].dalvikInsn.vC = 1234;  // type for instance-of
+  mirs_[1].dalvikInsn.vC = 1234;  // type for instance-of
+  mirs_[3].dalvikInsn.vB = 1234;  // type for check-cast
+  mirs_[4].dalvikInsn.vB = 1234;  // type for check-cast
+  mirs_[5].dalvikInsn.vB = 1234;  // type for check-cast
+  mirs_[6].dalvikInsn.vB = 4321;  // type for check-cast
+  mirs_[7].dalvikInsn.vB = 1234;  // type for check-cast
+  PerformGVN();
+  PerformGVNCodeModifications();
+  ASSERT_EQ(arraysize(expected_ignore_check_cast), mir_count_);
+  for (size_t i = 0u; i != mir_count_; ++i) {
+    int expected = expected_ignore_check_cast[i] ? MIR_IGNORE_CHECK_CAST : 0u;
+    EXPECT_EQ(expected, mirs_[i].optimization_flags) << i;
+  }
+}
+
 }  // namespace art
diff --git a/compiler/dex/gvn_dead_code_elimination.cc b/compiler/dex/gvn_dead_code_elimination.cc
index 2e7f032..2d4c18f 100644
--- a/compiler/dex/gvn_dead_code_elimination.cc
+++ b/compiler/dex/gvn_dead_code_elimination.cc
@@ -1058,7 +1058,6 @@
     case Instruction::INVOKE_INTERFACE_RANGE:
     case Instruction::INVOKE_STATIC:
     case Instruction::INVOKE_STATIC_RANGE:
-    case Instruction::CHECK_CAST:
     case Instruction::THROW:
     case Instruction::FILLED_NEW_ARRAY:
     case Instruction::FILLED_NEW_ARRAY_RANGE:
@@ -1073,6 +1072,12 @@
       uses_all_vregs = true;
       break;
 
+    case Instruction::CHECK_CAST:
+      DCHECK_EQ(mir->ssa_rep->num_uses, 1);
+      must_keep = true;  // Keep for type information even if MIR_IGNORE_CHECK_CAST.
+      uses_all_vregs = (mir->optimization_flags & MIR_IGNORE_CHECK_CAST) == 0;
+      break;
+
     case kMirOpNullCheck:
       DCHECK_EQ(mir->ssa_rep->num_uses, 1);
       if ((mir->optimization_flags & MIR_IGNORE_NULL_CHECK) != 0) {
diff --git a/compiler/dex/local_value_numbering.cc b/compiler/dex/local_value_numbering.cc
index 99b6683..dc222b5 100644
--- a/compiler/dex/local_value_numbering.cc
+++ b/compiler/dex/local_value_numbering.cc
@@ -1520,7 +1520,6 @@
     case Instruction::GOTO:
     case Instruction::GOTO_16:
     case Instruction::GOTO_32:
-    case Instruction::CHECK_CAST:
     case Instruction::THROW:
     case Instruction::FILL_ARRAY_DATA:
     case Instruction::PACKED_SWITCH:
@@ -1612,9 +1611,32 @@
       HandleInvokeOrClInitOrAcquireOp(mir);
       break;
 
+    case Instruction::INSTANCE_OF: {
+        uint16_t operand = GetOperandValue(mir->ssa_rep->uses[0]);
+        uint16_t type = mir->dalvikInsn.vC;
+        res = gvn_->LookupValue(Instruction::INSTANCE_OF, operand, type, kNoValue);
+        SetOperandValue(mir->ssa_rep->defs[0], res);
+      }
+      break;
+    case Instruction::CHECK_CAST:
+      if (gvn_->CanModify()) {
+        // Check if there was an instance-of operation on the same value and if we are
+        // in a block where its result is true. If so, we can eliminate the check-cast.
+        uint16_t operand = GetOperandValue(mir->ssa_rep->uses[0]);
+        uint16_t type = mir->dalvikInsn.vB;
+        uint16_t cond = gvn_->FindValue(Instruction::INSTANCE_OF, operand, type, kNoValue);
+        if (cond != kNoValue && gvn_->IsTrueInBlock(cond, Id())) {
+          if (gvn_->GetCompilationUnit()->verbose) {
+            LOG(INFO) << "Removing check-cast at 0x" << std::hex << mir->offset;
+          }
+          // Don't use kMirOpNop. Keep the check-cast as it defines the type of the register.
+          mir->optimization_flags |= MIR_IGNORE_CHECK_CAST;
+        }
+      }
+      break;
+
     case Instruction::MOVE_RESULT:
     case Instruction::MOVE_RESULT_OBJECT:
-    case Instruction::INSTANCE_OF:
       // 1 result, treat as unique each time, use result s_reg - will be unique.
       res = GetOperandValue(mir->ssa_rep->defs[0]);
       SetOperandValue(mir->ssa_rep->defs[0], res);
diff --git a/compiler/dex/mir_analysis.cc b/compiler/dex/mir_analysis.cc
index a89b250..3d7a640 100644
--- a/compiler/dex/mir_analysis.cc
+++ b/compiler/dex/mir_analysis.cc
@@ -416,7 +416,7 @@
   // 72 INVOKE_INTERFACE {vD, vE, vF, vG, vA}
   kAnInvoke | kAnHeavyWeight,
 
-  // 73 RETURN_VOID_BARRIER
+  // 73 RETURN_VOID_NO_BARRIER
   kAnBranch,
 
   // 74 INVOKE_VIRTUAL_RANGE {vCCCC .. vNNNN}
diff --git a/compiler/dex/mir_dataflow.cc b/compiler/dex/mir_dataflow.cc
index dfaff6c..f638b0b 100644
--- a/compiler/dex/mir_dataflow.cc
+++ b/compiler/dex/mir_dataflow.cc
@@ -374,7 +374,7 @@
   // 72 INVOKE_INTERFACE {vD, vE, vF, vG, vA}
   DF_FORMAT_35C | DF_NULL_CHK_OUT0 | DF_UMS,
 
-  // 73 RETURN_VOID_BARRIER
+  // 73 RETURN_VOID_NO_BARRIER
   DF_NOP,
 
   // 74 INVOKE_VIRTUAL_RANGE {vCCCC .. vNNNN}
diff --git a/compiler/dex/mir_graph.h b/compiler/dex/mir_graph.h
index 9da39d1..3298af1 100644
--- a/compiler/dex/mir_graph.h
+++ b/compiler/dex/mir_graph.h
@@ -150,6 +150,7 @@
 
 #define MIR_IGNORE_NULL_CHECK           (1 << kMIRIgnoreNullCheck)
 #define MIR_IGNORE_RANGE_CHECK          (1 << kMIRIgnoreRangeCheck)
+#define MIR_IGNORE_CHECK_CAST           (1 << kMIRIgnoreCheckCast)
 #define MIR_STORE_NON_NULL_VALUE        (1 << kMIRStoreNonNullValue)
 #define MIR_CLASS_IS_INITIALIZED        (1 << kMIRClassIsInitialized)
 #define MIR_CLASS_IS_IN_DEX_CACHE       (1 << kMIRClassIsInDexCache)
diff --git a/compiler/dex/mir_optimization.cc b/compiler/dex/mir_optimization.cc
index 93749e4..c85c3b6 100644
--- a/compiler/dex/mir_optimization.cc
+++ b/compiler/dex/mir_optimization.cc
@@ -671,6 +671,9 @@
               }
               int dead_true_def = if_true->ssa_rep->defs[0];
               raw_use_counts_[dead_true_def] = use_counts_[dead_true_def] = 0;
+              // Update ending vreg->sreg map for GC map generation.
+              int def_vreg = SRegToVReg(mir->ssa_rep->defs[0]);
+              bb->data_flow_info->vreg_to_ssa_map_exit[def_vreg] = mir->ssa_rep->defs[0];
               // We want to remove ft and tk and link bb directly to ft_ft. First, we need
               // to update all Phi inputs correctly with UpdatePredecessor(ft->id, bb->id)
               // since the live_def above comes from ft->first_mir_insn (if_false).
@@ -1751,6 +1754,9 @@
     DCHECK_NE(opt_flags & MIR_IGNORE_NULL_CHECK, 0);
     // Non-throwing only if range check has been eliminated.
     return ((opt_flags & MIR_IGNORE_RANGE_CHECK) == 0);
+  } else if (mir->dalvikInsn.opcode == Instruction::CHECK_CAST &&
+      (opt_flags & MIR_IGNORE_CHECK_CAST) != 0) {
+    return false;
   } else if (mir->dalvikInsn.opcode == Instruction::ARRAY_LENGTH ||
       static_cast<int>(mir->dalvikInsn.opcode) == kMirOpNullCheck) {
     // No more checks for these (null check was processed above).
diff --git a/compiler/dex/pass_manager.cc b/compiler/dex/pass_manager.cc
index 6d58f65..6377a6c 100644
--- a/compiler/dex/pass_manager.cc
+++ b/compiler/dex/pass_manager.cc
@@ -33,7 +33,7 @@
   // Add each pass which isn't disabled into default_pass_list_.
   for (const auto* pass : passes_) {
     if (options_.GetDisablePassList().find(pass->GetName()) != std::string::npos) {
-      LOG(INFO) << "Skipping disabled pass " << pass->GetName();
+      VLOG(compiler) << "Skipping disabled pass " << pass->GetName();
     } else {
       default_pass_list_.push_back(pass);
     }
diff --git a/compiler/dex/quick/arm/assemble_arm.cc b/compiler/dex/quick/arm/assemble_arm.cc
index 8833da3..3e69878 100644
--- a/compiler/dex/quick/arm/assemble_arm.cc
+++ b/compiler/dex/quick/arm/assemble_arm.cc
@@ -848,7 +848,7 @@
     ENCODING_MAP(kThumb2LdrPcRel12,       0xf8df0000,
                  kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 0, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
-                 IS_TERTIARY_OP | REG_DEF0 | REG_USE_PC | IS_LOAD_OFF | NEEDS_FIXUP,
+                 IS_BINARY_OP | REG_DEF0 | REG_USE_PC | IS_LOAD_OFF | NEEDS_FIXUP,
                  "ldr", "!0C, [r15pc, #!1d]", 4, kFixupLoad),
     ENCODING_MAP(kThumb2BCond,        0xf0008000,
                  kFmtBrOffset, -1, -1, kFmtBitBlt, 25, 22, kFmtUnused, -1, -1,
@@ -1502,7 +1502,7 @@
           break;
         }
         case kFixupAdr: {
-          EmbeddedData *tab_rec = reinterpret_cast<EmbeddedData*>(UnwrapPointer(lir->operands[2]));
+          const EmbeddedData* tab_rec = UnwrapPointer<EmbeddedData>(lir->operands[2]);
           LIR* target = lir->target;
           int32_t target_disp = (tab_rec != NULL) ?  tab_rec->offset + offset_adjustment
               : target->offset + ((target->flags.generation == lir->flags.generation) ? 0 :
@@ -1555,8 +1555,8 @@
         }
         case kFixupMovImmLST: {
           // operands[1] should hold disp, [2] has add, [3] has tab_rec
-          LIR *addPCInst = reinterpret_cast<LIR*>(UnwrapPointer(lir->operands[2]));
-          EmbeddedData *tab_rec = reinterpret_cast<EmbeddedData*>(UnwrapPointer(lir->operands[3]));
+          const LIR* addPCInst = UnwrapPointer<LIR>(lir->operands[2]);
+          const EmbeddedData* tab_rec = UnwrapPointer<EmbeddedData>(lir->operands[3]);
           // If tab_rec is null, this is a literal load. Use target
           LIR* target = lir->target;
           int32_t target_disp = tab_rec ? tab_rec->offset : target->offset;
@@ -1565,8 +1565,8 @@
         }
         case kFixupMovImmHST: {
           // operands[1] should hold disp, [2] has add, [3] has tab_rec
-          LIR *addPCInst = reinterpret_cast<LIR*>(UnwrapPointer(lir->operands[2]));
-          EmbeddedData *tab_rec = reinterpret_cast<EmbeddedData*>(UnwrapPointer(lir->operands[3]));
+          const LIR* addPCInst = UnwrapPointer<LIR>(lir->operands[2]);
+          const EmbeddedData* tab_rec = UnwrapPointer<EmbeddedData>(lir->operands[3]);
           // If tab_rec is null, this is a literal load. Use target
           LIR* target = lir->target;
           int32_t target_disp = tab_rec ? tab_rec->offset : target->offset;
diff --git a/compiler/dex/quick/arm/call_arm.cc b/compiler/dex/quick/arm/call_arm.cc
index 1a9dbea..d46c25a 100644
--- a/compiler/dex/quick/arm/call_arm.cc
+++ b/compiler/dex/quick/arm/call_arm.cc
@@ -433,7 +433,7 @@
       class StackOverflowSlowPath : public LIRSlowPath {
        public:
         StackOverflowSlowPath(Mir2Lir* m2l, LIR* branch, bool restore_lr, size_t sp_displace)
-            : LIRSlowPath(m2l, m2l->GetCurrentDexPc(), branch, nullptr), restore_lr_(restore_lr),
+            : LIRSlowPath(m2l, branch), restore_lr_(restore_lr),
               sp_displace_(sp_displace) {
         }
         void Compile() OVERRIDE {
@@ -658,7 +658,7 @@
   // NOTE: Method deduplication takes linker patches into account, so we can just pass 0
   // as a placeholder for the offset.
   LIR* call = RawLIR(current_dalvik_offset_, kThumb2Bl, 0,
-                     target_method_idx, WrapPointer(const_cast<DexFile*>(target_dex_file)), type);
+                     target_method_idx, WrapPointer(target_dex_file), type);
   AppendLIR(call);
   call_method_insns_.push_back(call);
   return call;
diff --git a/compiler/dex/quick/arm/codegen_arm.h b/compiler/dex/quick/arm/codegen_arm.h
index 67fabbd..4141bcf 100644
--- a/compiler/dex/quick/arm/codegen_arm.h
+++ b/compiler/dex/quick/arm/codegen_arm.h
@@ -200,7 +200,7 @@
     void UpdateIT(LIR* it, const char* new_guide);
     void OpEndIT(LIR* it);
     LIR* OpMem(OpKind op, RegStorage r_base, int disp);
-    LIR* OpPcRelLoad(RegStorage reg, LIR* target);
+    void OpPcRelLoad(RegStorage reg, LIR* target);
     LIR* OpReg(OpKind op, RegStorage r_dest_src);
     void OpRegCopy(RegStorage r_dest, RegStorage r_src);
     LIR* OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src);
diff --git a/compiler/dex/quick/arm/int_arm.cc b/compiler/dex/quick/arm/int_arm.cc
index ef26323..9193e1b 100644
--- a/compiler/dex/quick/arm/int_arm.cc
+++ b/compiler/dex/quick/arm/int_arm.cc
@@ -1081,9 +1081,10 @@
   return true;
 }
 
-LIR* ArmMir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) {
+void ArmMir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) {
   ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
-  return RawLIR(current_dalvik_offset_, kThumb2LdrPcRel12, reg.GetReg(), 0, 0, 0, 0, target);
+  LIR* lir = NewLIR2(kThumb2LdrPcRel12, reg.GetReg(), 0);
+  lir->target = target;
 }
 
 LIR* ArmMir2Lir::OpVldm(RegStorage r_base, int count) {
diff --git a/compiler/dex/quick/arm/target_arm.cc b/compiler/dex/quick/arm/target_arm.cc
index 13f9072..9812d9f 100644
--- a/compiler/dex/quick/arm/target_arm.cc
+++ b/compiler/dex/quick/arm/target_arm.cc
@@ -455,7 +455,7 @@
            case 'T':
              snprintf(tbuf, arraysize(tbuf), "%s", PrettyMethod(
                  static_cast<uint32_t>(lir->operands[1]),
-                 *reinterpret_cast<const DexFile*>(UnwrapPointer(lir->operands[2]))).c_str());
+                 *UnwrapPointer<DexFile>(lir->operands[2])).c_str());
              break;
            case 'u': {
              int offset_1 = lir->operands[0];
@@ -906,9 +906,7 @@
   for (LIR* p : call_method_insns_) {
       DCHECK_EQ(p->opcode, kThumb2Bl);
       uint32_t target_method_idx = p->operands[1];
-      const DexFile* target_dex_file =
-          reinterpret_cast<const DexFile*>(UnwrapPointer(p->operands[2]));
-
+      const DexFile* target_dex_file = UnwrapPointer<DexFile>(p->operands[2]);
       patches_.push_back(LinkerPatch::RelativeCodePatch(p->offset,
                                                         target_dex_file, target_method_idx));
   }
diff --git a/compiler/dex/quick/arm64/assemble_arm64.cc b/compiler/dex/quick/arm64/assemble_arm64.cc
index aa5e5b4..329bb1e 100644
--- a/compiler/dex/quick/arm64/assemble_arm64.cc
+++ b/compiler/dex/quick/arm64/assemble_arm64.cc
@@ -1003,7 +1003,7 @@
                                       0 : offset_adjustment) + target_lir->offset;
             delta = target_offs - lir->offset;
           } else if (lir->operands[2] >= 0) {
-            EmbeddedData* tab = reinterpret_cast<EmbeddedData*>(UnwrapPointer(lir->operands[2]));
+            const EmbeddedData* tab = UnwrapPointer<EmbeddedData>(lir->operands[2]);
             delta = tab->offset + offset_adjustment - lir->offset;
           } else {
             // No fixup: this usage allows to retrieve the current PC.
diff --git a/compiler/dex/quick/arm64/call_arm64.cc b/compiler/dex/quick/arm64/call_arm64.cc
index 8275162..823cb60 100644
--- a/compiler/dex/quick/arm64/call_arm64.cc
+++ b/compiler/dex/quick/arm64/call_arm64.cc
@@ -351,8 +351,8 @@
     if (generate_explicit_stack_overflow_check) {
       class StackOverflowSlowPath: public LIRSlowPath {
       public:
-        StackOverflowSlowPath(Mir2Lir* m2l, LIR* branch, size_t sp_displace) :
-              LIRSlowPath(m2l, m2l->GetCurrentDexPc(), branch, nullptr),
+        StackOverflowSlowPath(Mir2Lir* m2l, LIR* branch, size_t sp_displace)
+            : LIRSlowPath(m2l, branch),
               sp_displace_(sp_displace) {
         }
         void Compile() OVERRIDE {
@@ -525,7 +525,7 @@
   // NOTE: Method deduplication takes linker patches into account, so we can just pass 0
   // as a placeholder for the offset.
   LIR* call = RawLIR(current_dalvik_offset_, kA64Bl1t, 0,
-                     target_method_idx, WrapPointer(const_cast<DexFile*>(target_dex_file)), type);
+                     target_method_idx, WrapPointer(target_dex_file), type);
   AppendLIR(call);
   call_method_insns_.push_back(call);
   return call;
diff --git a/compiler/dex/quick/arm64/codegen_arm64.h b/compiler/dex/quick/arm64/codegen_arm64.h
index d5f0536..54fd46d 100644
--- a/compiler/dex/quick/arm64/codegen_arm64.h
+++ b/compiler/dex/quick/arm64/codegen_arm64.h
@@ -203,7 +203,7 @@
   LIR* OpIT(ConditionCode cond, const char* guide) OVERRIDE;
   void OpEndIT(LIR* it) OVERRIDE;
   LIR* OpMem(OpKind op, RegStorage r_base, int disp) OVERRIDE;
-  LIR* OpPcRelLoad(RegStorage reg, LIR* target) OVERRIDE;
+  void OpPcRelLoad(RegStorage reg, LIR* target) OVERRIDE;
   LIR* OpReg(OpKind op, RegStorage r_dest_src) OVERRIDE;
   void OpRegCopy(RegStorage r_dest, RegStorage r_src) OVERRIDE;
   LIR* OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src) OVERRIDE;
diff --git a/compiler/dex/quick/arm64/fp_arm64.cc b/compiler/dex/quick/arm64/fp_arm64.cc
index a8ec6c0..49b15fe 100644
--- a/compiler/dex/quick/arm64/fp_arm64.cc
+++ b/compiler/dex/quick/arm64/fp_arm64.cc
@@ -449,7 +449,7 @@
 }
 
 bool Arm64Mir2Lir::GenInlinedRound(CallInfo* info, bool is_double) {
-  int32_t encoded_imm = EncodeImmSingle(bit_cast<float, uint32_t>(0.5f));
+  int32_t encoded_imm = EncodeImmSingle(bit_cast<uint32_t, float>(0.5f));
   A64Opcode wide = (is_double) ? WIDE(0) : UNWIDE(0);
   RegLocation rl_src = info->args[0];
   RegLocation rl_dest = (is_double) ? InlineTargetWide(info) : InlineTarget(info);
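Note: the bit_cast fixes here and in gen_common.cc below put the destination type first in the
template argument list. A minimal sketch of that convention, assuming the usual memcpy-based
idiom rather than ART's actual header:

#include <cstdint>
#include <cstring>

// Destination type first, source type second; sizes must match.
template <typename Dest, typename Source>
Dest bit_cast(const Source& source) {
  static_assert(sizeof(Dest) == sizeof(Source), "bit_cast sizes must match");
  Dest dest;
  std::memcpy(&dest, &source, sizeof(dest));
  return dest;
}

int main() {
  // Matches the corrected call above: reinterpret the bits of 0.5f as an integer.
  uint32_t bits = bit_cast<uint32_t, float>(0.5f);
  return bits == 0x3F000000u ? 0 : 1;  // 0.5f is 0x3F000000 in IEEE-754 single precision.
}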
diff --git a/compiler/dex/quick/arm64/int_arm64.cc b/compiler/dex/quick/arm64/int_arm64.cc
index 92675f3..2372ccc 100644
--- a/compiler/dex/quick/arm64/int_arm64.cc
+++ b/compiler/dex/quick/arm64/int_arm64.cc
@@ -937,9 +937,10 @@
   return true;
 }
 
-LIR* Arm64Mir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) {
+void Arm64Mir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) {
   ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
-  return RawLIR(current_dalvik_offset_, kA64Ldr2rp, As32BitReg(reg).GetReg(), 0, 0, 0, 0, target);
+  LIR* lir = NewLIR2(kA64Ldr2rp, As32BitReg(reg).GetReg(), 0);
+  lir->target = target;
 }
 
 LIR* Arm64Mir2Lir::OpVldm(RegStorage r_base, int count) {
diff --git a/compiler/dex/quick/arm64/target_arm64.cc b/compiler/dex/quick/arm64/target_arm64.cc
index 136be94..09a34bf 100644
--- a/compiler/dex/quick/arm64/target_arm64.cc
+++ b/compiler/dex/quick/arm64/target_arm64.cc
@@ -851,9 +851,7 @@
   for (LIR* p : call_method_insns_) {
       DCHECK_EQ(p->opcode, kA64Bl1t);
       uint32_t target_method_idx = p->operands[1];
-      const DexFile* target_dex_file =
-          reinterpret_cast<const DexFile*>(UnwrapPointer(p->operands[2]));
-
+      const DexFile* target_dex_file = UnwrapPointer<DexFile>(p->operands[2]);
       patches_.push_back(LinkerPatch::RelativeCodePatch(p->offset,
                                                         target_dex_file, target_method_idx));
   }
diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc
index 029c0ca..bd479be 100644
--- a/compiler/dex/quick/codegen_util.cc
+++ b/compiler/dex/quick/codegen_util.cc
@@ -16,6 +16,7 @@
 
 #include "mir_to_lir-inl.h"
 
+#include "base/bit_vector-inl.h"
 #include "dex/mir_graph.h"
 #include "driver/compiler_driver.h"
 #include "driver/compiler_options.h"
@@ -88,6 +89,8 @@
   inst->u.m.def_mask = &kEncodeAll;
   LIR* safepoint_pc = NewLIR0(kPseudoSafepointPC);
   DCHECK(safepoint_pc->u.m.def_mask->Equals(kEncodeAll));
+  DCHECK(current_mir_ != nullptr || (current_dalvik_offset_ == 0 && safepoints_.empty()));
+  safepoints_.emplace_back(safepoint_pc, current_mir_);
 }
 
 void Mir2Lir::MarkSafepointPCAfter(LIR* after) {
@@ -102,6 +105,8 @@
     InsertLIRAfter(after, safepoint_pc);
   }
   DCHECK(safepoint_pc->u.m.def_mask->Equals(kEncodeAll));
+  DCHECK(current_mir_ != nullptr || (current_dalvik_offset_ == 0 && safepoints_.empty()));
+  safepoints_.emplace_back(safepoint_pc, current_mir_);
 }
 
 /* Remove a LIR from the list. */
@@ -217,7 +222,7 @@
       }
       LOG(INFO) << "-------- dalvik offset: 0x" << std::hex
                 << lir->dalvik_offset << " @ "
-                << reinterpret_cast<char*>(UnwrapPointer(lir->operands[0]));
+                << UnwrapPointer<char>(lir->operands[0]);
       break;
     case kPseudoExitBlock:
       LOG(INFO) << "-------- exit offset: 0x" << std::hex << dest;
@@ -411,7 +416,7 @@
 LIR* Mir2Lir::ScanLiteralPoolMethod(LIR* data_target, const MethodReference& method) {
   while (data_target) {
     if (static_cast<uint32_t>(data_target->operands[0]) == method.dex_method_index &&
-        UnwrapPointer(data_target->operands[1]) == method.dex_file) {
+        UnwrapPointer<DexFile>(data_target->operands[1]) == method.dex_file) {
       return data_target;
     }
     data_target = data_target->next;
@@ -423,7 +428,7 @@
 LIR* Mir2Lir::ScanLiteralPoolClass(LIR* data_target, const DexFile& dex_file, uint32_t type_idx) {
   while (data_target) {
     if (static_cast<uint32_t>(data_target->operands[0]) == type_idx &&
-        UnwrapPointer(data_target->operands[1]) == &dex_file) {
+        UnwrapPointer<DexFile>(data_target->operands[1]) == &dex_file) {
       return data_target;
     }
     data_target = data_target->next;
@@ -486,8 +491,7 @@
   data_lir = code_literal_list_;
   while (data_lir != nullptr) {
     uint32_t target_method_idx = data_lir->operands[0];
-    const DexFile* target_dex_file =
-        reinterpret_cast<const DexFile*>(UnwrapPointer(data_lir->operands[1]));
+    const DexFile* target_dex_file = UnwrapPointer<DexFile>(data_lir->operands[1]);
     patches_.push_back(LinkerPatch::CodePatch(code_buffer_.size(),
                                               target_dex_file, target_method_idx));
     PushUnpatchedReference(&code_buffer_);
@@ -496,8 +500,7 @@
   data_lir = method_literal_list_;
   while (data_lir != nullptr) {
     uint32_t target_method_idx = data_lir->operands[0];
-    const DexFile* target_dex_file =
-        reinterpret_cast<const DexFile*>(UnwrapPointer(data_lir->operands[1]));
+    const DexFile* target_dex_file = UnwrapPointer<DexFile>(data_lir->operands[1]);
     patches_.push_back(LinkerPatch::MethodPatch(code_buffer_.size(),
                                                 target_dex_file, target_method_idx));
     PushUnpatchedReference(&code_buffer_);
@@ -507,8 +510,7 @@
   data_lir = class_literal_list_;
   while (data_lir != nullptr) {
     uint32_t target_type_idx = data_lir->operands[0];
-    const DexFile* class_dex_file =
-      reinterpret_cast<const DexFile*>(UnwrapPointer(data_lir->operands[1]));
+    const DexFile* class_dex_file = UnwrapPointer<DexFile>(data_lir->operands[1]);
     patches_.push_back(LinkerPatch::TypePatch(code_buffer_.size(),
                                               class_dex_file, target_type_idx));
     PushUnpatchedReference(&code_buffer_);
@@ -767,6 +769,71 @@
 }
 
 void Mir2Lir::CreateNativeGcMap() {
+  if (UNLIKELY((cu_->disable_opt & (1u << kPromoteRegs)) != 0u)) {
+    // If we're not promoting to physical registers, it's safe to use the verifier's notion of
+    // references. (We disable register promotion when type inference finds a type conflict and
+    // in that case we defer to the verifier to avoid using the compiler's conflicting info.)
+    CreateNativeGcMapWithoutRegisterPromotion();
+    return;
+  }
+
+  ArenaBitVector* references = new (arena_) ArenaBitVector(arena_, mir_graph_->GetNumSSARegs(),
+                                                           false);
+
+  // Calculate max native offset and max reference vreg.
+  MIR* prev_mir = nullptr;
+  int max_ref_vreg = -1;
+  CodeOffset max_native_offset = 0u;
+  for (const auto& entry : safepoints_) {
+    uint32_t native_offset = entry.first->offset;
+    max_native_offset = std::max(max_native_offset, native_offset);
+    MIR* mir = entry.second;
+    UpdateReferenceVRegs(mir, prev_mir, references);
+    max_ref_vreg = std::max(max_ref_vreg, references->GetHighestBitSet());
+    prev_mir = mir;
+  }
+
+#if defined(BYTE_ORDER) && (BYTE_ORDER == LITTLE_ENDIAN)
+  static constexpr bool kLittleEndian = true;
+#else
+  static constexpr bool kLittleEndian = false;
+#endif
+
+  // Build the GC map.
+  uint32_t reg_width = static_cast<uint32_t>((max_ref_vreg + 8) / 8);
+  GcMapBuilder native_gc_map_builder(&native_gc_map_,
+                                     safepoints_.size(),
+                                     max_native_offset, reg_width);
+  if (kLittleEndian) {
+    for (const auto& entry : safepoints_) {
+      uint32_t native_offset = entry.first->offset;
+      MIR* mir = entry.second;
+      UpdateReferenceVRegs(mir, prev_mir, references);
+      // For little-endian, the bytes comprising the bit vector's raw storage are what we need.
+      native_gc_map_builder.AddEntry(native_offset,
+                                     reinterpret_cast<const uint8_t*>(references->GetRawStorage()));
+      prev_mir = mir;
+    }
+  } else {
+    ArenaVector<uint8_t> references_buffer(arena_->Adapter());
+    references_buffer.resize(reg_width);
+    for (const auto& entry : safepoints_) {
+      uint32_t native_offset = entry.first->offset;
+      MIR* mir = entry.second;
+      UpdateReferenceVRegs(mir, prev_mir, references);
+      // Big-endian or unknown endianness, manually translate the bit vector data.
+      const auto* raw_storage = references->GetRawStorage();
+      for (size_t i = 0; i != reg_width; ++i) {
+        references_buffer[i] = static_cast<uint8_t>(
+            raw_storage[i / sizeof(raw_storage[0])] >> (8u * (i % sizeof(raw_storage[0]))));
+      }
+      native_gc_map_builder.AddEntry(native_offset, &references_buffer[0]);
+      prev_mir = mir;
+    }
+  }
+}
+
+void Mir2Lir::CreateNativeGcMapWithoutRegisterPromotion() {
   DCHECK(!encoded_mapping_table_.empty());
   MappingTable mapping_table(&encoded_mapping_table_[0]);
   uint32_t max_native_offset = 0;
@@ -965,6 +1032,7 @@
       block_label_list_(nullptr),
       promotion_map_(nullptr),
       current_dalvik_offset_(0),
+      current_mir_(nullptr),
       estimated_native_code_size_(0),
       reg_pool_(nullptr),
       live_sreg_(0),
@@ -984,6 +1052,7 @@
       slow_paths_(arena->Adapter(kArenaAllocSlowPaths)),
       mem_ref_type_(ResourceMask::kHeapRef),
       mask_cache_(arena),
+      safepoints_(arena->Adapter()),
       in_to_reg_storage_mapping_(arena) {
   switch_tables_.reserve(4);
   fill_array_data_.reserve(4);
@@ -992,7 +1061,7 @@
   pointer_storage_.reserve(128);
   slow_paths_.reserve(32);
   // Reserve pointer id 0 for nullptr.
-  size_t null_idx = WrapPointer(nullptr);
+  size_t null_idx = WrapPointer<void>(nullptr);
   DCHECK_EQ(null_idx, 0U);
 }
 
@@ -1201,8 +1270,7 @@
     data_target->operands[2] = type;
   }
   // Loads a code pointer. Code from oat file can be mapped anywhere.
-  LIR* load_pc_rel = OpPcRelLoad(TargetPtrReg(symbolic_reg), data_target);
-  AppendLIR(load_pc_rel);
+  OpPcRelLoad(TargetPtrReg(symbolic_reg), data_target);
   DCHECK_NE(cu_->instruction_set, kMips) << reinterpret_cast<void*>(data_target);
   DCHECK_NE(cu_->instruction_set, kMips64) << reinterpret_cast<void*>(data_target);
 }
@@ -1219,8 +1287,7 @@
     data_target->operands[2] = type;
   }
   // Loads an ArtMethod pointer, which is a reference as it lives in the heap.
-  LIR* load_pc_rel = OpPcRelLoad(TargetReg(symbolic_reg, kRef), data_target);
-  AppendLIR(load_pc_rel);
+  OpPcRelLoad(TargetReg(symbolic_reg, kRef), data_target);
   DCHECK_NE(cu_->instruction_set, kMips) << reinterpret_cast<void*>(data_target);
   DCHECK_NE(cu_->instruction_set, kMips64) << reinterpret_cast<void*>(data_target);
 }
@@ -1234,8 +1301,7 @@
     data_target->operands[1] = WrapPointer(const_cast<DexFile*>(&dex_file));
   }
   // Loads a Class pointer, which is a reference as it lives in the heap.
-  LIR* load_pc_rel = OpPcRelLoad(TargetReg(symbolic_reg, kRef), data_target);
-  AppendLIR(load_pc_rel);
+  OpPcRelLoad(TargetReg(symbolic_reg, kRef), data_target);
 }
 
 std::vector<uint8_t>* Mir2Lir::ReturnFrameDescriptionEntry() {
@@ -1274,4 +1340,97 @@
   UNREACHABLE();
 }
 
+void Mir2Lir::InitReferenceVRegs(BasicBlock* bb, BitVector* references) {
+  // Mark the references coming from the first predecessor.
+  DCHECK(bb != nullptr);
+  DCHECK(bb->block_type == kEntryBlock || !bb->predecessors.empty());
+  BasicBlock* first_bb =
+      (bb->block_type == kEntryBlock) ? bb : mir_graph_->GetBasicBlock(bb->predecessors[0]);
+  DCHECK(first_bb != nullptr);
+  DCHECK(first_bb->data_flow_info != nullptr);
+  DCHECK(first_bb->data_flow_info->vreg_to_ssa_map_exit != nullptr);
+  const int32_t* first_vreg_to_ssa_map = first_bb->data_flow_info->vreg_to_ssa_map_exit;
+  references->ClearAllBits();
+  for (uint32_t vreg = 0, num_vregs = mir_graph_->GetNumOfCodeVRs(); vreg != num_vregs; ++vreg) {
+    int32_t sreg = first_vreg_to_ssa_map[vreg];
+    if (sreg != INVALID_SREG && mir_graph_->reg_location_[sreg].ref &&
+        !mir_graph_->IsConstantNullRef(mir_graph_->reg_location_[sreg])) {
+      references->SetBit(vreg);
+    }
+  }
+  // Unmark the references that are merging with a different value.
+  for (size_t i = 1u, num_pred = bb->predecessors.size(); i < num_pred; ++i) {
+    BasicBlock* pred_bb = mir_graph_->GetBasicBlock(bb->predecessors[i]);
+    DCHECK(pred_bb != nullptr);
+    DCHECK(pred_bb->data_flow_info != nullptr);
+    DCHECK(pred_bb->data_flow_info->vreg_to_ssa_map_exit != nullptr);
+    const int32_t* pred_vreg_to_ssa_map = pred_bb->data_flow_info->vreg_to_ssa_map_exit;
+    for (uint32_t vreg : references->Indexes()) {
+      if (first_vreg_to_ssa_map[vreg] != pred_vreg_to_ssa_map[vreg]) {
+      // NOTE: The BitVector::IndexIterator will not check the pointed-to bit again,
+        // so clearing the bit has no effect on the iterator.
+        references->ClearBit(vreg);
+      }
+    }
+  }
+  if (bb->block_type != kEntryBlock && bb->first_mir_insn != nullptr &&
+      static_cast<int>(bb->first_mir_insn->dalvikInsn.opcode) == kMirOpCheckPart2) {
+    // In Mir2Lir::MethodBlockCodeGen() we have artificially moved the throwing
+    // instruction to the previous block. However, the MIRGraph data used above
+    // doesn't reflect that, so we still need to process that MIR insn here.
+    DCHECK_EQ(bb->predecessors.size(), 1u);
+    BasicBlock* pred_bb = mir_graph_->GetBasicBlock(bb->predecessors[0]);
+    DCHECK(pred_bb != nullptr);
+    DCHECK(pred_bb->last_mir_insn != nullptr);
+    UpdateReferenceVRegsLocal(nullptr, pred_bb->last_mir_insn, references);
+  }
+}
+
+bool Mir2Lir::UpdateReferenceVRegsLocal(MIR* mir, MIR* prev_mir, BitVector* references) {
+  DCHECK(mir == nullptr || mir->bb == prev_mir->bb);
+  DCHECK(prev_mir != nullptr);
+  while (prev_mir != nullptr) {
+    if (prev_mir == mir) {
+      return true;
+    }
+    const size_t num_defs = prev_mir->ssa_rep->num_defs;
+    const int32_t* defs = prev_mir->ssa_rep->defs;
+    if (num_defs == 1u && mir_graph_->reg_location_[defs[0]].ref &&
+        !mir_graph_->IsConstantNullRef(mir_graph_->reg_location_[defs[0]])) {
+      references->SetBit(mir_graph_->SRegToVReg(defs[0]));
+    } else {
+      for (size_t i = 0u; i != num_defs; ++i) {
+        references->ClearBit(mir_graph_->SRegToVReg(defs[i]));
+      }
+    }
+    prev_mir = prev_mir->next;
+  }
+  return false;
+}
+
+void Mir2Lir::UpdateReferenceVRegs(MIR* mir, MIR* prev_mir, BitVector* references) {
+  if (mir == nullptr) {
+    // Safepoint in entry sequence.
+    InitReferenceVRegs(mir_graph_->GetEntryBlock(), references);
+    return;
+  }
+  if (IsInstructionReturn(mir->dalvikInsn.opcode) ||
+      mir->dalvikInsn.opcode == Instruction::RETURN_VOID_NO_BARRIER) {
+    references->ClearAllBits();
+    if (mir->dalvikInsn.opcode == Instruction::RETURN_OBJECT) {
+      references->SetBit(mir_graph_->SRegToVReg(mir->ssa_rep->uses[0]));
+    }
+    return;
+  }
+  if (prev_mir != nullptr && mir->bb == prev_mir->bb &&
+      UpdateReferenceVRegsLocal(mir, prev_mir, references)) {
+    return;
+  }
+  BasicBlock* bb = mir_graph_->GetBasicBlock(mir->bb);
+  DCHECK(bb != nullptr);
+  InitReferenceVRegs(bb, references);
+  bool success = UpdateReferenceVRegsLocal(mir, bb->first_mir_insn, references);
+  DCHECK(success) << "MIR @0x" << std::hex << mir->offset << " not in BB#" << std::dec << mir->bb;
+}
+
 }  // namespace art
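Note: the big-endian path of CreateNativeGcMap above re-serializes the reference bit vector byte
by byte from its word-sized raw storage so the GC map layout is the same on every host. A
standalone sketch of that extraction, assuming 32-bit storage words (an assumption here; the
real word width comes from the BitVector implementation):

#include <cstddef>
#include <cstdint>
#include <vector>

// Extract the bytes of the bit vector's raw word storage in little-endian order,
// independent of host endianness (mirrors the loop in the big-endian branch above).
std::vector<uint8_t> SerializeReferenceBits(const uint32_t* raw_storage, size_t reg_width) {
  std::vector<uint8_t> out(reg_width);
  for (size_t i = 0; i != reg_width; ++i) {
    out[i] = static_cast<uint8_t>(raw_storage[i / sizeof(uint32_t)] >> (8u * (i % sizeof(uint32_t))));
  }
  return out;
}

int main() {
  const uint32_t words[] = { 0x00000105u, 0x00000001u };  // vregs 0, 2, 8 and 32 hold references.
  std::vector<uint8_t> bytes = SerializeReferenceBits(words, 5u);
  return (bytes[0] == 0x05u && bytes[1] == 0x01u && bytes[4] == 0x01u) ? 0 : 1;
}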
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index e57889a..2bcaaca 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -86,7 +86,7 @@
   class DivZeroCheckSlowPath : public Mir2Lir::LIRSlowPath {
    public:
     DivZeroCheckSlowPath(Mir2Lir* m2l, LIR* branch_in)
-        : LIRSlowPath(m2l, m2l->GetCurrentDexPc(), branch_in) {
+        : LIRSlowPath(m2l, branch_in) {
     }
 
     void Compile() OVERRIDE {
@@ -105,7 +105,7 @@
    public:
     ArrayBoundsCheckSlowPath(Mir2Lir* m2l, LIR* branch_in, RegStorage index_in,
                              RegStorage length_in)
-        : LIRSlowPath(m2l, m2l->GetCurrentDexPc(), branch_in),
+        : LIRSlowPath(m2l, branch_in),
           index_(index_in), length_(length_in) {
     }
 
@@ -129,7 +129,7 @@
   class ArrayBoundsCheckSlowPath : public Mir2Lir::LIRSlowPath {
    public:
     ArrayBoundsCheckSlowPath(Mir2Lir* m2l, LIR* branch_in, int index_in, RegStorage length_in)
-        : LIRSlowPath(m2l, m2l->GetCurrentDexPc(), branch_in),
+        : LIRSlowPath(m2l, branch_in),
           index_(index_in), length_(length_in) {
     }
 
@@ -159,7 +159,7 @@
   class NullCheckSlowPath : public Mir2Lir::LIRSlowPath {
    public:
     NullCheckSlowPath(Mir2Lir* m2l, LIR* branch)
-        : LIRSlowPath(m2l, m2l->GetCurrentDexPc(), branch) {
+        : LIRSlowPath(m2l, branch) {
     }
 
     void Compile() OVERRIDE {
@@ -581,7 +581,7 @@
   // At least one will be non-null here, otherwise we wouldn't generate the slow path.
   StaticFieldSlowPath(Mir2Lir* m2l, LIR* unresolved, LIR* uninit, LIR* cont, int storage_index,
                       RegStorage r_base)
-      : LIRSlowPath(m2l, m2l->GetCurrentDexPc(), unresolved != nullptr ? unresolved : uninit, cont),
+      : LIRSlowPath(m2l, unresolved != nullptr ? unresolved : uninit, cont),
         second_branch_(unresolved != nullptr ? uninit : nullptr),
         storage_index_(storage_index), r_base_(r_base) {
   }
@@ -1052,9 +1052,9 @@
       class SlowPath : public LIRSlowPath {
        public:
         SlowPath(Mir2Lir* m2l, LIR* fromfast, LIR* cont_in, const int type_idx_in,
-                 const RegLocation& rl_method_in, const RegLocation& rl_result_in) :
-                   LIRSlowPath(m2l, m2l->GetCurrentDexPc(), fromfast, cont_in),
-                   type_idx_(type_idx_in), rl_method_(rl_method_in), rl_result_(rl_result_in) {
+                 const RegLocation& rl_method_in, const RegLocation& rl_result_in)
+            : LIRSlowPath(m2l, fromfast, cont_in),
+              type_idx_(type_idx_in), rl_method_(rl_method_in), rl_result_(rl_result_in) {
         }
 
         void Compile() {
@@ -1120,9 +1120,9 @@
       class SlowPath : public LIRSlowPath {
        public:
         SlowPath(Mir2Lir* m2l, LIR* fromfast_in, LIR* cont_in, RegStorage r_method_in,
-                 int32_t string_idx_in) :
-            LIRSlowPath(m2l, m2l->GetCurrentDexPc(), fromfast_in, cont_in),
-            r_method_(r_method_in), string_idx_(string_idx_in) {
+                 int32_t string_idx_in)
+            : LIRSlowPath(m2l, fromfast_in, cont_in),
+              r_method_(r_method_in), string_idx_(string_idx_in) {
         }
 
         void Compile() {
@@ -1304,7 +1304,7 @@
        public:
         InitTypeSlowPath(Mir2Lir* m2l, LIR* branch, LIR* cont, uint32_t type_idx_in,
                          RegLocation rl_src_in)
-            : LIRSlowPath(m2l, m2l->GetCurrentDexPc(), branch, cont), type_idx_(type_idx_in),
+            : LIRSlowPath(m2l, branch, cont), type_idx_(type_idx_in),
               rl_src_(rl_src_in) {
         }
 
@@ -1403,7 +1403,12 @@
   }
 }
 
-void Mir2Lir::GenCheckCast(uint32_t insn_idx, uint32_t type_idx, RegLocation rl_src) {
+void Mir2Lir::GenCheckCast(int opt_flags, uint32_t insn_idx, uint32_t type_idx,
+                           RegLocation rl_src) {
+  if ((opt_flags & MIR_IGNORE_CHECK_CAST) != 0) {
+    // Compiler analysis proved that this check-cast would never cause an exception.
+    return;
+  }
   bool type_known_final, type_known_abstract, use_declaring_class;
   bool needs_access_check = !cu_->compiler_driver->CanAccessTypeWithoutChecks(cu_->method_idx,
                                                                               *cu_->dex_file,
@@ -1448,9 +1453,9 @@
       class SlowPath : public LIRSlowPath {
        public:
         SlowPath(Mir2Lir* m2l, LIR* fromfast, LIR* cont_in, const int type_idx_in,
-                 const RegStorage class_reg_in) :
-                   LIRSlowPath(m2l, m2l->GetCurrentDexPc(), fromfast, cont_in),
-                   type_idx_(type_idx_in), class_reg_(class_reg_in) {
+                 const RegStorage class_reg_in)
+            : LIRSlowPath(m2l, fromfast, cont_in),
+              type_idx_(type_idx_in), class_reg_(class_reg_in) {
         }
 
         void Compile() {
@@ -1479,8 +1484,8 @@
   // to call a helper function to do the check.
   class SlowPath : public LIRSlowPath {
    public:
-    SlowPath(Mir2Lir* m2l, LIR* fromfast, LIR* cont, bool load):
-               LIRSlowPath(m2l, m2l->GetCurrentDexPc(), fromfast, cont), load_(load) {
+    SlowPath(Mir2Lir* m2l, LIR* fromfast, LIR* cont, bool load)
+        : LIRSlowPath(m2l, fromfast, cont), load_(load) {
     }
 
     void Compile() {
@@ -1869,8 +1874,8 @@
     int32_t divisor = mir_graph_->ConstantValue(rl_src2);
     if (CanDivideByReciprocalMultiplyFloat(divisor)) {
       // Generate multiply by reciprocal instead of div.
-      float recip = 1.0f/bit_cast<int32_t, float>(divisor);
-      GenMultiplyByConstantFloat(rl_dest, rl_src1, bit_cast<float, int32_t>(recip));
+      float recip = 1.0f/bit_cast<float, int32_t>(divisor);
+      GenMultiplyByConstantFloat(rl_dest, rl_src1, bit_cast<int32_t, float>(recip));
       return true;
     }
   } else {
@@ -1878,7 +1883,7 @@
     if (CanDivideByReciprocalMultiplyDouble(divisor)) {
       // Generate multiply by reciprocal instead of div.
       double recip = 1.0/bit_cast<double, int64_t>(divisor);
-      GenMultiplyByConstantDouble(rl_dest, rl_src1, bit_cast<double, int64_t>(recip));
+      GenMultiplyByConstantDouble(rl_dest, rl_src1, bit_cast<int64_t, double>(recip));
       return true;
     }
   }
@@ -2146,9 +2151,6 @@
   RegLocation rl_result = EvalLoc(rl_dest, kAnyReg, true);
   LoadConstantNoClobber(rl_result.reg, value);
   StoreValue(rl_dest, rl_result);
-  if (value == 0) {
-    Workaround7250540(rl_dest, rl_result.reg);
-  }
 }
 
 void Mir2Lir::GenConversionCall(QuickEntrypointEnum trampoline, RegLocation rl_dest,
@@ -2174,7 +2176,7 @@
 class Mir2Lir::SuspendCheckSlowPath : public Mir2Lir::LIRSlowPath {
  public:
   SuspendCheckSlowPath(Mir2Lir* m2l, LIR* branch, LIR* cont)
-      : LIRSlowPath(m2l, m2l->GetCurrentDexPc(), branch, cont) {
+      : LIRSlowPath(m2l, branch, cont) {
   }
 
   void Compile() OVERRIDE {
diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc
index 6b553fd..2d41ba1 100755
--- a/compiler/dex/quick/gen_invoke.cc
+++ b/compiler/dex/quick/gen_invoke.cc
@@ -48,7 +48,8 @@
   class IntrinsicSlowPathPath : public Mir2Lir::LIRSlowPath {
    public:
     IntrinsicSlowPathPath(Mir2Lir* m2l, CallInfo* info_in, LIR* branch_in, LIR* resume_in)
-        : LIRSlowPath(m2l, info_in->offset, branch_in, resume_in), info_(info_in) {
+        : LIRSlowPath(m2l, branch_in, resume_in), info_(info_in) {
+      DCHECK_EQ(info_in->offset, current_dex_pc_);
     }
 
     void Compile() {
diff --git a/compiler/dex/quick/gen_loadstore.cc b/compiler/dex/quick/gen_loadstore.cc
index db844bc..b71691f 100644
--- a/compiler/dex/quick/gen_loadstore.cc
+++ b/compiler/dex/quick/gen_loadstore.cc
@@ -37,48 +37,6 @@
 }
 
 /*
- * Temporary workaround for Issue 7250540.  If we're loading a constant zero into a
- * promoted floating point register, also copy a zero into the int/ref identity of
- * that sreg.
- */
-void Mir2Lir::Workaround7250540(RegLocation rl_dest, RegStorage zero_reg) {
-  if (rl_dest.fp) {
-    int pmap_index = SRegToPMap(rl_dest.s_reg_low);
-    const bool is_fp_promoted = promotion_map_[pmap_index].fp_location == kLocPhysReg;
-    const bool is_core_promoted = promotion_map_[pmap_index].core_location == kLocPhysReg;
-    if (is_fp_promoted || is_core_promoted) {
-      // Now, determine if this vreg is ever used as a reference.  If not, we're done.
-      bool used_as_reference = false;
-      int base_vreg = mir_graph_->SRegToVReg(rl_dest.s_reg_low);
-      for (int i = 0; !used_as_reference && (i < mir_graph_->GetNumSSARegs()); i++) {
-        if (mir_graph_->SRegToVReg(mir_graph_->reg_location_[i].s_reg_low) == base_vreg) {
-          used_as_reference |= mir_graph_->reg_location_[i].ref;
-        }
-      }
-      if (!used_as_reference) {
-        return;
-      }
-      RegStorage temp_reg = zero_reg;
-      if (!temp_reg.Valid()) {
-        temp_reg = AllocTemp();
-        LoadConstant(temp_reg, 0);
-      }
-      if (is_core_promoted) {
-        // Promoted - just copy in a zero
-        OpRegCopy(RegStorage::Solo32(promotion_map_[pmap_index].core_reg), temp_reg);
-      } else {
-        // Lives in the frame, need to store.
-        ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-        StoreBaseDisp(TargetPtrReg(kSp), SRegOffset(rl_dest.s_reg_low), temp_reg, k32, kNotVolatile);
-      }
-      if (!zero_reg.Valid()) {
-        FreeTemp(temp_reg);
-      }
-    }
-  }
-}
-
-/*
  * Load a Dalvik register into a physical register.  Take care when
  * using this routine, as it doesn't perform any bookkeeping regarding
  * register liveness.  That is the responsibility of the caller.
diff --git a/compiler/dex/quick/mips/assemble_mips.cc b/compiler/dex/quick/mips/assemble_mips.cc
index 5c98b10..ed72d67 100644
--- a/compiler/dex/quick/mips/assemble_mips.cc
+++ b/compiler/dex/quick/mips/assemble_mips.cc
@@ -393,6 +393,14 @@
                  kFmtBitBlt, 20, 16, kFmtSfp, 15, 11, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE0 | REG_DEF1,
                  "mtc1", "!0r,!1s", 4),
+    ENCODING_MAP(kMipsMfhc1, 0x44600000,
+                 kFmtBitBlt, 20, 16, kFmtSfp, 15, 11, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "mfhc1", "!0r,!1s", 4),
+    ENCODING_MAP(kMipsMthc1, 0x44e00000,
+                 kFmtBitBlt, 20, 16, kFmtSfp, 15, 11, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE0 | REG_DEF1,
+                 "mthc1", "!0r,!1s", 4),
     ENCODING_MAP(kMipsDelta, 0x27e00000,
                  kFmtBitBlt, 20, 16, kFmtBitBlt, 15, 0, kFmtUnused, 15, 0,
                  kFmtUnused, -1, -1, IS_QUAD_OP | REG_DEF0 | REG_USE_LR |
@@ -413,6 +421,21 @@
                  kFmtBitBlt, 10, 6, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_UNARY_OP,
                  "sync", ";", 4),
+
+    // The following are mips32r6 instructions.
+    ENCODING_MAP(kMipsR6Div, 0x0000009a,
+                 kFmtBitBlt, 15, 11, kFmtBitBlt, 25, 21, kFmtBitBlt, 20, 16,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
+                 "div", "!0r,!1r,!2r", 4),
+    ENCODING_MAP(kMipsR6Mod, 0x000000da,
+                 kFmtBitBlt, 15, 11, kFmtBitBlt, 25, 21, kFmtBitBlt, 20, 16,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
+                 "mod", "!0r,!1r,!2r", 4),
+    ENCODING_MAP(kMipsR6Mul, 0x00000098,
+                 kFmtBitBlt, 15, 11, kFmtBitBlt, 25, 21, kFmtBitBlt, 20, 16,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
+                 "mul", "!0r,!1r,!2r", 4),
+
     ENCODING_MAP(kMipsUndefined, 0x64000000,
                  kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, NO_OPERAND,
@@ -539,8 +562,8 @@
          * and is found in lir->target.  If operands[3] is non-NULL,
          * then it is a Switch/Data table.
          */
-        int offset1 = (reinterpret_cast<LIR*>(UnwrapPointer(lir->operands[2])))->offset;
-        EmbeddedData *tab_rec = reinterpret_cast<EmbeddedData*>(UnwrapPointer(lir->operands[3]));
+        int offset1 = UnwrapPointer<LIR>(lir->operands[2])->offset;
+        const EmbeddedData* tab_rec = UnwrapPointer<EmbeddedData>(lir->operands[3]);
         int offset2 = tab_rec ? tab_rec->offset : lir->target->offset;
         int delta = offset2 - offset1;
         if ((delta & 0xffff) == delta && ((delta & 0x8000) == 0)) {
@@ -566,14 +589,14 @@
           res = kRetryAll;
         }
       } else if (lir->opcode == kMipsDeltaLo) {
-        int offset1 = (reinterpret_cast<LIR*>(UnwrapPointer(lir->operands[2])))->offset;
-        EmbeddedData *tab_rec = reinterpret_cast<EmbeddedData*>(UnwrapPointer(lir->operands[3]));
+        int offset1 = UnwrapPointer<LIR>(lir->operands[2])->offset;
+        const EmbeddedData* tab_rec = UnwrapPointer<EmbeddedData>(lir->operands[3]);
         int offset2 = tab_rec ? tab_rec->offset : lir->target->offset;
         int delta = offset2 - offset1;
         lir->operands[1] = delta & 0xffff;
       } else if (lir->opcode == kMipsDeltaHi) {
-        int offset1 = (reinterpret_cast<LIR*>(UnwrapPointer(lir->operands[2])))->offset;
-        EmbeddedData *tab_rec = reinterpret_cast<EmbeddedData*>(UnwrapPointer(lir->operands[3]));
+        int offset1 = UnwrapPointer<LIR>(lir->operands[2])->offset;
+        const EmbeddedData* tab_rec = UnwrapPointer<EmbeddedData>(lir->operands[3]);
         int offset2 = tab_rec ? tab_rec->offset : lir->target->offset;
         int delta = offset2 - offset1;
         lir->operands[1] = (delta >> 16) & 0xffff;
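Note: the new mips32r6 entries above encode div/mod/mul as plain three-register instructions,
with rd in bits 15..11, rs in bits 25..21 and rt in bits 20..16 OR-ed onto the base opcode word.
A small sketch of assembling one such word from those fields (hypothetical helper, not the
actual assembler):

#include <cstdint>
#include <cstdio>

// Assemble a MIPS32r6 three-register ALU op: base | rs << 21 | rt << 16 | rd << 11.
uint32_t AssembleR6ThreeReg(uint32_t base, uint32_t rd, uint32_t rs, uint32_t rt) {
  return base | (rs << 21) | (rt << 16) | (rd << 11);
}

int main() {
  // div $t0, $t1, $t2 (rd = 8, rs = 9, rt = 10) on top of the kMipsR6Div base word 0x0000009a.
  std::printf("0x%08x\n", static_cast<unsigned>(AssembleR6ThreeReg(0x0000009au, 8u, 9u, 10u)));
  return 0;
}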
diff --git a/compiler/dex/quick/mips/call_mips.cc b/compiler/dex/quick/mips/call_mips.cc
index d9471f6..b067221 100644
--- a/compiler/dex/quick/mips/call_mips.cc
+++ b/compiler/dex/quick/mips/call_mips.cc
@@ -263,7 +263,7 @@
     class StackOverflowSlowPath : public LIRSlowPath {
      public:
       StackOverflowSlowPath(Mir2Lir* m2l, LIR* branch, size_t sp_displace)
-          : LIRSlowPath(m2l, m2l->GetCurrentDexPc(), branch, nullptr), sp_displace_(sp_displace) {
+          : LIRSlowPath(m2l, branch), sp_displace_(sp_displace) {
       }
       void Compile() OVERRIDE {
         m2l_->ResetRegPool();
diff --git a/compiler/dex/quick/mips/codegen_mips.h b/compiler/dex/quick/mips/codegen_mips.h
index e1b43ca..649b6c9 100644
--- a/compiler/dex/quick/mips/codegen_mips.h
+++ b/compiler/dex/quick/mips/codegen_mips.h
@@ -76,7 +76,9 @@
 
     // Required for target - register utilities.
     RegStorage Solo64ToPair64(RegStorage reg);
+    RegStorage Fp64ToSolo32(RegStorage reg);
     RegStorage TargetReg(SpecialTargetRegister reg);
+    RegStorage TargetReg(SpecialTargetRegister reg, WideKind wide_kind) OVERRIDE;
     RegLocation GetReturnAlt();
     RegLocation GetReturnWideAlt();
     RegLocation LocCReturn();
@@ -170,7 +172,7 @@
     LIR* OpIT(ConditionCode cond, const char* guide);
     void OpEndIT(LIR* it);
     LIR* OpMem(OpKind op, RegStorage r_base, int disp);
-    LIR* OpPcRelLoad(RegStorage reg, LIR* target);
+    void OpPcRelLoad(RegStorage reg, LIR* target);
     LIR* OpReg(OpKind op, RegStorage r_dest_src);
     void OpRegCopy(RegStorage r_dest, RegStorage r_src);
     LIR* OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src);
@@ -232,6 +234,12 @@
       return false;
     }
 
+    // True if the ISA is MIPS32r6.
+    const bool isaIsR6_;
+
+    // True if the floating point unit is 32-bit.
+    const bool fpuIs32Bit_;
+
   private:
     void GenNegLong(RegLocation rl_dest, RegLocation rl_src);
     void GenAddLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
diff --git a/compiler/dex/quick/mips/fp_mips.cc b/compiler/dex/quick/mips/fp_mips.cc
index d7ed7ac..37bf1a6 100644
--- a/compiler/dex/quick/mips/fp_mips.cc
+++ b/compiler/dex/quick/mips/fp_mips.cc
@@ -181,6 +181,30 @@
   }
 }
 
+// Get the RegStorage for a wide FP argument: either a solo or a pair. The base is counted in
+// MIPS FP argument registers, so only even values (0, 2) are valid.
+static RegStorage GetWideArgFP(bool fpuIs32Bit, size_t base) {
+  // Consider computing this directly, e.g., rMIPS_FARG0 + base. Right now inlining should
+  // optimize everything.
+  if (fpuIs32Bit) {
+    switch (base) {
+      case 0:
+        return RegStorage(RegStorage::k64BitPair, rMIPS_FARG0, rMIPS_FARG1);
+      case 2:
+        return RegStorage(RegStorage::k64BitPair, rMIPS_FARG2, rMIPS_FARG3);
+    }
+  } else {
+    switch (base) {
+      case 0:
+        return RegStorage(RegStorage::k64BitSolo, rMIPS_FARG0);
+      case 2:
+        return RegStorage(RegStorage::k64BitSolo, rMIPS_FARG2);
+    }
+  }
+  LOG(FATAL) << "Unsupported Mips.GetWideFP: " << fpuIs32Bit << " " << base;
+  UNREACHABLE();
+}
+
 void MipsMir2Lir::GenCmpFP(Instruction::Code opcode, RegLocation rl_dest,
                            RegLocation rl_src1, RegLocation rl_src2) {
   bool wide = true;
@@ -208,8 +232,8 @@
   FlushAllRegs();
   LockCallTemps();
   if (wide) {
-    RegStorage r_tmp1(RegStorage::k64BitPair, rMIPS_FARG0, rMIPS_FARG1);
-    RegStorage r_tmp2(RegStorage::k64BitPair, rMIPS_FARG2, rMIPS_FARG3);
+    RegStorage r_tmp1 = GetWideArgFP(fpuIs32Bit_, 0);
+    RegStorage r_tmp2 = GetWideArgFP(fpuIs32Bit_, 2);
     LoadValueDirectWideFixed(rl_src1, r_tmp1);
     LoadValueDirectWideFixed(rl_src2, r_tmp2);
   } else {
diff --git a/compiler/dex/quick/mips/int_mips.cc b/compiler/dex/quick/mips/int_mips.cc
index 17ac629..8093c97 100644
--- a/compiler/dex/quick/mips/int_mips.cc
+++ b/compiler/dex/quick/mips/int_mips.cc
@@ -194,17 +194,34 @@
     bool src_fp = r_src.IsFloat();
     if (dest_fp) {
       if (src_fp) {
+        // Here if both src and dest are fp registers. OpRegCopy will choose the right copy
+        // (solo or pair).
         OpRegCopy(r_dest, r_src);
       } else {
-        /* note the operands are swapped for the mtc1 instr */
-        NewLIR2(kMipsMtc1, r_src.GetLowReg(), r_dest.GetLowReg());
-        NewLIR2(kMipsMtc1, r_src.GetHighReg(), r_dest.GetHighReg());
+        // Note the operands are swapped for the mtc1 and mthc1 instructions.
+        // Here if dest is fp reg and src is core reg.
+        if (fpuIs32Bit_) {
+            NewLIR2(kMipsMtc1, r_src.GetLowReg(), r_dest.GetLowReg());
+            NewLIR2(kMipsMtc1, r_src.GetHighReg(), r_dest.GetHighReg());
+        } else {
+            r_dest = Fp64ToSolo32(r_dest);
+            NewLIR2(kMipsMtc1, r_src.GetLowReg(), r_dest.GetReg());
+            NewLIR2(kMipsMthc1, r_src.GetHighReg(), r_dest.GetReg());
+        }
       }
     } else {
       if (src_fp) {
-        NewLIR2(kMipsMfc1, r_dest.GetLowReg(), r_src.GetLowReg());
-        NewLIR2(kMipsMfc1, r_dest.GetHighReg(), r_src.GetHighReg());
+        // Here if dest is core reg and src is fp reg.
+        if (fpuIs32Bit_) {
+            NewLIR2(kMipsMfc1, r_dest.GetLowReg(), r_src.GetLowReg());
+            NewLIR2(kMipsMfc1, r_dest.GetHighReg(), r_src.GetHighReg());
+        } else {
+            r_src = Fp64ToSolo32(r_src);
+            NewLIR2(kMipsMfc1, r_dest.GetLowReg(), r_src.GetReg());
+            NewLIR2(kMipsMfhc1, r_dest.GetHighReg(), r_src.GetReg());
+        }
       } else {
+        // Here if both src and dest are core registers.
         // Handle overlap
         if (r_src.GetHighReg() == r_dest.GetLowReg()) {
           OpRegCopy(r_dest.GetHigh(), r_src.GetHigh());
@@ -243,12 +260,14 @@
 
 RegLocation MipsMir2Lir::GenDivRem(RegLocation rl_dest, RegStorage reg1, RegStorage reg2,
                                    bool is_div) {
-  NewLIR2(kMipsDiv, reg1.GetReg(), reg2.GetReg());
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-  if (is_div) {
-    NewLIR1(kMipsMflo, rl_result.reg.GetReg());
+
+  if (isaIsR6_) {
+      NewLIR3(is_div ? kMipsR6Div : kMipsR6Mod,
+          rl_result.reg.GetReg(), reg1.GetReg(), reg2.GetReg());
   } else {
-    NewLIR1(kMipsMfhi, rl_result.reg.GetReg());
+      NewLIR2(kMipsDiv, reg1.GetReg(), reg2.GetReg());
+      NewLIR1(is_div ? kMipsMflo : kMipsMfhi, rl_result.reg.GetReg());
   }
   return rl_result;
 }
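
For reference, the pre-R6 sequence funnels both results of one divide through the HI/LO registers (read back with mflo/mfhi), while R6 drops HI/LO and the three-operand div/mod above write the destination register directly. A tiny semantics sketch, illustrative only:

    #include <cstdint>

    // What the generated code computes, independent of the ISA revision:
    int32_t QuotientFor(int32_t s, int32_t t)  { return s / t; }  // mflo on pre-R6, "div d,s,t" on R6
    int32_t RemainderFor(int32_t s, int32_t t) { return s % t; }  // mfhi on pre-R6, "mod d,s,t" on R6
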
@@ -257,13 +276,7 @@
                                       bool is_div) {
   RegStorage t_reg = AllocTemp();
   NewLIR3(kMipsAddiu, t_reg.GetReg(), rZERO, lit);
-  NewLIR2(kMipsDiv, reg1.GetReg(), t_reg.GetReg());
-  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-  if (is_div) {
-    NewLIR1(kMipsMflo, rl_result.reg.GetReg());
-  } else {
-    NewLIR1(kMipsMfhi, rl_result.reg.GetReg());
-  }
+  RegLocation rl_result = GenDivRem(rl_dest, reg1, t_reg, is_div);
   FreeTemp(t_reg);
   return rl_result;
 }
@@ -335,7 +348,7 @@
   return true;
 }
 
-LIR* MipsMir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) {
+void MipsMir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) {
   UNUSED(reg, target);
   LOG(FATAL) << "Unexpected use of OpPcRelLoad for Mips";
   UNREACHABLE();
diff --git a/compiler/dex/quick/mips/mips_lir.h b/compiler/dex/quick/mips/mips_lir.h
index 66e3894..7037055 100644
--- a/compiler/dex/quick/mips/mips_lir.h
+++ b/compiler/dex/quick/mips/mips_lir.h
@@ -236,22 +236,22 @@
 #endif
   // Double precision registers where the FPU is in 64-bit mode.
   rD0_fr1  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  0,
-  rD1_fr1  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  1,
-  rD2_fr1  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  2,
-  rD3_fr1  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  3,
-  rD4_fr1  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  4,
-  rD5_fr1  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  5,
-  rD6_fr1  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  6,
-  rD7_fr1  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  7,
+  rD1_fr1  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  2,
+  rD2_fr1  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  4,
+  rD3_fr1  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  6,
+  rD4_fr1  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  8,
+  rD5_fr1  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 10,
+  rD6_fr1  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 12,
+  rD7_fr1  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 14,
 #if 0  // TODO: expand resource mask to enable use of all MIPS fp registers.
-  rD8_fr1  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  8,
-  rD9_fr1  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  9,
-  rD10_fr1 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 10,
-  rD11_fr1 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 11,
-  rD12_fr1 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 12,
-  rD13_fr1 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 13,
-  rD14_fr1 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 14,
-  rD15_fr1 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 15,
+  rD8_fr1  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 16,
+  rD9_fr1  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 18,
+  rD10_fr1 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 20,
+  rD11_fr1 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 22,
+  rD12_fr1 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 24,
+  rD13_fr1 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 26,
+  rD14_fr1 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 28,
+  rD15_fr1 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 30,
 #endif
 };
 
@@ -368,10 +368,12 @@
 const RegLocation mips_loc_c_return_float
     {kLocPhysReg, 0, 0, 0, 1, 0, 0, 0, 1,
      RegStorage(RegStorage::k32BitSolo, rF0), INVALID_SREG, INVALID_SREG};
-// FIXME: move MIPS to k64Bitsolo for doubles
-const RegLocation mips_loc_c_return_double
+const RegLocation mips_loc_c_return_double_fr0
     {kLocPhysReg, 1, 0, 0, 1, 0, 0, 0, 1,
      RegStorage(RegStorage::k64BitPair, rF0, rF1), INVALID_SREG, INVALID_SREG};
+const RegLocation mips_loc_c_return_double_fr1
+    {kLocPhysReg, 1, 0, 0, 1, 0, 0, 0, 1,
+     RegStorage(RegStorage::k64BitSolo, rF0), INVALID_SREG, INVALID_SREG};
 
 enum MipsShiftEncodings {
   kMipsLsl = 0x0,
@@ -476,13 +478,21 @@
   kMipsFldc1,  // ldc1 t,o(b) [110101] b[25..21] t[20..16] o[15..0].
   kMipsFswc1,  // swc1 t,o(b) [111001] b[25..21] t[20..16] o[15..0].
   kMipsFsdc1,  // sdc1 t,o(b) [111101] b[25..21] t[20..16] o[15..0].
-  kMipsMfc1,  // mfc1 t,s [01000100000] t[20..16] s[15..11] [00000000000].
-  kMipsMtc1,  // mtc1 t,s [01000100100] t[20..16] s[15..11] [00000000000].
+  kMipsMfc1,   // mfc1 t,s [01000100000] t[20..16] s[15..11] [00000000000].
+  kMipsMtc1,   // mtc1 t,s [01000100100] t[20..16] s[15..11] [00000000000].
+  kMipsMfhc1,  // mfhc1 t,s [01000100011] t[20..16] s[15..11] [00000000000].
+  kMipsMthc1,  // mthc1 t,s [01000100111] t[20..16] s[15..11] [00000000000].
   kMipsDelta,  // Pseudo for ori t, s, <label>-<label>.
   kMipsDeltaHi,  // Pseudo for lui t, high16(<label>-<label>).
   kMipsDeltaLo,  // Pseudo for ori t, s, low16(<label>-<label>).
   kMipsCurrPC,  // jal to .+8 to materialize pc.
   kMipsSync,    // sync kind [000000] [0000000000000000] s[10..6] [001111].
+
+  // The following are mips32r6 instructions.
+  kMipsR6Div,   // div d,s,t [000000] s[25..21] t[20..16] d[15..11] [00010011010].
+  kMipsR6Mod,   // mod d,s,t [000000] s[25..21] t[20..16] d[15..11] [00011011010].
+  kMipsR6Mul,   // mul d,s,t [000000] s[25..21] t[20..16] d[15..11] [00010011000].
+
   kMipsUndefined,  // undefined [011001xxxxxxxxxxxxxxxx].
   kMipsLast
 };
diff --git a/compiler/dex/quick/mips/target_mips.cc b/compiler/dex/quick/mips/target_mips.cc
index 8574ffd..830f63a 100644
--- a/compiler/dex/quick/mips/target_mips.cc
+++ b/compiler/dex/quick/mips/target_mips.cc
@@ -86,16 +86,48 @@
 }
 
 RegLocation MipsMir2Lir::LocCReturnDouble() {
-  return mips_loc_c_return_double;
+  if (fpuIs32Bit_) {
+      return mips_loc_c_return_double_fr0;
+  } else {
+      return mips_loc_c_return_double_fr1;
+  }
 }
 
 // Convert k64BitSolo into k64BitPair
 RegStorage MipsMir2Lir::Solo64ToPair64(RegStorage reg) {
   DCHECK(reg.IsDouble());
+  DCHECK_EQ(reg.GetRegNum() & 1, 0);
   int reg_num = (reg.GetRegNum() & ~1) | RegStorage::kFloatingPoint;
   return RegStorage(RegStorage::k64BitPair, reg_num, reg_num + 1);
 }
 
+// Convert a 64-bit FP register (k64BitSolo or k64BitPair) into k32BitSolo.
+// This routine is only used to allow a 64-bit FPU to access FP registers 32 bits at a time.
+RegStorage MipsMir2Lir::Fp64ToSolo32(RegStorage reg) {
+  DCHECK(!fpuIs32Bit_);
+  DCHECK(reg.IsDouble());
+  DCHECK(!reg.IsPair());
+  int reg_num = reg.GetRegNum() | RegStorage::kFloatingPoint;
+  return RegStorage(RegStorage::k32BitSolo, reg_num);
+}
+
+// Return a target-dependent special register.
+RegStorage MipsMir2Lir::TargetReg(SpecialTargetRegister reg, WideKind wide_kind) {
+  if (wide_kind == kWide) {
+      DCHECK((kArg0 <= reg && reg < kArg7) || (kFArg0 <= reg && reg < kFArg15) || (kRet0 == reg));
+      RegStorage ret_reg = RegStorage::MakeRegPair(TargetReg(reg),
+                                       TargetReg(static_cast<SpecialTargetRegister>(reg + 1)));
+      if (!fpuIs32Bit_ && ret_reg.IsFloat()) {
+        // Convert the 64BitPair to a 64BitSolo for 64-bit FPUs.
+        RegStorage low = ret_reg.GetLow();
+        ret_reg = RegStorage::FloatSolo64(low.GetRegNum());
+      }
+      return ret_reg;
+  } else {
+    return TargetReg(reg);
+  }
+}
+
 // Return a target-dependent special register.
 RegStorage MipsMir2Lir::TargetReg(SpecialTargetRegister reg) {
   RegStorage res_reg;
@@ -145,12 +177,7 @@
  */
 ResourceMask MipsMir2Lir::GetRegMaskCommon(const RegStorage& reg) const {
   if (reg.IsDouble()) {
-    if (cu_->compiler_driver->GetInstructionSetFeatures()->AsMipsInstructionSetFeatures()
-        ->Is32BitFloatingPoint()) {
-      return ResourceMask::TwoBits((reg.GetRegNum() & ~1) + kMipsFPReg0);
-    } else {
-      return ResourceMask::TwoBits(reg.GetRegNum() * 2 + kMipsFPReg0);
-    }
+    return ResourceMask::TwoBits((reg.GetRegNum() & ~1) + kMipsFPReg0);
   } else if (reg.IsSingle()) {
     return ResourceMask::Bit(reg.GetRegNum() + kMipsFPReg0);
   } else {
@@ -401,8 +428,7 @@
   Clobber(rs_rF13);
   Clobber(rs_rF14);
   Clobber(rs_rF15);
-  if (cu_->compiler_driver->GetInstructionSetFeatures()->AsMipsInstructionSetFeatures()
-      ->Is32BitFloatingPoint()) {
+  if (fpuIs32Bit_) {
     Clobber(rs_rD0_fr0);
     Clobber(rs_rD1_fr0);
     Clobber(rs_rD2_fr0);
@@ -462,28 +488,20 @@
 }
 
 void MipsMir2Lir::CompilerInitializeRegAlloc() {
-  const bool fpu_is_32bit =
-      cu_->compiler_driver->GetInstructionSetFeatures()->AsMipsInstructionSetFeatures()
-      ->Is32BitFloatingPoint();
   reg_pool_.reset(new (arena_) RegisterPool(this, arena_, core_regs, empty_pool /* core64 */,
                                             sp_regs,
-                                            fpu_is_32bit ? dp_fr0_regs : dp_fr1_regs,
+                                            fpuIs32Bit_ ? dp_fr0_regs : dp_fr1_regs,
                                             reserved_regs, empty_pool /* reserved64 */,
                                             core_temps, empty_pool /* core64_temps */,
                                             sp_temps,
-                                            fpu_is_32bit ? dp_fr0_temps : dp_fr1_temps));
+                                            fpuIs32Bit_ ? dp_fr0_temps : dp_fr1_temps));
 
   // Target-specific adjustments.
 
   // Alias single precision floats to appropriate half of overlapping double.
   for (RegisterInfo* info : reg_pool_->sp_regs_) {
     int sp_reg_num = info->GetReg().GetRegNum();
-    int dp_reg_num;
-    if (fpu_is_32bit) {
-      dp_reg_num = sp_reg_num & ~1;
-    } else {
-      dp_reg_num = sp_reg_num >> 1;
-    }
+    int dp_reg_num = sp_reg_num & ~1;
     RegStorage dp_reg = RegStorage::Solo64(RegStorage::kFloatingPoint | dp_reg_num);
     RegisterInfo* dp_reg_info = GetRegInfo(dp_reg);
     // Double precision register's master storage should refer to itself.
@@ -502,11 +520,7 @@
   // TODO: adjust when we roll to hard float calling convention.
   reg_pool_->next_core_reg_ = 2;
   reg_pool_->next_sp_reg_ = 2;
-  if (fpu_is_32bit) {
-    reg_pool_->next_dp_reg_ = 2;
-  } else {
-    reg_pool_->next_dp_reg_ = 1;
-  }
+  reg_pool_->next_dp_reg_ = 2;
 }
 
 /*
@@ -610,7 +624,11 @@
 }
 
 MipsMir2Lir::MipsMir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena)
-    : Mir2Lir(cu, mir_graph, arena), in_to_reg_storage_mips_mapper_(this) {
+    : Mir2Lir(cu, mir_graph, arena), in_to_reg_storage_mips_mapper_(this),
+      isaIsR6_(cu->compiler_driver->GetInstructionSetFeatures()
+                 ->AsMipsInstructionSetFeatures()->IsR6()),
+      fpuIs32Bit_(cu->compiler_driver->GetInstructionSetFeatures()
+                    ->AsMipsInstructionSetFeatures()->Is32BitFloatingPoint()) {
   for (int i = 0; i < kMipsLast; i++) {
     DCHECK_EQ(MipsMir2Lir::EncodingMap[i].opcode, i)
         << "Encoding order for " << MipsMir2Lir::EncodingMap[i].name
diff --git a/compiler/dex/quick/mips/utility_mips.cc b/compiler/dex/quick/mips/utility_mips.cc
index 2d26922..3b7e0ed 100644
--- a/compiler/dex/quick/mips/utility_mips.cc
+++ b/compiler/dex/quick/mips/utility_mips.cc
@@ -182,7 +182,11 @@
       opcode = kMipsAnd;
       break;
     case kOpMul:
-      opcode = kMipsMul;
+      if (isaIsR6_) {
+          opcode = kMipsR6Mul;
+      } else {
+          opcode = kMipsMul;
+      }
       break;
     case kOpOr:
       opcode = kMipsOr;
@@ -271,7 +275,11 @@
       break;
     case kOpMul:
       short_form = false;
-      opcode = kMipsMul;
+      if (isaIsR6_) {
+          opcode = kMipsR6Mul;
+      } else {
+          opcode = kMipsMul;
+      }
       break;
     default:
       LOG(FATAL) << "Bad case in OpRegRegImm";
@@ -359,12 +367,23 @@
 
 LIR* MipsMir2Lir::LoadConstantWide(RegStorage r_dest, int64_t value) {
   LIR *res;
-  if (!r_dest.IsPair()) {
-    // Form 64-bit pair
-    r_dest = Solo64ToPair64(r_dest);
+  if (fpuIs32Bit_ || !r_dest.IsFloat()) {
+    // 32bit FPU (pairs) or loading into GPR.
+    if (!r_dest.IsPair()) {
+      // Form 64-bit pair
+      r_dest = Solo64ToPair64(r_dest);
+    }
+    res = LoadConstantNoClobber(r_dest.GetLow(), Low32Bits(value));
+    LoadConstantNoClobber(r_dest.GetHigh(), High32Bits(value));
+  } else {
+    // Here if we have a 64bit FPU and loading into FPR.
+    RegStorage r_temp = AllocTemp();
+    r_dest = Fp64ToSolo32(r_dest);
+    res = LoadConstantNoClobber(r_dest, Low32Bits(value));
+    LoadConstantNoClobber(r_temp, High32Bits(value));
+    NewLIR2(kMipsMthc1, r_temp.GetReg(), r_dest.GetReg());
+    FreeTemp(r_temp);
   }
-  res = LoadConstantNoClobber(r_dest.GetLow(), Low32Bits(value));
-  LoadConstantNoClobber(r_dest.GetHigh(), High32Bits(value));
   return res;
 }
 
@@ -483,32 +502,29 @@
   LIR *load2 = NULL;
   MipsOpCode opcode = kMipsNop;
   bool short_form = IS_SIMM16(displacement);
-  bool pair = r_dest.IsPair();
+  bool is64bit = false;
 
   switch (size) {
     case k64:
     case kDouble:
-      if (!pair) {
+      is64bit = true;
+      if (fpuIs32Bit_ && !r_dest.IsPair()) {
         // Form 64-bit pair
         r_dest = Solo64ToPair64(r_dest);
-        pair = 1;
-      }
-      if (r_dest.IsFloat()) {
-        DCHECK_EQ(r_dest.GetLowReg(), r_dest.GetHighReg() - 1);
-        opcode = kMipsFlwc1;
-      } else {
-        opcode = kMipsLw;
       }
       short_form = IS_SIMM16_2WORD(displacement);
-      DCHECK_EQ((displacement & 0x3), 0);
-      break;
+      FALLTHROUGH_INTENDED;
     case k32:
     case kSingle:
     case kReference:
       opcode = kMipsLw;
       if (r_dest.IsFloat()) {
         opcode = kMipsFlwc1;
-        DCHECK(r_dest.IsSingle());
+        if (!is64bit) {
+          DCHECK(r_dest.IsSingle());
+        } else {
+          DCHECK(r_dest.IsDouble());
+        }
       }
       DCHECK_EQ((displacement & 0x3), 0);
       break;
@@ -531,35 +547,56 @@
   }
 
   if (short_form) {
-    if (!pair) {
+    if (!is64bit) {
       load = res = NewLIR3(opcode, r_dest.GetReg(), displacement, r_base.GetReg());
     } else {
-      load = res = NewLIR3(opcode, r_dest.GetLowReg(), displacement + LOWORD_OFFSET, r_base.GetReg());
-      load2 = NewLIR3(opcode, r_dest.GetHighReg(), displacement + HIWORD_OFFSET, r_base.GetReg());
+      if (fpuIs32Bit_ || !r_dest.IsFloat()) {
+        DCHECK(r_dest.IsPair());
+        load = res = NewLIR3(opcode, r_dest.GetLowReg(), displacement + LOWORD_OFFSET, r_base.GetReg());
+        load2 = NewLIR3(opcode, r_dest.GetHighReg(), displacement + HIWORD_OFFSET, r_base.GetReg());
+      } else {
+        // Here if 64bit fpu and r_dest is a 64bit fp register.
+        RegStorage r_tmp = AllocTemp();
+        // FIXME: why is r_dest a 64BitPair here???
+        r_dest = Fp64ToSolo32(r_dest);
+        load = res = NewLIR3(kMipsFlwc1, r_dest.GetReg(), displacement + LOWORD_OFFSET, r_base.GetReg());
+        load2 = NewLIR3(kMipsLw, r_tmp.GetReg(), displacement + HIWORD_OFFSET, r_base.GetReg());
+        NewLIR2(kMipsMthc1, r_tmp.GetReg(), r_dest.GetReg());
+        FreeTemp(r_tmp);
+      }
     }
   } else {
-    if (pair) {
-      RegStorage r_tmp = AllocTemp();
-      res = OpRegRegImm(kOpAdd, r_tmp, r_base, displacement);
-      load = NewLIR3(opcode, r_dest.GetLowReg(), LOWORD_OFFSET, r_tmp.GetReg());
-      load2 = NewLIR3(opcode, r_dest.GetHighReg(), HIWORD_OFFSET, r_tmp.GetReg());
-      FreeTemp(r_tmp);
-    } else {
-      RegStorage r_tmp = (r_base == r_dest) ? AllocTemp() : r_dest;
+    if (!is64bit) {
+      RegStorage r_tmp = (r_base == r_dest || r_dest.IsFloat()) ? AllocTemp() : r_dest;
       res = OpRegRegImm(kOpAdd, r_tmp, r_base, displacement);
       load = NewLIR3(opcode, r_dest.GetReg(), 0, r_tmp.GetReg());
       if (r_tmp != r_dest)
         FreeTemp(r_tmp);
+    } else {
+      RegStorage r_tmp = AllocTemp();
+      res = OpRegRegImm(kOpAdd, r_tmp, r_base, displacement);
+      if (fpuIs32Bit_ || !r_dest.IsFloat()) {
+        DCHECK(r_dest.IsPair());
+        load = NewLIR3(opcode, r_dest.GetLowReg(), LOWORD_OFFSET, r_tmp.GetReg());
+        load2 = NewLIR3(opcode, r_dest.GetHighReg(), HIWORD_OFFSET, r_tmp.GetReg());
+      } else {
+        // Here if 64bit fpu and r_dest is a 64bit fp register
+        r_dest = Fp64ToSolo32(r_dest);
+        load = res = NewLIR3(kMipsFlwc1, r_dest.GetReg(), LOWORD_OFFSET, r_tmp.GetReg());
+        load2 = NewLIR3(kMipsLw, r_tmp.GetReg(), HIWORD_OFFSET, r_tmp.GetReg());
+        NewLIR2(kMipsMthc1, r_tmp.GetReg(), r_dest.GetReg());
+      }
+      FreeTemp(r_tmp);
     }
   }
 
   if (mem_ref_type_ == ResourceMask::kDalvikReg) {
     DCHECK_EQ(r_base, rs_rMIPS_SP);
-    AnnotateDalvikRegAccess(load, (displacement + (pair ? LOWORD_OFFSET : 0)) >> 2,
-                            true /* is_load */, pair /* is64bit */);
-    if (pair) {
+    AnnotateDalvikRegAccess(load, (displacement + (is64bit ? LOWORD_OFFSET : 0)) >> 2,
+                            true /* is_load */, is64bit /* is64bit */);
+    if (is64bit) {
       AnnotateDalvikRegAccess(load2, (displacement + HIWORD_OFFSET) >> 2,
-                              true /* is_load */, pair /* is64bit */);
+                              true /* is_load */, is64bit /* is64bit */);
     }
   }
   return load;
@@ -594,32 +631,29 @@
   LIR *store2 = NULL;
   MipsOpCode opcode = kMipsNop;
   bool short_form = IS_SIMM16(displacement);
-  bool pair = r_src.IsPair();
+  bool is64bit = false;
 
   switch (size) {
     case k64:
     case kDouble:
-      if (!pair) {
+      is64bit = true;
+      if (fpuIs32Bit_ && !r_src.IsPair()) {
         // Form 64-bit pair
         r_src = Solo64ToPair64(r_src);
-        pair = 1;
-      }
-      if (r_src.IsFloat()) {
-        DCHECK_EQ(r_src.GetLowReg(), r_src.GetHighReg() - 1);
-        opcode = kMipsFswc1;
-      } else {
-        opcode = kMipsSw;
       }
       short_form = IS_SIMM16_2WORD(displacement);
-      DCHECK_EQ((displacement & 0x3), 0);
-      break;
+      FALLTHROUGH_INTENDED;
     case k32:
     case kSingle:
     case kReference:
       opcode = kMipsSw;
       if (r_src.IsFloat()) {
         opcode = kMipsFswc1;
-        DCHECK(r_src.IsSingle());
+        if (!is64bit) {
+          DCHECK(r_src.IsSingle());
+        } else {
+          DCHECK(r_src.IsDouble());
+        }
       }
       DCHECK_EQ((displacement & 0x3), 0);
       break;
@@ -637,31 +671,53 @@
   }
 
   if (short_form) {
-    if (!pair) {
+    if (!is64bit) {
       store = res = NewLIR3(opcode, r_src.GetReg(), displacement, r_base.GetReg());
     } else {
-      store = res = NewLIR3(opcode, r_src.GetLowReg(), displacement + LOWORD_OFFSET, r_base.GetReg());
-      store2 = NewLIR3(opcode, r_src.GetHighReg(), displacement + HIWORD_OFFSET, r_base.GetReg());
+      if (fpuIs32Bit_ || !r_src.IsFloat()) {
+        DCHECK(r_src.IsPair());
+        store = res = NewLIR3(opcode, r_src.GetLowReg(), displacement + LOWORD_OFFSET, r_base.GetReg());
+        store2 = NewLIR3(opcode, r_src.GetHighReg(), displacement + HIWORD_OFFSET, r_base.GetReg());
+      } else {
+        // Here if 64bit fpu and r_src is a 64bit fp register
+        RegStorage r_tmp = AllocTemp();
+        r_src = Fp64ToSolo32(r_src);
+        store = res = NewLIR3(kMipsFswc1, r_src.GetReg(), displacement + LOWORD_OFFSET, r_base.GetReg());
+        NewLIR2(kMipsMfhc1, r_tmp.GetReg(), r_src.GetReg());
+        store2 = NewLIR3(kMipsSw, r_tmp.GetReg(), displacement + HIWORD_OFFSET, r_base.GetReg());
+        FreeTemp(r_tmp);
+      }
     }
   } else {
     RegStorage r_scratch = AllocTemp();
     res = OpRegRegImm(kOpAdd, r_scratch, r_base, displacement);
-    if (!pair) {
+    if (!is64bit) {
       store =  NewLIR3(opcode, r_src.GetReg(), 0, r_scratch.GetReg());
     } else {
-      store =  NewLIR3(opcode, r_src.GetLowReg(), LOWORD_OFFSET, r_scratch.GetReg());
-      store2 = NewLIR3(opcode, r_src.GetHighReg(), HIWORD_OFFSET, r_scratch.GetReg());
+      if (fpuIs32Bit_ || !r_src.IsFloat()) {
+        DCHECK(r_src.IsPair());
+        store = NewLIR3(opcode, r_src.GetLowReg(), LOWORD_OFFSET, r_scratch.GetReg());
+        store2 = NewLIR3(opcode, r_src.GetHighReg(), HIWORD_OFFSET, r_scratch.GetReg());
+      } else {
+        // Here if 64bit fpu and r_src is a 64bit fp register
+        RegStorage r_tmp = AllocTemp();
+        r_src = Fp64ToSolo32(r_src);
+        store = NewLIR3(kMipsFswc1, r_src.GetReg(), LOWORD_OFFSET, r_scratch.GetReg());
+        NewLIR2(kMipsMfhc1, r_tmp.GetReg(), r_src.GetReg());
+        store2 = NewLIR3(kMipsSw, r_tmp.GetReg(), HIWORD_OFFSET, r_scratch.GetReg());
+        FreeTemp(r_tmp);
+      }
     }
     FreeTemp(r_scratch);
   }
 
   if (mem_ref_type_ == ResourceMask::kDalvikReg) {
     DCHECK_EQ(r_base, rs_rMIPS_SP);
-    AnnotateDalvikRegAccess(store, (displacement + (pair ? LOWORD_OFFSET : 0)) >> 2,
-                            false /* is_load */, pair /* is64bit */);
-    if (pair) {
+    AnnotateDalvikRegAccess(store, (displacement + (is64bit ? LOWORD_OFFSET : 0)) >> 2,
+                            false /* is_load */, is64bit /* is64bit */);
+    if (is64bit) {
       AnnotateDalvikRegAccess(store2, (displacement + HIWORD_OFFSET) >> 2,
-                              false /* is_load */, pair /* is64bit */);
+                              false /* is_load */, is64bit /* is64bit */);
     }
   }
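
The 64-bit-FPU branches of the load and store paths above share one idea: the low word moves through the FPR directly (lwc1/swc1) and the high word goes through a GPR via mthc1/mfhc1. A host-side illustration of what that accomplishes, assuming the little-endian LOWORD/HIWORD layout used here (sketch only, not part of the patch):

    #include <cstdint>
    #include <cstring>

    // Reassemble a double from the two 32-bit words the backend loads/stores separately.
    double AssembleDouble(uint32_t low_word, uint32_t high_word) {
      uint64_t bits = (static_cast<uint64_t>(high_word) << 32) | low_word;
      double result;
      std::memcpy(&result, &bits, sizeof(result));
      return result;
    }
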
 
diff --git a/compiler/dex/quick/mips64/assemble_mips64.cc b/compiler/dex/quick/mips64/assemble_mips64.cc
index 17a0ef1..d96561b 100644
--- a/compiler/dex/quick/mips64/assemble_mips64.cc
+++ b/compiler/dex/quick/mips64/assemble_mips64.cc
@@ -629,8 +629,8 @@
          * and is found in lir->target.  If operands[3] is non-NULL,
          * then it is a Switch/Data table.
          */
-        int offset1 = (reinterpret_cast<LIR*>(UnwrapPointer(lir->operands[2])))->offset;
-        EmbeddedData *tab_rec = reinterpret_cast<EmbeddedData*>(UnwrapPointer(lir->operands[3]));
+        int offset1 = UnwrapPointer<LIR>(lir->operands[2])->offset;
+        const EmbeddedData* tab_rec = UnwrapPointer<EmbeddedData>(lir->operands[3]);
         int offset2 = tab_rec ? tab_rec->offset : lir->target->offset;
         int delta = offset2 - offset1;
         if ((delta & 0xffff) == delta && ((delta & 0x8000) == 0)) {
@@ -651,14 +651,14 @@
           res = kRetryAll;
         }
       } else if (lir->opcode == kMips64DeltaLo) {
-        int offset1 = (reinterpret_cast<LIR*>(UnwrapPointer(lir->operands[2])))->offset;
-        EmbeddedData *tab_rec = reinterpret_cast<EmbeddedData*>(UnwrapPointer(lir->operands[3]));
+        int offset1 = UnwrapPointer<LIR>(lir->operands[2])->offset;
+        const EmbeddedData* tab_rec = UnwrapPointer<EmbeddedData>(lir->operands[3]);
         int offset2 = tab_rec ? tab_rec->offset : lir->target->offset;
         int delta = offset2 - offset1;
         lir->operands[1] = delta & 0xffff;
       } else if (lir->opcode == kMips64DeltaHi) {
-        int offset1 = (reinterpret_cast<LIR*>(UnwrapPointer(lir->operands[2])))->offset;
-        EmbeddedData *tab_rec = reinterpret_cast<EmbeddedData*>(UnwrapPointer(lir->operands[3]));
+        int offset1 = UnwrapPointer<LIR>(lir->operands[2])->offset;
+        const EmbeddedData* tab_rec = UnwrapPointer<EmbeddedData>(lir->operands[3]);
         int offset2 = tab_rec ? tab_rec->offset : lir->target->offset;
         int delta = offset2 - offset1;
         lir->operands[1] = (delta >> 16) & 0xffff;
diff --git a/compiler/dex/quick/mips64/call_mips64.cc b/compiler/dex/quick/mips64/call_mips64.cc
index 31be1c2..0e58770 100644
--- a/compiler/dex/quick/mips64/call_mips64.cc
+++ b/compiler/dex/quick/mips64/call_mips64.cc
@@ -268,7 +268,7 @@
     class StackOverflowSlowPath : public LIRSlowPath {
      public:
       StackOverflowSlowPath(Mir2Lir* m2l, LIR* branch, size_t sp_displace)
-          : LIRSlowPath(m2l, m2l->GetCurrentDexPc(), branch, nullptr), sp_displace_(sp_displace) {
+          : LIRSlowPath(m2l, branch), sp_displace_(sp_displace) {
       }
       void Compile() OVERRIDE {
         m2l_->ResetRegPool();
diff --git a/compiler/dex/quick/mips64/codegen_mips64.h b/compiler/dex/quick/mips64/codegen_mips64.h
index 57c30d8..c9fd62f 100644
--- a/compiler/dex/quick/mips64/codegen_mips64.h
+++ b/compiler/dex/quick/mips64/codegen_mips64.h
@@ -182,7 +182,7 @@
   LIR* OpIT(ConditionCode cond, const char* guide);
   void OpEndIT(LIR* it);
   LIR* OpMem(OpKind op, RegStorage r_base, int disp);
-  LIR* OpPcRelLoad(RegStorage reg, LIR* target);
+  void OpPcRelLoad(RegStorage reg, LIR* target);
   LIR* OpReg(OpKind op, RegStorage r_dest_src);
   void OpRegCopy(RegStorage r_dest, RegStorage r_src);
   LIR* OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src);
diff --git a/compiler/dex/quick/mips64/int_mips64.cc b/compiler/dex/quick/mips64/int_mips64.cc
index 8a57c82..5c545bb 100644
--- a/compiler/dex/quick/mips64/int_mips64.cc
+++ b/compiler/dex/quick/mips64/int_mips64.cc
@@ -283,7 +283,7 @@
   return true;
 }
 
-LIR* Mips64Mir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) {
+void Mips64Mir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) {
   UNUSED(reg, target);
   LOG(FATAL) << "Unexpected use of OpPcRelLoad for Mips64";
   UNREACHABLE();
diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc
index 8348626..0b480a0 100644
--- a/compiler/dex/quick/mir_to_lir.cc
+++ b/compiler/dex/quick/mir_to_lir.cc
@@ -27,7 +27,7 @@
 class Mir2Lir::SpecialSuspendCheckSlowPath : public Mir2Lir::LIRSlowPath {
  public:
   SpecialSuspendCheckSlowPath(Mir2Lir* m2l, LIR* branch, LIR* cont)
-      : LIRSlowPath(m2l, m2l->GetCurrentDexPc(), branch, cont),
+      : LIRSlowPath(m2l, branch, cont),
         num_used_args_(0u) {
   }
 
@@ -406,6 +406,7 @@
 bool Mir2Lir::GenSpecialCase(BasicBlock* bb, MIR* mir, const InlineMethod& special) {
   DCHECK(special.flags & kInlineSpecial);
   current_dalvik_offset_ = mir->offset;
+  DCHECK(current_mir_ == nullptr);  // Safepoints attributed to prologue.
   MIR* return_mir = nullptr;
   bool successful = false;
   EnsureInitializedArgMappingToPhysicalReg();
@@ -540,7 +541,7 @@
       GenMoveException(rl_dest);
       break;
 
-    case Instruction::RETURN_VOID_BARRIER:
+    case Instruction::RETURN_VOID_NO_BARRIER:
     case Instruction::RETURN_VOID:
       if (((cu_->access_flags & kAccConstructor) != 0) &&
           cu_->compiler_driver->RequiresConstructorBarrier(Thread::Current(), cu_->dex_file,
@@ -587,9 +588,6 @@
     case Instruction::MOVE_FROM16:
     case Instruction::MOVE_OBJECT_FROM16:
       StoreValue(rl_dest, rl_src[0]);
-      if (rl_src[0].is_const && (mir_graph_->ConstantValue(rl_src[0]) == 0)) {
-        Workaround7250540(rl_dest, RegStorage::InvalidReg());
-      }
       break;
 
     case Instruction::MOVE_WIDE:
@@ -632,7 +630,7 @@
       break;
 
     case Instruction::CHECK_CAST: {
-      GenCheckCast(mir->offset, vB, rl_src[0]);
+      GenCheckCast(opt_flags, mir->offset, vB, rl_src[0]);
       break;
     }
     case Instruction::INSTANCE_OF:
@@ -1276,6 +1274,7 @@
     }
 
     current_dalvik_offset_ = mir->offset;
+    current_mir_ = mir;
     int opcode = mir->dalvikInsn.opcode;
 
     GenPrintLabel(mir);
@@ -1376,6 +1375,7 @@
 
 LIR* Mir2Lir::LIRSlowPath::GenerateTargetLabel(int opcode) {
   m2l_->SetCurrentDexPc(current_dex_pc_);
+  m2l_->current_mir_ = current_mir_;
   LIR* target = m2l_->NewLIR0(opcode);
   fromfast_->target = target;
   return target;
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index 6f3f057..cca4e5a 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -131,6 +131,7 @@
 #define MAX_ASSEMBLER_RETRIES 50
 
 class BasicBlock;
+class BitVector;
 struct CallInfo;
 struct CompilationUnit;
 struct InlineMethod;
@@ -490,9 +491,10 @@
 
     class LIRSlowPath : public ArenaObject<kArenaAllocSlowPaths> {
      public:
-      LIRSlowPath(Mir2Lir* m2l, const DexOffset dexpc, LIR* fromfast,
-                  LIR* cont = nullptr) :
-        m2l_(m2l), cu_(m2l->cu_), current_dex_pc_(dexpc), fromfast_(fromfast), cont_(cont) {
+      LIRSlowPath(Mir2Lir* m2l, LIR* fromfast, LIR* cont = nullptr)
+          : m2l_(m2l), cu_(m2l->cu_),
+            current_dex_pc_(m2l->current_dalvik_offset_), current_mir_(m2l->current_mir_),
+            fromfast_(fromfast), cont_(cont) {
       }
       virtual ~LIRSlowPath() {}
       virtual void Compile() = 0;
@@ -511,6 +513,7 @@
       Mir2Lir* const m2l_;
       CompilationUnit* const cu_;
       const DexOffset current_dex_pc_;
+      MIR* current_mir_;
       LIR* const fromfast_;
       LIR* const cont_;
     };
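
With this change the dex pc, and now the originating MIR, are captured from the Mir2Lir state at the moment the slow path object is constructed, which is why the call sites elsewhere in this patch shrink to LIRSlowPath(m2l, branch). A minimal hypothetical subclass, just to show the shape (assumes mir_to_lir.h is in scope):

    class ExampleSlowPath : public Mir2Lir::LIRSlowPath {
     public:
      ExampleSlowPath(Mir2Lir* m2l, LIR* branch, LIR* cont)
          : LIRSlowPath(m2l, branch, cont) {}  // dex pc and MIR recorded here
      void Compile() OVERRIDE {
        // Emit the out-of-line code; current_dex_pc_ and current_mir_ were
        // captured by the base constructor at the fast-path site.
      }
    };
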
@@ -582,14 +585,16 @@
      * TUNING: If use of these utilities becomes more common on 32-bit builds, it
      * may be worth conditionally-compiling a set of identity functions here.
      */
-    uint32_t WrapPointer(void* pointer) {
+    template <typename T>
+    uint32_t WrapPointer(const T* pointer) {
       uint32_t res = pointer_storage_.size();
       pointer_storage_.push_back(pointer);
       return res;
     }
 
-    void* UnwrapPointer(size_t index) {
-      return pointer_storage_[index];
+    template <typename T>
+    const T* UnwrapPointer(size_t index) {
+      return reinterpret_cast<const T*>(pointer_storage_[index]);
     }
 
     // strdup(), but allocates from the arena.
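
The templated WrapPointer/UnwrapPointer pair replaces the old void* storage plus reinterpret_cast at every call site, as the assembler changes earlier in this patch show. A hedged usage sketch, assuming it runs inside a Mir2Lir member function so the helpers and pointer_storage_ are in scope:

    // Stash a typed pointer in a LIR operand; T is deduced as EmbeddedData.
    lir->operands[3] = WrapPointer(tab_rec);
    // Later, read it back without a cast at the call site.
    const EmbeddedData* rec = UnwrapPointer<EmbeddedData>(lir->operands[3]);
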
@@ -670,6 +675,7 @@
     bool VerifyCatchEntries();
     void CreateMappingTables();
     void CreateNativeGcMap();
+    void CreateNativeGcMapWithoutRegisterPromotion();
     int AssignLiteralOffset(CodeOffset offset);
     int AssignSwitchTablesOffset(CodeOffset offset);
     int AssignFillArrayDataOffset(CodeOffset offset);
@@ -826,7 +832,7 @@
     void GenNewInstance(uint32_t type_idx, RegLocation rl_dest);
     void GenThrow(RegLocation rl_src);
     void GenInstanceof(uint32_t type_idx, RegLocation rl_dest, RegLocation rl_src);
-    void GenCheckCast(uint32_t insn_idx, uint32_t type_idx, RegLocation rl_src);
+    void GenCheckCast(int opt_flags, uint32_t insn_idx, uint32_t type_idx, RegLocation rl_src);
     void GenLong3Addr(OpKind first_op, OpKind second_op, RegLocation rl_dest,
                       RegLocation rl_src1, RegLocation rl_src2);
     virtual void GenShiftOpLong(Instruction::Code opcode, RegLocation rl_dest,
@@ -1379,7 +1385,7 @@
     virtual LIR* OpIT(ConditionCode cond, const char* guide) = 0;
     virtual void OpEndIT(LIR* it) = 0;
     virtual LIR* OpMem(OpKind op, RegStorage r_base, int disp) = 0;
-    virtual LIR* OpPcRelLoad(RegStorage reg, LIR* target) = 0;
+    virtual void OpPcRelLoad(RegStorage reg, LIR* target) = 0;
     virtual LIR* OpReg(OpKind op, RegStorage r_dest_src) = 0;
     virtual void OpRegCopy(RegStorage r_dest, RegStorage r_src) = 0;
     virtual LIR* OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src) = 0;
@@ -1459,9 +1465,6 @@
     virtual void GenMonitorEnter(int opt_flags, RegLocation rl_src);
     virtual void GenMonitorExit(int opt_flags, RegLocation rl_src);
 
-    // Temp workaround
-    void Workaround7250540(RegLocation rl_dest, RegStorage zero_reg);
-
     virtual LIR* InvokeTrampoline(OpKind op, RegStorage r_tgt, QuickEntrypointEnum trampoline) = 0;
 
     // Queries for backend support for vectors
@@ -1729,6 +1732,16 @@
     // See CheckRegLocationImpl.
     void CheckRegLocation(RegLocation rl) const;
 
+    // Find the references at the beginning of a basic block (for generating GC maps).
+    void InitReferenceVRegs(BasicBlock* bb, BitVector* references);
+
+    // Update references from prev_mir to mir in the same BB. If mir is null or before
+    // prev_mir, report failure (return false) and update references to the end of the BB.
+    bool UpdateReferenceVRegsLocal(MIR* mir, MIR* prev_mir, BitVector* references);
+
+    // Update references from prev_mir to mir.
+    void UpdateReferenceVRegs(MIR* mir, MIR* prev_mir, BitVector* references);
+
   public:
     // TODO: add accessors for these.
     LIR* literal_list_;                        // Constants.
@@ -1745,8 +1758,7 @@
     ArenaVector<FillArrayData*> fill_array_data_;
     ArenaVector<RegisterInfo*> tempreg_info_;
     ArenaVector<RegisterInfo*> reginfo_map_;
-    ArenaVector<void*> pointer_storage_;
-    CodeOffset current_code_offset_;    // Working byte offset of machine instructons.
+    ArenaVector<const void*> pointer_storage_;
     CodeOffset data_offset_;            // starting offset of literal pool.
     size_t total_size_;                   // header + code size.
     LIR* block_label_list_;
@@ -1761,6 +1773,7 @@
      * The low-level LIR creation utilites will pull it from here.  Rework this.
      */
     DexOffset current_dalvik_offset_;
+    MIR* current_mir_;
     size_t estimated_native_code_size_;     // Just an estimate; used to reserve code_buffer_ size.
     std::unique_ptr<RegisterPool> reg_pool_;
     /*
@@ -1799,6 +1812,9 @@
     // to deduplicate the masks.
     ResourceMaskCache mask_cache_;
 
+    // Record the MIR that generated a given safepoint (nullptr for prologue safepoints).
+    ArenaVector<std::pair<LIR*, MIR*>> safepoints_;
+
   protected:
     // ABI support
     class ShortyArg {
diff --git a/compiler/dex/quick/quick_compiler.cc b/compiler/dex/quick/quick_compiler.cc
index 922f2f7..1673312 100644
--- a/compiler/dex/quick/quick_compiler.cc
+++ b/compiler/dex/quick/quick_compiler.cc
@@ -250,7 +250,7 @@
     Instruction::INVOKE_DIRECT,
     Instruction::INVOKE_STATIC,
     Instruction::INVOKE_INTERFACE,
-    Instruction::RETURN_VOID_BARRIER,
+    Instruction::RETURN_VOID_NO_BARRIER,
     Instruction::INVOKE_VIRTUAL_RANGE,
     Instruction::INVOKE_SUPER_RANGE,
     Instruction::INVOKE_DIRECT_RANGE,
diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc
index 6f26b78..118ab1d 100644
--- a/compiler/dex/quick/x86/assemble_x86.cc
+++ b/compiler/dex/quick/x86/assemble_x86.cc
@@ -1586,13 +1586,11 @@
                            int32_t raw_index, int scale, int32_t table_or_disp) {
   int disp;
   if (entry->opcode == kX86PcRelLoadRA) {
-    Mir2Lir::EmbeddedData *tab_rec =
-        reinterpret_cast<Mir2Lir::EmbeddedData*>(UnwrapPointer(table_or_disp));
+    const EmbeddedData* tab_rec = UnwrapPointer<EmbeddedData>(table_or_disp);
     disp = tab_rec->offset;
   } else {
     DCHECK(entry->opcode == kX86PcRelAdr);
-    Mir2Lir::EmbeddedData *tab_rec =
-        reinterpret_cast<Mir2Lir::EmbeddedData*>(UnwrapPointer(raw_base_or_table));
+    const EmbeddedData* tab_rec = UnwrapPointer<EmbeddedData>(raw_base_or_table);
     disp = tab_rec->offset;
   }
   if (entry->opcode == kX86PcRelLoadRA) {
@@ -1794,8 +1792,7 @@
             DCHECK_EQ(lir->opcode, kX86Lea64RM) << "Unknown instruction: " << X86Mir2Lir::EncodingMap[lir->opcode].name;
             DCHECK_EQ(lir->operands[1], static_cast<int>(kRIPReg));
             // Grab the target offset from the saved data.
-            Mir2Lir::EmbeddedData* tab_rec =
-                reinterpret_cast<Mir2Lir::EmbeddedData*>(UnwrapPointer(lir->operands[4]));
+            const EmbeddedData* tab_rec = UnwrapPointer<Mir2Lir::EmbeddedData>(lir->operands[4]);
             CodeOffset target = tab_rec->offset;
             // Handle 64 bit RIP addressing.
             // Offset is relative to next instruction.
diff --git a/compiler/dex/quick/x86/call_x86.cc b/compiler/dex/quick/x86/call_x86.cc
index 11c1465..abee872 100644
--- a/compiler/dex/quick/x86/call_x86.cc
+++ b/compiler/dex/quick/x86/call_x86.cc
@@ -193,7 +193,7 @@
     class StackOverflowSlowPath : public LIRSlowPath {
      public:
       StackOverflowSlowPath(Mir2Lir* m2l, LIR* branch, size_t sp_displace)
-          : LIRSlowPath(m2l, m2l->GetCurrentDexPc(), branch, nullptr), sp_displace_(sp_displace) {
+          : LIRSlowPath(m2l, branch), sp_displace_(sp_displace) {
       }
       void Compile() OVERRIDE {
         m2l_->ResetRegPool();
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index 20163b4..040a8c4 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -296,7 +296,7 @@
   LIR* OpIT(ConditionCode cond, const char* guide) OVERRIDE;
   void OpEndIT(LIR* it) OVERRIDE;
   LIR* OpMem(OpKind op, RegStorage r_base, int disp) OVERRIDE;
-  LIR* OpPcRelLoad(RegStorage reg, LIR* target) OVERRIDE;
+  void OpPcRelLoad(RegStorage reg, LIR* target) OVERRIDE;
   LIR* OpReg(OpKind op, RegStorage r_dest_src) OVERRIDE;
   void OpRegCopy(RegStorage r_dest, RegStorage r_src) OVERRIDE;
   LIR* OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src) OVERRIDE;
diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc
index 91168c7..4eb626c 100755
--- a/compiler/dex/quick/x86/int_x86.cc
+++ b/compiler/dex/quick/x86/int_x86.cc
@@ -1324,7 +1324,7 @@
   return true;
 }
 
-LIR* X86Mir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) {
+void X86Mir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) {
   if (cu_->target64) {
     // We can do this directly using RIP addressing.
     // We don't know the proper offset for the value, so pick one that will force
@@ -1334,7 +1334,7 @@
     LIR* res = NewLIR3(kX86Mov32RM, reg.GetReg(), kRIPReg, 256);
     res->target = target;
     res->flags.fixup = kFixupLoad;
-    return res;
+    return;
   }
 
   CHECK(base_of_code_ != nullptr);
@@ -1353,11 +1353,9 @@
   // 4 byte offset.  We will fix this up in the assembler later to have the right
   // value.
   ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
-  LIR *res = RawLIR(current_dalvik_offset_, kX86Mov32RM, reg.GetReg(), reg.GetReg(), 256,
-                    0, 0, target);
+  LIR* res = NewLIR3(kX86Mov32RM, reg.GetReg(), reg.GetReg(), 256);
   res->target = target;
   res->flags.fixup = kFixupLoad;
-  return res;
 }
 
 LIR* X86Mir2Lir::OpVldm(RegStorage r_base, int count) {
@@ -1412,7 +1410,7 @@
    public:
     ArrayBoundsCheckSlowPath(Mir2Lir* m2l, LIR* branch_in,
                              RegStorage index_in, RegStorage array_base_in, int32_t len_offset_in)
-        : LIRSlowPath(m2l, m2l->GetCurrentDexPc(), branch_in),
+        : LIRSlowPath(m2l, branch_in),
           index_(index_in), array_base_(array_base_in), len_offset_(len_offset_in) {
     }
 
@@ -1460,7 +1458,7 @@
    public:
     ArrayBoundsCheckSlowPath(Mir2Lir* m2l, LIR* branch_in,
                              int32_t index_in, RegStorage array_base_in, int32_t len_offset_in)
-        : LIRSlowPath(m2l, m2l->GetCurrentDexPc(), branch_in),
+        : LIRSlowPath(m2l, branch_in),
           index_(index_in), array_base_(array_base_in), len_offset_(len_offset_in) {
     }
 
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index dbe4848..f128eb7 100755
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -390,7 +390,7 @@
              break;
           }
           case 'p': {
-            EmbeddedData *tab_rec = reinterpret_cast<EmbeddedData*>(UnwrapPointer(operand));
+            const EmbeddedData* tab_rec = UnwrapPointer<EmbeddedData>(operand);
             buf += StringPrintf("0x%08x", tab_rec->offset);
             break;
           }
@@ -1062,8 +1062,7 @@
   for (LIR* p : method_address_insns_) {
       DCHECK_EQ(p->opcode, kX86Mov32RI);
       uint32_t target_method_idx = p->operands[2];
-      const DexFile* target_dex_file =
-          reinterpret_cast<const DexFile*>(UnwrapPointer(p->operands[3]));
+      const DexFile* target_dex_file = UnwrapPointer<DexFile>(p->operands[3]);
 
       // The offset to patch is the last 4 bytes of the instruction.
       int patch_offset = p->offset + p->flags.size - 4;
@@ -1075,8 +1074,7 @@
   for (LIR* p : class_type_address_insns_) {
       DCHECK_EQ(p->opcode, kX86Mov32RI);
 
-      const DexFile* class_dex_file =
-        reinterpret_cast<const DexFile*>(UnwrapPointer(p->operands[3]));
+      const DexFile* class_dex_file = UnwrapPointer<DexFile>(p->operands[3]);
       uint32_t target_type_idx = p->operands[2];
 
       // The offset to patch is the last 4 bytes of the instruction.
@@ -1090,8 +1088,7 @@
   for (LIR* p : call_method_insns_) {
       DCHECK_EQ(p->opcode, kX86CallI);
       uint32_t target_method_idx = p->operands[1];
-      const DexFile* target_dex_file =
-          reinterpret_cast<const DexFile*>(UnwrapPointer(p->operands[2]));
+      const DexFile* target_dex_file = UnwrapPointer<DexFile>(p->operands[2]);
 
       // The offset to patch is the last 4 bytes of the instruction.
       int patch_offset = p->offset + p->flags.size - 4;
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index 3d3d5cb..100d49a 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -491,11 +491,12 @@
   }
 }
 
-static DexToDexCompilationLevel GetDexToDexCompilationlevel(
+DexToDexCompilationLevel CompilerDriver::GetDexToDexCompilationlevel(
     Thread* self, Handle<mirror::ClassLoader> class_loader, const DexFile& dex_file,
-    const DexFile::ClassDef& class_def) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    const DexFile::ClassDef& class_def) {
   auto* const runtime = Runtime::Current();
-  if (runtime->UseJit()) {
+  if (runtime->UseJit() || GetCompilerOptions().VerifyAtRuntime()) {
+    // Verify-at-runtime shouldn't dex-to-dex compile since we didn't resolve or verify.
     return kDontDexToDexCompile;
   }
   const char* descriptor = dex_file.GetClassDescriptor(class_def);
@@ -605,12 +606,22 @@
   LoadImageClasses(timings);
   VLOG(compiler) << "LoadImageClasses: " << GetMemoryUsageString(false);
 
-  Resolve(class_loader, dex_files, thread_pool, timings);
-  VLOG(compiler) << "Resolve: " << GetMemoryUsageString(false);
+  const bool verification_enabled = compiler_options_->IsVerificationEnabled();
+  const bool never_verify = compiler_options_->NeverVerify();
 
-  if (!compiler_options_->IsVerificationEnabled()) {
+  // We still need to resolve for never_verify since dex-to-dex compilation must run to add
+  // the RETURN_VOID_NO_BARRIER instruction.
+  if (never_verify || verification_enabled) {
+    Resolve(class_loader, dex_files, thread_pool, timings);
+    VLOG(compiler) << "Resolve: " << GetMemoryUsageString(false);
+  }
+
+  if (never_verify) {
     VLOG(compiler) << "Verify none mode specified, skipping verification.";
     SetVerified(class_loader, dex_files, thread_pool, timings);
+  }
+
+  if (!verification_enabled) {
     return;
   }
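
The net effect on PreCompile is that the three filter classes now take different paths. A purely illustrative condensation of the branch above (hypothetical helper, not part of the patch):

    enum class PreCompileAction { kResolveAndSetVerified, kSkipResolveAndVerify, kResolveAndVerify };

    PreCompileAction ActionForFilter(const CompilerOptions& opts) {
      if (opts.NeverVerify()) {
        // Resolution still runs so dex-to-dex can add RETURN_VOID_NO_BARRIER.
        return PreCompileAction::kResolveAndSetVerified;
      }
      if (opts.VerifyAtRuntime()) {
        return PreCompileAction::kSkipResolveAndVerify;  // neither Resolve nor Verify
      }
      return PreCompileAction::kResolveAndVerify;
    }
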
 
@@ -1387,8 +1398,11 @@
     }
   } else {
     bool method_in_image = heap->FindSpaceFromObject(method, false)->IsImageSpace();
-    if (method_in_image || compiling_boot) {
+    if (method_in_image || compiling_boot || runtime->UseJit()) {
       // We know we must be able to get to the method in the image, so use that pointer.
+      // In the case where we are the JIT, we can always use direct pointers since we know where
+      // the method and its code are / will be. We don't sharpen to interpreter bridge since we
+      // check IsQuickToInterpreterBridge above.
       CHECK(!method->IsAbstract());
       *type = sharp_type;
       *direct_method = force_relocations ? -1 : reinterpret_cast<uintptr_t>(method);
@@ -1901,7 +1915,7 @@
     if (klass->IsResolved()) {
       if (klass->GetStatus() < mirror::Class::kStatusVerified) {
         ObjectLock<mirror::Class> lock(soa.Self(), klass);
-        klass->SetStatus(mirror::Class::kStatusVerified, soa.Self());
+        mirror::Class::SetStatus(klass, mirror::Class::kStatusVerified, soa.Self());
       }
       // Record the final class status if necessary.
       ClassReference ref(manager->GetDexFile(), class_def_index);
@@ -2090,6 +2104,8 @@
     return;
   }
 
+  CompilerDriver* const driver = manager->GetCompiler();
+
   // Can we run DEX-to-DEX compiler on this class ?
   DexToDexCompilationLevel dex_to_dex_compilation_level = kDontDexToDexCompile;
   {
@@ -2097,8 +2113,8 @@
     StackHandleScope<1> hs(soa.Self());
     Handle<mirror::ClassLoader> class_loader(
         hs.NewHandle(soa.Decode<mirror::ClassLoader*>(jclass_loader)));
-    dex_to_dex_compilation_level = GetDexToDexCompilationlevel(soa.Self(), class_loader, dex_file,
-                                                               class_def);
+    dex_to_dex_compilation_level = driver->GetDexToDexCompilationlevel(
+        soa.Self(), class_loader, dex_file, class_def);
   }
   ClassDataItemIterator it(dex_file, class_data);
   // Skip fields
@@ -2108,7 +2124,6 @@
   while (it.HasNextInstanceField()) {
     it.Next();
   }
-  CompilerDriver* driver = manager->GetCompiler();
 
   bool compilation_enabled = driver->IsClassToCompile(
       dex_file.StringByTypeIdx(class_def.class_idx_));
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index 28a8245..9463c2c 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -468,6 +468,10 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
  private:
+  DexToDexCompilationLevel GetDexToDexCompilationlevel(
+      Thread* self, Handle<mirror::ClassLoader> class_loader, const DexFile& dex_file,
+      const DexFile::ClassDef& class_def) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   void PreCompile(jobject class_loader, const std::vector<const DexFile*>& dex_files,
                   ThreadPool* thread_pool, TimingLogger* timings)
       LOCKS_EXCLUDED(Locks::mutator_lock_);
diff --git a/compiler/driver/compiler_options.h b/compiler/driver/compiler_options.h
index 5042c75..d06ec27 100644
--- a/compiler/driver/compiler_options.h
+++ b/compiler/driver/compiler_options.h
@@ -32,7 +32,8 @@
  public:
   enum CompilerFilter {
     kVerifyNone,          // Skip verification and compile nothing except JNI stubs.
-    kInterpretOnly,       // Compile nothing except JNI stubs.
+    kInterpretOnly,       // Verify, and compile only JNI stubs.
+    kVerifyAtRuntime,     // Only compile JNI stubs and verify at runtime.
     kSpace,               // Maximize space savings.
     kBalanced,            // Try to get the best performance return on compilation investment.
     kSpeed,               // Maximize runtime performance.
@@ -81,13 +82,23 @@
     compiler_filter_ = compiler_filter;
   }
 
+  bool VerifyAtRuntime() const {
+    return compiler_filter_ == CompilerOptions::kVerifyAtRuntime;
+  }
+
   bool IsCompilationEnabled() const {
-    return ((compiler_filter_ != CompilerOptions::kVerifyNone) &&
-            (compiler_filter_ != CompilerOptions::kInterpretOnly));
+    return compiler_filter_ != CompilerOptions::kVerifyNone &&
+        compiler_filter_ != CompilerOptions::kInterpretOnly &&
+        compiler_filter_ != CompilerOptions::kVerifyAtRuntime;
   }
 
   bool IsVerificationEnabled() const {
-    return (compiler_filter_ != CompilerOptions::kVerifyNone);
+    return compiler_filter_ != CompilerOptions::kVerifyNone &&
+        compiler_filter_ != CompilerOptions::kVerifyAtRuntime;
+  }
+
+  bool NeverVerify() const {
+    return compiler_filter_ == CompilerOptions::kVerifyNone;
   }
 
   size_t GetHugeMethodThreshold() const {
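
Spelled out, the new filter sits between kVerifyNone and kInterpretOnly: nothing is compiled and nothing is verified ahead of time, but it is not the never-verify mode either. A small sketch of the resulting predicate values (assumes the existing SetCompilerFilter setter; illustrative only):

    CompilerOptions opts;                                       // default options
    opts.SetCompilerFilter(CompilerOptions::kVerifyAtRuntime);

    CHECK(opts.VerifyAtRuntime());
    CHECK(!opts.NeverVerify());              // that is reserved for kVerifyNone
    CHECK(!opts.IsVerificationEnabled());    // verification is deferred to runtime
    CHECK(!opts.IsCompilationEnabled());     // only JNI stubs get compiled
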
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index d238b2c..c1555aa 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -133,7 +133,7 @@
     return false;
   }
   std::string error_msg;
-  oat_file_ = OatFile::OpenReadable(oat_file.get(), oat_location, &error_msg);
+  oat_file_ = OatFile::OpenReadable(oat_file.get(), oat_location, nullptr, &error_msg);
   if (oat_file_ == nullptr) {
     PLOG(ERROR) << "Failed to open writable oat file " << oat_filename << " for " << oat_location
         << ": " << error_msg;
diff --git a/compiler/jit/jit_compiler.cc b/compiler/jit/jit_compiler.cc
index beb5755..8b31154 100644
--- a/compiler/jit/jit_compiler.cc
+++ b/compiler/jit/jit_compiler.cc
@@ -217,20 +217,21 @@
   auto* const mapping_table = compiled_method->GetMappingTable();
   auto* const vmap_table = compiled_method->GetVmapTable();
   auto* const gc_map = compiled_method->GetGcMap();
+  CHECK(gc_map != nullptr) << PrettyMethod(method);
   // Write out pre-header stuff.
   uint8_t* const mapping_table_ptr = code_cache->AddDataArray(
       self, mapping_table->data(), mapping_table->data() + mapping_table->size());
-  if (mapping_table == nullptr) {
+  if (mapping_table_ptr == nullptr) {
     return false;  // Out of data cache.
   }
   uint8_t* const vmap_table_ptr = code_cache->AddDataArray(
       self, vmap_table->data(), vmap_table->data() + vmap_table->size());
-  if (vmap_table == nullptr) {
+  if (vmap_table_ptr == nullptr) {
     return false;  // Out of data cache.
   }
   uint8_t* const gc_map_ptr = code_cache->AddDataArray(
       self, gc_map->data(), gc_map->data() + gc_map->size());
-  if (gc_map == nullptr) {
+  if (gc_map_ptr == nullptr) {
     return false;  // Out of data cache.
   }
   // Don't touch this until you protect / unprotect the code.
diff --git a/compiler/jni/jni_compiler_test.cc b/compiler/jni/jni_compiler_test.cc
index f513ea8..70bfb81 100644
--- a/compiler/jni/jni_compiler_test.cc
+++ b/compiler/jni/jni_compiler_test.cc
@@ -1510,25 +1510,25 @@
   EXPECT_EQ(i9, 9);
   EXPECT_EQ(i10, 10);
 
-  jint i11 = bit_cast<jfloat, jint>(f1);
+  jint i11 = bit_cast<jint, jfloat>(f1);
   EXPECT_EQ(i11, 11);
-  jint i12 = bit_cast<jfloat, jint>(f2);
+  jint i12 = bit_cast<jint, jfloat>(f2);
   EXPECT_EQ(i12, 12);
-  jint i13 = bit_cast<jfloat, jint>(f3);
+  jint i13 = bit_cast<jint, jfloat>(f3);
   EXPECT_EQ(i13, 13);
-  jint i14 = bit_cast<jfloat, jint>(f4);
+  jint i14 = bit_cast<jint, jfloat>(f4);
   EXPECT_EQ(i14, 14);
-  jint i15 = bit_cast<jfloat, jint>(f5);
+  jint i15 = bit_cast<jint, jfloat>(f5);
   EXPECT_EQ(i15, 15);
-  jint i16 = bit_cast<jfloat, jint>(f6);
+  jint i16 = bit_cast<jint, jfloat>(f6);
   EXPECT_EQ(i16, 16);
-  jint i17 = bit_cast<jfloat, jint>(f7);
+  jint i17 = bit_cast<jint, jfloat>(f7);
   EXPECT_EQ(i17, 17);
-  jint i18 = bit_cast<jfloat, jint>(f8);
+  jint i18 = bit_cast<jint, jfloat>(f8);
   EXPECT_EQ(i18, 18);
-  jint i19 = bit_cast<jfloat, jint>(f9);
+  jint i19 = bit_cast<jint, jfloat>(f9);
   EXPECT_EQ(i19, 19);
-  jint i20 = bit_cast<jfloat, jint>(f10);
+  jint i20 = bit_cast<jint, jfloat>(f10);
   EXPECT_EQ(i20, 20);
 }
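
The direction of the template arguments was the bug here: bit_cast<Dest, Source> takes the destination type first, so turning a float argument back into its integer bit pattern reads bit_cast<jint, jfloat>(f). A tiny round trip in the same style as the test above (illustrative, inside the same fixture):

    jfloat f = bit_cast<jfloat, jint>(11);  // reinterpret the bits of the int 11 as a float
    jint i = bit_cast<jint, jfloat>(f);     // and back to the original bit pattern
    EXPECT_EQ(i, 11);
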
 
@@ -1547,16 +1547,16 @@
   jint i9 = 9;
   jint i10 = 10;
 
-  jfloat f1 = bit_cast<jint, jfloat>(11);
-  jfloat f2 = bit_cast<jint, jfloat>(12);
-  jfloat f3 = bit_cast<jint, jfloat>(13);
-  jfloat f4 = bit_cast<jint, jfloat>(14);
-  jfloat f5 = bit_cast<jint, jfloat>(15);
-  jfloat f6 = bit_cast<jint, jfloat>(16);
-  jfloat f7 = bit_cast<jint, jfloat>(17);
-  jfloat f8 = bit_cast<jint, jfloat>(18);
-  jfloat f9 = bit_cast<jint, jfloat>(19);
-  jfloat f10 = bit_cast<jint, jfloat>(20);
+  jfloat f1 = bit_cast<jfloat, jint>(11);
+  jfloat f2 = bit_cast<jfloat, jint>(12);
+  jfloat f3 = bit_cast<jfloat, jint>(13);
+  jfloat f4 = bit_cast<jfloat, jint>(14);
+  jfloat f5 = bit_cast<jfloat, jint>(15);
+  jfloat f6 = bit_cast<jfloat, jint>(16);
+  jfloat f7 = bit_cast<jfloat, jint>(17);
+  jfloat f8 = bit_cast<jfloat, jint>(18);
+  jfloat f9 = bit_cast<jfloat, jint>(19);
+  jfloat f10 = bit_cast<jfloat, jint>(20);
 
   env_->CallStaticVoidMethod(jklass_, jmethod_, i1, i2, i3, i4, i5, i6, i7, i8, i9, i10, f1, f2,
                              f3, f4, f5, f6, f7, f8, f9, f10);
@@ -1580,25 +1580,25 @@
   EXPECT_EQ(i9, 9);
   EXPECT_EQ(i10, 10);
 
-  jint i11 = bit_cast<jfloat, jint>(f1);
+  jint i11 = bit_cast<jint, jfloat>(f1);
   EXPECT_EQ(i11, 11);
-  jint i12 = bit_cast<jfloat, jint>(f2);
+  jint i12 = bit_cast<jint, jfloat>(f2);
   EXPECT_EQ(i12, 12);
-  jint i13 = bit_cast<jfloat, jint>(f3);
+  jint i13 = bit_cast<jint, jfloat>(f3);
   EXPECT_EQ(i13, 13);
-  jint i14 = bit_cast<jfloat, jint>(f4);
+  jint i14 = bit_cast<jint, jfloat>(f4);
   EXPECT_EQ(i14, 14);
-  jint i15 = bit_cast<jfloat, jint>(f5);
+  jint i15 = bit_cast<jint, jfloat>(f5);
   EXPECT_EQ(i15, 15);
-  jint i16 = bit_cast<jfloat, jint>(f6);
+  jint i16 = bit_cast<jint, jfloat>(f6);
   EXPECT_EQ(i16, 16);
-  jint i17 = bit_cast<jfloat, jint>(f7);
+  jint i17 = bit_cast<jint, jfloat>(f7);
   EXPECT_EQ(i17, 17);
-  jint i18 = bit_cast<jfloat, jint>(f8);
+  jint i18 = bit_cast<jint, jfloat>(f8);
   EXPECT_EQ(i18, 18);
-  jint i19 = bit_cast<jfloat, jint>(f9);
+  jint i19 = bit_cast<jint, jfloat>(f9);
   EXPECT_EQ(i19, 19);
-  jint i20 = bit_cast<jfloat, jint>(f10);
+  jint i20 = bit_cast<jint, jfloat>(f10);
   EXPECT_EQ(i20, 20);
 }
 
@@ -1617,16 +1617,16 @@
   jint i9 = 9;
   jint i10 = 10;
 
-  jfloat f1 = bit_cast<jint, jfloat>(11);
-  jfloat f2 = bit_cast<jint, jfloat>(12);
-  jfloat f3 = bit_cast<jint, jfloat>(13);
-  jfloat f4 = bit_cast<jint, jfloat>(14);
-  jfloat f5 = bit_cast<jint, jfloat>(15);
-  jfloat f6 = bit_cast<jint, jfloat>(16);
-  jfloat f7 = bit_cast<jint, jfloat>(17);
-  jfloat f8 = bit_cast<jint, jfloat>(18);
-  jfloat f9 = bit_cast<jint, jfloat>(19);
-  jfloat f10 = bit_cast<jint, jfloat>(20);
+  jfloat f1 = bit_cast<jfloat, jint>(11);
+  jfloat f2 = bit_cast<jfloat, jint>(12);
+  jfloat f3 = bit_cast<jfloat, jint>(13);
+  jfloat f4 = bit_cast<jfloat, jint>(14);
+  jfloat f5 = bit_cast<jfloat, jint>(15);
+  jfloat f6 = bit_cast<jfloat, jint>(16);
+  jfloat f7 = bit_cast<jfloat, jint>(17);
+  jfloat f8 = bit_cast<jfloat, jint>(18);
+  jfloat f9 = bit_cast<jfloat, jint>(19);
+  jfloat f10 = bit_cast<jfloat, jint>(20);
 
   env_->CallStaticVoidMethod(jklass_, jmethod_, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, i1, i2, i3,
                              i4, i5, i6, i7, i8, i9, i10);
@@ -1649,25 +1649,25 @@
   EXPECT_EQ(i9, 9);
   EXPECT_EQ(i10, 10);
 
-  jint i11 = bit_cast<jfloat, jint>(f1);
+  jint i11 = bit_cast<jint, jfloat>(f1);
   EXPECT_EQ(i11, 11);
-  jint i12 = bit_cast<jfloat, jint>(f2);
+  jint i12 = bit_cast<jint, jfloat>(f2);
   EXPECT_EQ(i12, 12);
-  jint i13 = bit_cast<jfloat, jint>(f3);
+  jint i13 = bit_cast<jint, jfloat>(f3);
   EXPECT_EQ(i13, 13);
-  jint i14 = bit_cast<jfloat, jint>(f4);
+  jint i14 = bit_cast<jint, jfloat>(f4);
   EXPECT_EQ(i14, 14);
-  jint i15 = bit_cast<jfloat, jint>(f5);
+  jint i15 = bit_cast<jint, jfloat>(f5);
   EXPECT_EQ(i15, 15);
-  jint i16 = bit_cast<jfloat, jint>(f6);
+  jint i16 = bit_cast<jint, jfloat>(f6);
   EXPECT_EQ(i16, 16);
-  jint i17 = bit_cast<jfloat, jint>(f7);
+  jint i17 = bit_cast<jint, jfloat>(f7);
   EXPECT_EQ(i17, 17);
-  jint i18 = bit_cast<jfloat, jint>(f8);
+  jint i18 = bit_cast<jint, jfloat>(f8);
   EXPECT_EQ(i18, 18);
-  jint i19 = bit_cast<jfloat, jint>(f9);
+  jint i19 = bit_cast<jint, jfloat>(f9);
   EXPECT_EQ(i19, 19);
-  jint i20 = bit_cast<jfloat, jint>(f10);
+  jint i20 = bit_cast<jint, jfloat>(f10);
   EXPECT_EQ(i20, 20);
 }
 
@@ -1686,16 +1686,16 @@
   jint i9 = 9;
   jint i10 = 10;
 
-  jfloat f1 = bit_cast<jint, jfloat>(11);
-  jfloat f2 = bit_cast<jint, jfloat>(12);
-  jfloat f3 = bit_cast<jint, jfloat>(13);
-  jfloat f4 = bit_cast<jint, jfloat>(14);
-  jfloat f5 = bit_cast<jint, jfloat>(15);
-  jfloat f6 = bit_cast<jint, jfloat>(16);
-  jfloat f7 = bit_cast<jint, jfloat>(17);
-  jfloat f8 = bit_cast<jint, jfloat>(18);
-  jfloat f9 = bit_cast<jint, jfloat>(19);
-  jfloat f10 = bit_cast<jint, jfloat>(20);
+  jfloat f1 = bit_cast<jfloat, jint>(11);
+  jfloat f2 = bit_cast<jfloat, jint>(12);
+  jfloat f3 = bit_cast<jfloat, jint>(13);
+  jfloat f4 = bit_cast<jfloat, jint>(14);
+  jfloat f5 = bit_cast<jfloat, jint>(15);
+  jfloat f6 = bit_cast<jfloat, jint>(16);
+  jfloat f7 = bit_cast<jfloat, jint>(17);
+  jfloat f8 = bit_cast<jfloat, jint>(18);
+  jfloat f9 = bit_cast<jfloat, jint>(19);
+  jfloat f10 = bit_cast<jfloat, jint>(20);
 
   env_->CallStaticVoidMethod(jklass_, jmethod_, i1, f1, i2, f2, i3, f3, i4, f4, i5, f5, i6, f6, i7,
                              f7, i8, f8, i9, f9, i10, f10);
diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc
index 46aed60..c426625 100644
--- a/compiler/oat_test.cc
+++ b/compiler/oat_test.cc
@@ -122,7 +122,7 @@
     compiler_driver_->CompileAll(class_loader, class_linker->GetBootClassPath(), &timings);
   }
   std::unique_ptr<OatFile> oat_file(OatFile::Open(tmp.GetFilename(), tmp.GetFilename(), nullptr,
-                                                  nullptr, false, &error_msg));
+                                                  nullptr, false, nullptr, &error_msg));
   ASSERT_TRUE(oat_file.get() != nullptr) << error_msg;
   const OatHeader& oat_header = oat_file->GetOatHeader();
   ASSERT_TRUE(oat_header.IsValid());
diff --git a/compiler/optimizing/boolean_simplifier.cc b/compiler/optimizing/boolean_simplifier.cc
new file mode 100644
index 0000000..0ecc0d7
--- /dev/null
+++ b/compiler/optimizing/boolean_simplifier.cc
@@ -0,0 +1,131 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "boolean_simplifier.h"
+
+namespace art {
+
+// Returns true if 'block1' and 'block2' are empty, merge into the same single
+// successor, and that successor can only be reached from them.
+static bool BlocksDoMergeTogether(HBasicBlock* block1, HBasicBlock* block2) {
+  if (!block1->IsSingleGoto() || !block2->IsSingleGoto()) return false;
+  HBasicBlock* succ1 = block1->GetSuccessors().Get(0);
+  HBasicBlock* succ2 = block2->GetSuccessors().Get(0);
+  return succ1 == succ2 && succ1->GetPredecessors().Size() == 2u;
+}
+
+// Returns true if the outcome of the branching matches the boolean value of
+// the branching condition.
+static bool PreservesCondition(HInstruction* input_true, HInstruction* input_false) {
+  return input_true->IsIntConstant() && input_true->AsIntConstant()->IsOne()
+      && input_false->IsIntConstant() && input_false->AsIntConstant()->IsZero();
+}
+
+// Returns true if the outcome of the branching is exactly opposite of the
+// boolean value of the branching condition.
+static bool NegatesCondition(HInstruction* input_true, HInstruction* input_false) {
+  return input_true->IsIntConstant() && input_true->AsIntConstant()->IsZero()
+      && input_false->IsIntConstant() && input_false->AsIntConstant()->IsOne();
+}
+
+// Returns an instruction with the opposite boolean value from 'cond'.
+static HInstruction* GetOppositeCondition(HInstruction* cond) {
+  HGraph* graph = cond->GetBlock()->GetGraph();
+  ArenaAllocator* allocator = graph->GetArena();
+
+  if (cond->IsCondition()) {
+    HInstruction* lhs = cond->InputAt(0);
+    HInstruction* rhs = cond->InputAt(1);
+    if (cond->IsEqual()) {
+      return new (allocator) HNotEqual(lhs, rhs);
+    } else if (cond->IsNotEqual()) {
+      return new (allocator) HEqual(lhs, rhs);
+    } else if (cond->IsLessThan()) {
+      return new (allocator) HGreaterThanOrEqual(lhs, rhs);
+    } else if (cond->IsLessThanOrEqual()) {
+      return new (allocator) HGreaterThan(lhs, rhs);
+    } else if (cond->IsGreaterThan()) {
+      return new (allocator) HLessThanOrEqual(lhs, rhs);
+    } else if (cond->IsGreaterThanOrEqual()) {
+      return new (allocator) HLessThan(lhs, rhs);
+    }
+  } else if (cond->IsIntConstant()) {
+    HIntConstant* int_const = cond->AsIntConstant();
+    if (int_const->IsZero()) {
+      return graph->GetIntConstant1();
+    } else {
+      DCHECK(int_const->IsOne());
+      return graph->GetIntConstant0();
+    }
+  }
+
+  LOG(FATAL) << "Instruction " << cond->DebugName() << " used as a condition";
+  UNREACHABLE();
+}
+
+void HBooleanSimplifier::Run() {
+  // Iterate in post order in the unlikely case that removing one occurrence of
+  // the pattern empties a branch block of another occurrence. Otherwise the
+  // order does not matter.
+  for (HPostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
+    HBasicBlock* block = it.Current();
+    if (!block->EndsWithIf()) continue;
+
+    // Find elements of the pattern.
+    HIf* if_instruction = block->GetLastInstruction()->AsIf();
+    HBasicBlock* true_block = if_instruction->IfTrueSuccessor();
+    HBasicBlock* false_block = if_instruction->IfFalseSuccessor();
+    if (!BlocksDoMergeTogether(true_block, false_block)) {
+      continue;
+    }
+    HBasicBlock* merge_block = true_block->GetSuccessors().Get(0);
+    if (!merge_block->HasSinglePhi()) {
+      continue;
+    }
+    HPhi* phi = merge_block->GetFirstPhi()->AsPhi();
+    HInstruction* true_value = phi->InputAt(merge_block->GetPredecessorIndexOf(true_block));
+    HInstruction* false_value = phi->InputAt(merge_block->GetPredecessorIndexOf(false_block));
+
+    // Check if the selection negates/preserves the value of the condition and
+    // if so, generate a suitable replacement instruction.
+    HInstruction* if_condition = if_instruction->InputAt(0);
+    HInstruction* replacement;
+    if (NegatesCondition(true_value, false_value)) {
+      replacement = GetOppositeCondition(if_condition);
+      if (replacement->GetBlock() == nullptr) {
+        block->InsertInstructionBefore(replacement, if_instruction);
+      }
+    } else if (PreservesCondition(true_value, false_value)) {
+      replacement = if_condition;
+    } else {
+      continue;
+    }
+
+    // Replace the selection outcome with the new instruction.
+    phi->ReplaceWith(replacement);
+    merge_block->RemovePhi(phi);
+
+    // Link the start/end blocks and remove empty branches.
+    graph_->MergeEmptyBranches(block, merge_block);
+
+    // Remove the original condition if it is now unused.
+    if (!if_condition->HasUses()) {
+      if_condition->GetBlock()->RemoveInstruction(if_condition);
+    }
+  }
+}
+
+}  // namespace art
diff --git a/compiler/optimizing/boolean_simplifier.h b/compiler/optimizing/boolean_simplifier.h
new file mode 100644
index 0000000..a88733e
--- /dev/null
+++ b/compiler/optimizing/boolean_simplifier.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// This optimization recognizes a common pattern where a boolean value is
+// either cast to an integer or negated by selecting from zero/one integer
+// constants with an If statement. Because boolean values are internally
+// represented as zero/one, we can safely replace the pattern with a suitable
+// condition instruction.
+
+// Example: Negating a boolean value
+//     B1:
+//       z1   ParameterValue
+//       i2   IntConstant 0
+//       i3   IntConstant 1
+//       v4   Goto B2
+//     B2:
+//       z5   NotEquals [ z1 i2 ]
+//       v6   If [ z5 ] then B3 else B4
+//     B3:
+//       v7   Goto B5
+//     B4:
+//       v8   Goto B5
+//     B5:
+//       i9   Phi [ i3 i2 ]
+//       v10  Return [ i9 ]
+// turns into
+//     B1:
+//       z1   ParameterValue
+//       i2   IntConstant 0
+//       v4   Goto B2
+//     B2:
+//       z11  Equals [ z1 i2 ]
+//       v10  Return [ z11 ]
+//     B3, B4, B5: removed
+
+// Note: in order to recognize empty blocks, this optimization must be run
+// after the instruction simplifier has removed redundant suspend checks.
+
+#ifndef ART_COMPILER_OPTIMIZING_BOOLEAN_SIMPLIFIER_H_
+#define ART_COMPILER_OPTIMIZING_BOOLEAN_SIMPLIFIER_H_
+
+#include "optimization.h"
+
+namespace art {
+
+class HBooleanSimplifier : public HOptimization {
+ public:
+  explicit HBooleanSimplifier(HGraph* graph)
+    : HOptimization(graph, true, kBooleanSimplifierPassName) {}
+
+  void Run() OVERRIDE;
+
+  static constexpr const char* kBooleanSimplifierPassName = "boolean_simplifier";
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(HBooleanSimplifier);
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_BOOLEAN_SIMPLIFIER_H_
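
For readers new to this pass, the IR example in the header above corresponds to source code of roughly the following shape. This is an illustrative sketch only (the function names are invented for the example) and is not part of the change itself:

    // Before the pass: selecting 0/1 through an If/Phi diamond, as in blocks B1..B5 above.
    bool NegateBefore(bool value) {
      return value ? false : true;
    }

    // After HBooleanSimplifier: the Phi is gone and the opposite condition is used directly,
    // i.e. the graph keeps only an Equal(value, 0) feeding the Return.
    bool NegateAfter(bool value) {
      return !value;
    }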
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index 2cac93d..a21c311 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -1024,8 +1024,6 @@
   HInstruction* value = LoadLocal(instruction.VRegA(), Primitive::kPrimInt);
 
   uint16_t num_entries = table.GetNumEntries();
-  // There should be at least one entry here.
-  DCHECK_GT(num_entries, 0U);
 
   for (size_t i = 0; i < num_entries; i++) {
     BuildSwitchCaseHelper(instruction, i, i == static_cast<size_t>(num_entries) - 1, table, value,
@@ -2060,31 +2058,13 @@
   return true;
 }  // NOLINT(readability/fn_size)
 
-HIntConstant* HGraphBuilder::GetIntConstant0() {
-  if (constant0_ != nullptr) {
-    return constant0_;
-  }
-  constant0_ = new(arena_) HIntConstant(0);
-  entry_block_->AddInstruction(constant0_);
-  return constant0_;
-}
-
-HIntConstant* HGraphBuilder::GetIntConstant1() {
-  if (constant1_ != nullptr) {
-    return constant1_;
-  }
-  constant1_ = new(arena_) HIntConstant(1);
-  entry_block_->AddInstruction(constant1_);
-  return constant1_;
-}
-
 HIntConstant* HGraphBuilder::GetIntConstant(int32_t constant) {
   switch (constant) {
-    case 0: return GetIntConstant0();
-    case 1: return GetIntConstant1();
+    case 0: return graph_->GetIntConstant0();
+    case 1: return graph_->GetIntConstant1();
     default: {
       HIntConstant* instruction = new (arena_) HIntConstant(constant);
-      entry_block_->AddInstruction(instruction);
+      graph_->AddConstant(instruction);
       return instruction;
     }
   }
@@ -2092,7 +2072,7 @@
 
 HLongConstant* HGraphBuilder::GetLongConstant(int64_t constant) {
   HLongConstant* instruction = new (arena_) HLongConstant(constant);
-  entry_block_->AddInstruction(instruction);
+  graph_->AddConstant(instruction);
   return instruction;
 }
 
diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h
index 96196de..c70170b 100644
--- a/compiler/optimizing/builder.h
+++ b/compiler/optimizing/builder.h
@@ -47,8 +47,6 @@
         exit_block_(nullptr),
         current_block_(nullptr),
         graph_(graph),
-        constant0_(nullptr),
-        constant1_(nullptr),
         dex_file_(dex_file),
         dex_compilation_unit_(dex_compilation_unit),
         compiler_driver_(driver),
@@ -67,8 +65,6 @@
         exit_block_(nullptr),
         current_block_(nullptr),
         graph_(graph),
-        constant0_(nullptr),
-        constant1_(nullptr),
         dex_file_(nullptr),
         dex_compilation_unit_(nullptr),
         compiler_driver_(nullptr),
@@ -100,8 +96,6 @@
   void MaybeUpdateCurrentBlock(size_t index);
   HBasicBlock* FindBlockStartingAt(int32_t index) const;
 
-  HIntConstant* GetIntConstant0();
-  HIntConstant* GetIntConstant1();
   HIntConstant* GetIntConstant(int32_t constant);
   HLongConstant* GetLongConstant(int64_t constant);
   void InitializeLocals(uint16_t count);
@@ -253,9 +247,6 @@
   HBasicBlock* current_block_;
   HGraph* const graph_;
 
-  HIntConstant* constant0_;
-  HIntConstant* constant1_;
-
   // The dex file where the method being compiled is.
   const DexFile* const dex_file_;
 
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 561dcb7..bd6e943 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -40,16 +40,6 @@
   return mirror::ObjectArray<mirror::Object>::OffsetOfElement(index).SizeValue();
 }
 
-static bool IsSingleGoto(HBasicBlock* block) {
-  HLoopInformation* loop_info = block->GetLoopInformation();
-  // TODO: Remove the null check b/19084197.
-  return (block->GetFirstInstruction() != nullptr)
-      && (block->GetFirstInstruction() == block->GetLastInstruction())
-      && block->GetLastInstruction()->IsGoto()
-      // Back edges generate the suspend check.
-      && (loop_info == nullptr || !loop_info->IsBackEdge(block));
-}
-
 void CodeGenerator::CompileBaseline(CodeAllocator* allocator, bool is_leaf) {
   Initialize();
   if (!is_leaf) {
@@ -74,7 +64,7 @@
 HBasicBlock* CodeGenerator::GetNextBlockToEmit() const {
   for (size_t i = current_block_index_ + 1; i < block_order_->Size(); ++i) {
     HBasicBlock* block = block_order_->Get(i);
-    if (!IsSingleGoto(block)) {
+    if (!block->IsSingleGoto()) {
       return block;
     }
   }
@@ -82,7 +72,7 @@
 }
 
 HBasicBlock* CodeGenerator::FirstNonEmptyBlock(HBasicBlock* block) const {
-  while (IsSingleGoto(block)) {
+  while (block->IsSingleGoto()) {
     block = block->GetSuccessors().Get(0);
   }
   return block;
@@ -97,7 +87,7 @@
     // Don't generate code for an empty block. Its predecessors will branch to its successor
     // directly. Also, the label of that block will not be emitted, so this helps catch
     // errors where we reference that label.
-    if (IsSingleGoto(block)) continue;
+    if (block->IsSingleGoto()) continue;
     Bind(block);
     for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
       HInstruction* current = it.Current();
@@ -628,7 +618,7 @@
               ++i, DexRegisterLocation::Kind::kConstant, High32Bits(value));
           DCHECK_LT(i, environment_size);
         } else if (current->IsDoubleConstant()) {
-          int64_t value = bit_cast<double, int64_t>(current->AsDoubleConstant()->GetValue());
+          int64_t value = bit_cast<int64_t, double>(current->AsDoubleConstant()->GetValue());
           stack_map_stream_.AddDexRegisterEntry(
               i, DexRegisterLocation::Kind::kConstant, Low32Bits(value));
           stack_map_stream_.AddDexRegisterEntry(
@@ -641,7 +631,7 @@
           stack_map_stream_.AddDexRegisterEntry(i, DexRegisterLocation::Kind::kConstant, 0);
         } else {
           DCHECK(current->IsFloatConstant()) << current->DebugName();
-          int32_t value = bit_cast<float, int32_t>(current->AsFloatConstant()->GetValue());
+          int32_t value = bit_cast<int32_t, float>(current->AsFloatConstant()->GetValue());
           stack_map_stream_.AddDexRegisterEntry(i, DexRegisterLocation::Kind::kConstant, value);
         }
         break;
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index ecaa6f0..07ca6b1 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -271,7 +271,7 @@
       return 0;
     } else {
       DCHECK(constant->IsFloatConstant());
-      return bit_cast<float, int32_t>(constant->AsFloatConstant()->GetValue());
+      return bit_cast<int32_t, float>(constant->AsFloatConstant()->GetValue());
     }
   }
 
@@ -281,12 +281,12 @@
     } else if (constant->IsNullConstant()) {
       return 0;
     } else if (constant->IsFloatConstant()) {
-      return bit_cast<float, int32_t>(constant->AsFloatConstant()->GetValue());
+      return bit_cast<int32_t, float>(constant->AsFloatConstant()->GetValue());
     } else if (constant->IsLongConstant()) {
       return constant->AsLongConstant()->GetValue();
     } else {
       DCHECK(constant->IsDoubleConstant());
-      return bit_cast<double, int64_t>(constant->AsDoubleConstant()->GetValue());
+      return bit_cast<int64_t, double>(constant->AsDoubleConstant()->GetValue());
     }
   }
 
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 0357cea..97c470b 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -883,7 +883,7 @@
   HInstruction* previous = got->GetPrevious();
 
   HLoopInformation* info = block->GetLoopInformation();
-  if (info != nullptr && info->IsBackEdge(block) && info->HasSuspendCheck()) {
+  if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
     codegen_->ClearSpillSlotsFromLoopPhisInStackMap(info->GetSuspendCheck());
     GenerateSuspendCheck(info->GetSuspendCheck(), successor);
     return;
@@ -1388,9 +1388,14 @@
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(conversion, call_kind);
 
+  // The Java language does not allow treating boolean as an integral type but
+  // our bit representation makes it safe.
+
   switch (result_type) {
     case Primitive::kPrimByte:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
         case Primitive::kPrimChar:
@@ -1407,6 +1412,8 @@
 
     case Primitive::kPrimShort:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
         case Primitive::kPrimInt:
         case Primitive::kPrimChar:
@@ -1451,6 +1458,8 @@
 
     case Primitive::kPrimLong:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
@@ -1487,6 +1496,8 @@
 
     case Primitive::kPrimChar:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
@@ -1503,6 +1514,8 @@
 
     case Primitive::kPrimFloat:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
@@ -1536,6 +1549,8 @@
 
     case Primitive::kPrimDouble:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
@@ -1582,6 +1597,8 @@
   switch (result_type) {
     case Primitive::kPrimByte:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
         case Primitive::kPrimChar:
@@ -1597,6 +1614,8 @@
 
     case Primitive::kPrimShort:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
         case Primitive::kPrimInt:
         case Primitive::kPrimChar:
@@ -1654,6 +1673,8 @@
 
     case Primitive::kPrimLong:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
@@ -1692,6 +1713,8 @@
 
     case Primitive::kPrimChar:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
@@ -1707,6 +1730,8 @@
 
     case Primitive::kPrimFloat:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
@@ -1773,6 +1798,8 @@
 
     case Primitive::kPrimDouble:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index aeec5dd..9455a91 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -63,6 +63,7 @@
 using helpers::VIXLRegCodeFromART;
 using helpers::WRegisterFrom;
 using helpers::XRegisterFrom;
+using helpers::ARM64EncodableConstantOrRegister;
 
 static constexpr size_t kHeapRefSize = sizeof(mirror::HeapReference<mirror::Object>);
 static constexpr int kCurrentMethodStackOffset = 0;
@@ -1106,7 +1107,7 @@
     case Primitive::kPrimInt:
     case Primitive::kPrimLong:
       locations->SetInAt(0, Location::RequiresRegister());
-      locations->SetInAt(1, Location::RegisterOrConstant(instr->InputAt(1)));
+      locations->SetInAt(1, ARM64EncodableConstantOrRegister(instr->InputAt(1), instr));
       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
       break;
 
@@ -1398,7 +1399,7 @@
   switch (in_type) {
     case Primitive::kPrimLong: {
       locations->SetInAt(0, Location::RequiresRegister());
-      locations->SetInAt(1, Location::RegisterOrConstant(compare->InputAt(1)));
+      locations->SetInAt(1, ARM64EncodableConstantOrRegister(compare->InputAt(1), compare));
       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
       break;
     }
@@ -1468,7 +1469,7 @@
 void LocationsBuilderARM64::VisitCondition(HCondition* instruction) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
   locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+  locations->SetInAt(1, ARM64EncodableConstantOrRegister(instruction->InputAt(1), instruction));
   if (instruction->NeedsMaterialization()) {
     locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
   }
@@ -1620,7 +1621,7 @@
   HInstruction* previous = got->GetPrevious();
   HLoopInformation* info = block->GetLoopInformation();
 
-  if (info != nullptr && info->IsBackEdge(block) && info->HasSuspendCheck()) {
+  if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
     codegen_->ClearSpillSlotsFromLoopPhisInStackMap(info->GetSuspendCheck());
     GenerateSuspendCheck(info->GetSuspendCheck(), successor);
     return;
@@ -2116,7 +2117,7 @@
   switch (neg->GetResultType()) {
     case Primitive::kPrimInt:
     case Primitive::kPrimLong:
-      locations->SetInAt(0, Location::RegisterOrConstant(neg->InputAt(0)));
+      locations->SetInAt(0, ARM64EncodableConstantOrRegister(neg->InputAt(0), neg));
       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
       break;
 
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 754dd10..4414a65 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -680,7 +680,7 @@
         value = constant->AsLongConstant()->GetValue();
       } else {
         DCHECK(constant->IsDoubleConstant());
-        value = bit_cast<double, int64_t>(constant->AsDoubleConstant()->GetValue());
+        value = bit_cast<int64_t, double>(constant->AsDoubleConstant()->GetValue());
       }
       __ movl(Address(ESP, destination.GetStackIndex()), Immediate(Low32Bits(value)));
       __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)), Immediate(High32Bits(value)));
@@ -792,7 +792,7 @@
   HInstruction* previous = got->GetPrevious();
 
   HLoopInformation* info = block->GetLoopInformation();
-  if (info != nullptr && info->IsBackEdge(block) && info->HasSuspendCheck()) {
+  if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
     codegen_->ClearSpillSlotsFromLoopPhisInStackMap(info->GetSuspendCheck());
     GenerateSuspendCheck(info->GetSuspendCheck(), successor);
     return;
@@ -1370,9 +1370,14 @@
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(conversion, call_kind);
 
+  // The Java language does not allow treating boolean as an integral type but
+  // our bit representation makes it safe.
+
   switch (result_type) {
     case Primitive::kPrimByte:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
         case Primitive::kPrimChar:
@@ -1391,6 +1396,8 @@
 
     case Primitive::kPrimShort:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
         case Primitive::kPrimInt:
         case Primitive::kPrimChar:
@@ -1435,6 +1442,8 @@
 
     case Primitive::kPrimLong:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
@@ -1464,6 +1473,8 @@
 
     case Primitive::kPrimChar:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
@@ -1480,6 +1491,8 @@
 
     case Primitive::kPrimFloat:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
@@ -1511,6 +1524,8 @@
 
     case Primitive::kPrimDouble:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
@@ -1556,6 +1571,8 @@
   switch (result_type) {
     case Primitive::kPrimByte:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
         case Primitive::kPrimChar:
@@ -1577,6 +1594,8 @@
 
     case Primitive::kPrimShort:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
         case Primitive::kPrimInt:
         case Primitive::kPrimChar:
@@ -1672,6 +1691,8 @@
 
     case Primitive::kPrimLong:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
@@ -1703,6 +1724,8 @@
 
     case Primitive::kPrimChar:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
@@ -1726,6 +1749,8 @@
 
     case Primitive::kPrimFloat:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
@@ -1783,6 +1808,8 @@
 
     case Primitive::kPrimDouble:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
@@ -2730,26 +2757,45 @@
   Label less, greater, done;
   switch (compare->InputAt(0)->GetType()) {
     case Primitive::kPrimLong: {
+      Register left_low = left.AsRegisterPairLow<Register>();
+      Register left_high = left.AsRegisterPairHigh<Register>();
+      int32_t val_low = 0;
+      int32_t val_high = 0;
+      bool right_is_const = false;
+
+      if (right.IsConstant()) {
+        DCHECK(right.GetConstant()->IsLongConstant());
+        right_is_const = true;
+        int64_t val = right.GetConstant()->AsLongConstant()->GetValue();
+        val_low = Low32Bits(val);
+        val_high = High32Bits(val);
+      }
+
       if (right.IsRegisterPair()) {
-        __ cmpl(left.AsRegisterPairHigh<Register>(), right.AsRegisterPairHigh<Register>());
+        __ cmpl(left_high, right.AsRegisterPairHigh<Register>());
       } else if (right.IsDoubleStackSlot()) {
-        __ cmpl(left.AsRegisterPairHigh<Register>(),
-                Address(ESP, right.GetHighStackIndex(kX86WordSize)));
+        __ cmpl(left_high, Address(ESP, right.GetHighStackIndex(kX86WordSize)));
       } else {
-        DCHECK(right.IsConstant()) << right;
-        __ cmpl(left.AsRegisterPairHigh<Register>(),
-                Immediate(High32Bits(right.GetConstant()->AsLongConstant()->GetValue())));
+        DCHECK(right_is_const) << right;
+        if (val_high == 0) {
+          __ testl(left_high, left_high);
+        } else {
+          __ cmpl(left_high, Immediate(val_high));
+        }
       }
       __ j(kLess, &less);  // Signed compare.
       __ j(kGreater, &greater);  // Signed compare.
       if (right.IsRegisterPair()) {
-        __ cmpl(left.AsRegisterPairLow<Register>(), right.AsRegisterPairLow<Register>());
+        __ cmpl(left_low, right.AsRegisterPairLow<Register>());
       } else if (right.IsDoubleStackSlot()) {
-        __ cmpl(left.AsRegisterPairLow<Register>(), Address(ESP, right.GetStackIndex()));
+        __ cmpl(left_low, Address(ESP, right.GetStackIndex()));
       } else {
-        DCHECK(right.IsConstant()) << right;
-        __ cmpl(left.AsRegisterPairLow<Register>(),
-                Immediate(Low32Bits(right.GetConstant()->AsLongConstant()->GetValue())));
+        DCHECK(right_is_const) << right;
+        if (val_low == 0) {
+          __ testl(left_low, left_low);
+        } else {
+          __ cmpl(left_low, Immediate(val_low));
+        }
       }
       break;
     }
@@ -3645,14 +3691,21 @@
         __ movl(Address(ESP, destination.GetStackIndex()), Immediate(value));
       }
     } else if (constant->IsFloatConstant()) {
-      float value = constant->AsFloatConstant()->GetValue();
-      Immediate imm(bit_cast<float, int32_t>(value));
+      float fp_value = constant->AsFloatConstant()->GetValue();
+      int32_t value = bit_cast<int32_t, float>(fp_value);
+      Immediate imm(value);
       if (destination.IsFpuRegister()) {
-        ScratchRegisterScope ensure_scratch(
-            this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
-        Register temp = static_cast<Register>(ensure_scratch.GetRegister());
-        __ movl(temp, imm);
-        __ movd(destination.AsFpuRegister<XmmRegister>(), temp);
+        XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
+        if (value == 0) {
+          // Easy handling of 0.0.
+          __ xorps(dest, dest);
+        } else {
+          ScratchRegisterScope ensure_scratch(
+              this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
+          Register temp = static_cast<Register>(ensure_scratch.GetRegister());
+          __ movl(temp, Immediate(value));
+          __ movd(dest, temp);
+        }
       } else {
         DCHECK(destination.IsStackSlot()) << destination;
         __ movl(Address(ESP, destination.GetStackIndex()), imm);
@@ -3673,7 +3726,7 @@
     } else {
       DCHECK(constant->IsDoubleConstant());
       double dbl_value = constant->AsDoubleConstant()->GetValue();
-      int64_t value = bit_cast<double, int64_t>(dbl_value);
+      int64_t value = bit_cast<int64_t, double>(dbl_value);
       int32_t low_value = Low32Bits(value);
       int32_t high_value = High32Bits(value);
       Immediate low(low_value);
@@ -4107,18 +4160,38 @@
     } else {
       DCHECK(second.IsConstant()) << second;
       int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
-      Immediate low(Low32Bits(value));
-      Immediate high(High32Bits(value));
+      int32_t low_value = Low32Bits(value);
+      int32_t high_value = High32Bits(value);
+      Immediate low(low_value);
+      Immediate high(high_value);
+      Register first_low = first.AsRegisterPairLow<Register>();
+      Register first_high = first.AsRegisterPairHigh<Register>();
       if (instruction->IsAnd()) {
-        __ andl(first.AsRegisterPairLow<Register>(), low);
-        __ andl(first.AsRegisterPairHigh<Register>(), high);
+        if (low_value == 0) {
+          __ xorl(first_low, first_low);
+        } else if (low_value != -1) {
+          __ andl(first_low, low);
+        }
+        if (high_value == 0) {
+          __ xorl(first_high, first_high);
+        } else if (high_value != -1) {
+          __ andl(first_high, high);
+        }
       } else if (instruction->IsOr()) {
-        __ orl(first.AsRegisterPairLow<Register>(), low);
-        __ orl(first.AsRegisterPairHigh<Register>(), high);
+        if (low_value != 0) {
+          __ orl(first_low, low);
+        }
+        if (high_value != 0) {
+          __ orl(first_high, high);
+        }
       } else {
         DCHECK(instruction->IsXor());
-        __ xorl(first.AsRegisterPairLow<Register>(), low);
-        __ xorl(first.AsRegisterPairHigh<Register>(), high);
+        if (low_value != 0) {
+          __ xorl(first_low, low);
+        }
+        if (high_value != 0) {
+          __ xorl(first_high, high);
+        }
       }
     }
   }
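
The and/or/xor elisions introduced above rest on plain bitwise identities: and with all-ones and or/xor with zero are no-ops, while and with zero always yields zero, hence the xorl. The long-compare path similarly uses testl when a constant half is zero because it sets the same ZF/SF/OF flags as cmpl against zero without carrying a 32-bit immediate. A minimal, self-contained check of the identities, purely for illustration and unrelated to ART's own code:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t x = 0xDEADBEEF;
      assert((x & 0xFFFFFFFFu) == x);  // and with all-ones: the instruction can be dropped
      assert((x & 0u) == 0u);          // and with zero: emitted as xorl reg, reg instead
      assert((x | 0u) == x);           // or with zero: dropped
      assert((x ^ 0u) == x);           // xor with zero: dropped
      return 0;
    }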
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index dbd7c9e..c1f601e 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -625,7 +625,7 @@
       HConstant* constant = source.GetConstant();
       int64_t value = constant->AsLongConstant()->GetValue();
       if (constant->IsDoubleConstant()) {
-        value = bit_cast<double, int64_t>(constant->AsDoubleConstant()->GetValue());
+        value = bit_cast<int64_t, double>(constant->AsDoubleConstant()->GetValue());
       } else {
         DCHECK(constant->IsLongConstant());
         value = constant->AsLongConstant()->GetValue();
@@ -729,7 +729,7 @@
   HInstruction* previous = got->GetPrevious();
 
   HLoopInformation* info = block->GetLoopInformation();
-  if (info != nullptr && info->IsBackEdge(block) && info->HasSuspendCheck()) {
+  if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
     codegen_->ClearSpillSlotsFromLoopPhisInStackMap(info->GetSuspendCheck());
     GenerateSuspendCheck(info->GetSuspendCheck(), successor);
     return;
@@ -956,7 +956,7 @@
   switch (compare->InputAt(0)->GetType()) {
     case Primitive::kPrimLong: {
       locations->SetInAt(0, Location::RequiresRegister());
-      locations->SetInAt(1, Location::RequiresRegister());
+      locations->SetInAt(1, Location::RegisterOrInt32LongConstant(compare->InputAt(1)));
       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
       break;
     }
@@ -982,7 +982,18 @@
   Primitive::Type type = compare->InputAt(0)->GetType();
   switch (type) {
     case Primitive::kPrimLong: {
-      __ cmpq(left.AsRegister<CpuRegister>(), right.AsRegister<CpuRegister>());
+      CpuRegister left_reg = left.AsRegister<CpuRegister>();
+      if (right.IsConstant()) {
+        int64_t value = right.GetConstant()->AsLongConstant()->GetValue();
+        DCHECK(IsInt<32>(value));
+        if (value == 0) {
+          __ testq(left_reg, left_reg);
+        } else {
+          __ cmpq(left_reg, Immediate(static_cast<int32_t>(value)));
+        }
+      } else {
+        __ cmpq(left_reg, right.AsRegister<CpuRegister>());
+      }
       break;
     }
     case Primitive::kPrimFloat: {
@@ -1398,9 +1409,15 @@
   Primitive::Type result_type = conversion->GetResultType();
   Primitive::Type input_type = conversion->GetInputType();
   DCHECK_NE(result_type, input_type);
+
+  // The Java language does not allow treating boolean as an integral type but
+  // our bit representation makes it safe.
+
   switch (result_type) {
     case Primitive::kPrimByte:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
         case Primitive::kPrimChar:
@@ -1417,6 +1434,8 @@
 
     case Primitive::kPrimShort:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
         case Primitive::kPrimInt:
         case Primitive::kPrimChar:
@@ -1461,6 +1480,8 @@
 
     case Primitive::kPrimLong:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
@@ -1494,6 +1515,8 @@
 
     case Primitive::kPrimChar:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
@@ -1510,6 +1533,8 @@
 
     case Primitive::kPrimFloat:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
@@ -1539,6 +1564,8 @@
 
     case Primitive::kPrimDouble:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
@@ -1582,6 +1609,8 @@
   switch (result_type) {
     case Primitive::kPrimByte:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
         case Primitive::kPrimChar:
@@ -1606,6 +1635,8 @@
 
     case Primitive::kPrimShort:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
         case Primitive::kPrimInt:
         case Primitive::kPrimChar:
@@ -1704,6 +1735,8 @@
     case Primitive::kPrimLong:
       switch (input_type) {
         DCHECK(out.IsRegister());
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
@@ -1771,6 +1804,8 @@
 
     case Primitive::kPrimChar:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
@@ -1795,6 +1830,8 @@
 
     case Primitive::kPrimFloat:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
@@ -1821,6 +1858,8 @@
 
     case Primitive::kPrimDouble:
       switch (input_type) {
+        case Primitive::kPrimBoolean:
+          // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
@@ -1865,17 +1904,7 @@
     case Primitive::kPrimLong: {
       locations->SetInAt(0, Location::RequiresRegister());
       // We can use a leaq or addq if the constant can fit in an immediate.
-      HInstruction* rhs = add->InputAt(1);
-      bool is_int32_constant = false;
-      if (rhs->IsLongConstant()) {
-        int64_t value = rhs->AsLongConstant()->GetValue();
-        if (static_cast<int32_t>(value) == value) {
-          is_int32_constant = true;
-        }
-      }
-      locations->SetInAt(1,
-          is_int32_constant ? Location::RegisterOrConstant(rhs) :
-                              Location::RequiresRegister());
+      locations->SetInAt(1, Location::RegisterOrInt32LongConstant(add->InputAt(1)));
       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
       break;
     }
@@ -1973,7 +2002,7 @@
     }
     case Primitive::kPrimLong: {
       locations->SetInAt(0, Location::RequiresRegister());
-      locations->SetInAt(1, Location::RequiresRegister());
+      locations->SetInAt(1, Location::RegisterOrInt32LongConstant(sub->InputAt(1)));
       locations->SetOut(Location::SameAsFirstInput());
       break;
     }
@@ -2007,7 +2036,13 @@
       break;
     }
     case Primitive::kPrimLong: {
-      __ subq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
+      if (second.IsConstant()) {
+        int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
+        DCHECK(IsInt<32>(value));
+        __ subq(first.AsRegister<CpuRegister>(), Immediate(static_cast<int32_t>(value)));
+      } else {
+        __ subq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
+      }
       break;
     }
 
@@ -2038,8 +2073,13 @@
     }
     case Primitive::kPrimLong: {
       locations->SetInAt(0, Location::RequiresRegister());
-      locations->SetInAt(1, Location::RequiresRegister());
-      locations->SetOut(Location::SameAsFirstInput());
+      locations->SetInAt(1, Location::RegisterOrInt32LongConstant(mul->InputAt(1)));
+      if (locations->InAt(1).IsConstant()) {
+        // Can use 3 operand multiply.
+        locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+      } else {
+        locations->SetOut(Location::SameAsFirstInput());
+      }
       break;
     }
     case Primitive::kPrimFloat:
@@ -2059,9 +2099,9 @@
   LocationSummary* locations = mul->GetLocations();
   Location first = locations->InAt(0);
   Location second = locations->InAt(1);
-  DCHECK(first.Equals(locations->Out()));
   switch (mul->GetResultType()) {
     case Primitive::kPrimInt: {
+      DCHECK(first.Equals(locations->Out()));
       if (second.IsRegister()) {
         __ imull(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
       } else if (second.IsConstant()) {
@@ -2075,16 +2115,27 @@
       break;
     }
     case Primitive::kPrimLong: {
-      __ imulq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
+      if (second.IsConstant()) {
+        int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
+        DCHECK(IsInt<32>(value));
+        __ imulq(locations->Out().AsRegister<CpuRegister>(),
+                 first.AsRegister<CpuRegister>(),
+                 Immediate(static_cast<int32_t>(value)));
+      } else {
+        DCHECK(first.Equals(locations->Out()));
+        __ imulq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
+      }
       break;
     }
 
     case Primitive::kPrimFloat: {
+      DCHECK(first.Equals(locations->Out()));
       __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
       break;
     }
 
     case Primitive::kPrimDouble: {
+      DCHECK(first.Equals(locations->Out()));
       __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
       break;
     }
@@ -3320,20 +3371,35 @@
         __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
       }
     } else if (constant->IsFloatConstant()) {
-      Immediate imm(bit_cast<float, int32_t>(constant->AsFloatConstant()->GetValue()));
+      float fp_value = constant->AsFloatConstant()->GetValue();
+      int32_t value = bit_cast<int32_t, float>(fp_value);
+      Immediate imm(value);
       if (destination.IsFpuRegister()) {
-        __ movl(CpuRegister(TMP), imm);
-        __ movd(destination.AsFpuRegister<XmmRegister>(), CpuRegister(TMP));
+        XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
+        if (value == 0) {
+          // Easy handling of FP 0.0.
+          __ xorps(dest, dest);
+        } else {
+          __ movl(CpuRegister(TMP), imm);
+          __ movd(dest, CpuRegister(TMP));
+        }
       } else {
         DCHECK(destination.IsStackSlot()) << destination;
         __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), imm);
       }
     } else {
       DCHECK(constant->IsDoubleConstant()) << constant->DebugName();
-      Immediate imm(bit_cast<double, int64_t>(constant->AsDoubleConstant()->GetValue()));
+      double fp_value = constant->AsDoubleConstant()->GetValue();
+      int64_t value = bit_cast<int64_t, double>(fp_value);
+      Immediate imm(value);
       if (destination.IsFpuRegister()) {
-        __ movq(CpuRegister(TMP), imm);
-        __ movd(destination.AsFpuRegister<XmmRegister>(), CpuRegister(TMP));
+        XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
+        if (value == 0) {
+          __ xorpd(dest, dest);
+        } else {
+          __ movq(CpuRegister(TMP), imm);
+          __ movd(dest, CpuRegister(TMP));
+        }
       } else {
         DCHECK(destination.IsDoubleStackSlot()) << destination;
         __ movq(CpuRegister(TMP), imm);
@@ -3673,8 +3739,8 @@
   if (instruction->GetType() == Primitive::kPrimInt) {
     locations->SetInAt(1, Location::Any());
   } else {
-    // Request a register to avoid loading a 64bits constant.
-    locations->SetInAt(1, Location::RequiresRegister());
+    // We can handle 32 bit constants.
+    locations->SetInAt(1, Location::RegisterOrInt32LongConstant(instruction->InputAt(1)));
   }
   locations->SetOut(Location::SameAsFirstInput());
 }
@@ -3730,13 +3797,34 @@
     }
   } else {
     DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong);
+    CpuRegister first_reg = first.AsRegister<CpuRegister>();
+    bool second_is_constant = false;
+    int64_t value = 0;
+    if (second.IsConstant()) {
+      second_is_constant = true;
+      value = second.GetConstant()->AsLongConstant()->GetValue();
+      DCHECK(IsInt<32>(value));
+    }
+
     if (instruction->IsAnd()) {
-      __ andq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
+      if (second_is_constant) {
+        __ andq(first_reg, Immediate(static_cast<int32_t>(value)));
+      } else {
+        __ andq(first_reg, second.AsRegister<CpuRegister>());
+      }
     } else if (instruction->IsOr()) {
-      __ orq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
+      if (second_is_constant) {
+        __ orq(first_reg, Immediate(static_cast<int32_t>(value)));
+      } else {
+        __ orq(first_reg, second.AsRegister<CpuRegister>());
+      }
     } else {
       DCHECK(instruction->IsXor());
-      __ xorq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
+      if (second_is_constant) {
+        __ xorq(first_reg, Immediate(static_cast<int32_t>(value)));
+      } else {
+        __ xorq(first_reg, second.AsRegister<CpuRegister>());
+      }
     }
   }
 }
diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc
index 868fc5b..40f0adc 100644
--- a/compiler/optimizing/codegen_test.cc
+++ b/compiler/optimizing/codegen_test.cc
@@ -145,6 +145,7 @@
                              std::function<void(HGraph*)> hook_before_codegen,
                              bool has_result,
                              Expected expected) {
+  graph->BuildDominatorTree();
   SsaLivenessAnalysis liveness(*graph, codegen);
   liveness.Analyze();
 
diff --git a/compiler/optimizing/common_arm64.h b/compiler/optimizing/common_arm64.h
index 9447d3b..fd8c0c6 100644
--- a/compiler/optimizing/common_arm64.h
+++ b/compiler/optimizing/common_arm64.h
@@ -183,6 +183,40 @@
   }
 }
 
+static bool CanEncodeConstantAsImmediate(HConstant* constant, HInstruction* instr) {
+  DCHECK(constant->IsIntConstant() || constant->IsLongConstant() || constant->IsNullConstant());
+
+  // For single uses we let VIXL handle the constant generation since it will
+  // use registers that are not managed by the register allocator (wip0, wip1).
+  if (constant->GetUses().HasOnlyOneUse()) {
+    return true;
+  }
+
+  int64_t value = CodeGenerator::GetInt64ValueOf(constant);
+
+  if (instr->IsAdd() || instr->IsSub() || instr->IsCondition() || instr->IsCompare()) {
+    // Uses aliases of ADD/SUB instructions.
+    return vixl::Assembler::IsImmAddSub(value);
+  } else if (instr->IsAnd() || instr->IsOr() || instr->IsXor()) {
+    // Uses logical operations.
+    return vixl::Assembler::IsImmLogical(value, vixl::kXRegSize);
+  } else {
+    DCHECK(instr->IsNeg());
+    // Uses mov -immediate.
+    return vixl::Assembler::IsImmMovn(value, vixl::kXRegSize);
+  }
+}
+
+static inline Location ARM64EncodableConstantOrRegister(HInstruction* constant,
+                                                        HInstruction* instr) {
+  if (constant->IsConstant()
+      && CanEncodeConstantAsImmediate(constant->AsConstant(), instr)) {
+    return Location::ConstantLocation(constant->AsConstant());
+  }
+
+  return Location::RequiresRegister();
+}
+
 }  // namespace helpers
 }  // namespace arm64
 }  // namespace art
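
To give intuition for the IsImmAddSub check used above: an ARM64 add/sub immediate is an unsigned 12-bit value, optionally shifted left by 12 bits. The model below is only a rough approximation for illustration; the helper name is invented, and VIXL's real predicate remains authoritative:

    #include <cassert>
    #include <cstdint>

    // Rough model of an ARM64 add/sub immediate: imm12, optionally LSL #12.
    static bool LooksLikeAddSubImmediate(uint64_t value) {
      return (value & ~UINT64_C(0xFFF)) == 0 ||        // plain 12-bit value
             (value & ~(UINT64_C(0xFFF) << 12)) == 0;  // 12-bit value shifted by 12
    }

    int main() {
      assert(LooksLikeAddSubImmediate(4095));     // 0xFFF fits as-is
      assert(LooksLikeAddSubImmediate(0x2A000));  // 0x2A << 12 fits with the shift
      assert(!LooksLikeAddSubImmediate(4097));    // 0x1001 needs bits from both halves
      return 0;
    }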
diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc
index 76b9f4f..09a3ae4 100644
--- a/compiler/optimizing/graph_checker.cc
+++ b/compiler/optimizing/graph_checker.cc
@@ -227,13 +227,13 @@
   } else {
     HLoopInformation* loop_information = loop_header->GetLoopInformation();
     HBasicBlock* first_predecessor = loop_header->GetPredecessors().Get(0);
-    if (loop_information->IsBackEdge(first_predecessor)) {
+    if (loop_information->IsBackEdge(*first_predecessor)) {
       AddError(StringPrintf(
           "First predecessor of loop header %d is a back edge.",
           id));
     }
     HBasicBlock* second_predecessor = loop_header->GetPredecessors().Get(1);
-    if (!loop_information->IsBackEdge(second_predecessor)) {
+    if (!loop_information->IsBackEdge(*second_predecessor)) {
       AddError(StringPrintf(
           "Second predecessor of loop header %d is not a back edge.",
           id));
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index bd9267c..968fe3e 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -118,6 +118,29 @@
     return false;
   }
 
+  if (resolved_method->ShouldNotInline()) {
+    VLOG(compiler) << "Method " << PrettyMethod(method_index, outer_dex_file)
+                   << " was already flagged as non inlineable";
+    return false;
+  }
+
+  if (!TryBuildAndInline(resolved_method, invoke_instruction, method_index)) {
+    resolved_method->SetShouldNotInline();
+    return false;
+  }
+
+  VLOG(compiler) << "Successfully inlined " << PrettyMethod(method_index, outer_dex_file);
+  MaybeRecordStat(kInlinedInvoke);
+  return true;
+}
+
+bool HInliner::TryBuildAndInline(Handle<mirror::ArtMethod> resolved_method,
+                                 HInvoke* invoke_instruction,
+                                 uint32_t method_index) const {
+  ScopedObjectAccess soa(Thread::Current());
+  const DexFile::CodeItem* code_item = resolved_method->GetCodeItem();
+  const DexFile& outer_dex_file = *outer_compilation_unit_.GetDexFile();
+
   DexCompilationUnit dex_compilation_unit(
     nullptr,
     outer_compilation_unit_.GetClassLoader(),
@@ -225,8 +248,6 @@
   // instruction id of the caller, so that new instructions added
   // after optimizations get a unique id.
   graph_->SetCurrentInstructionId(callee_graph->GetNextInstructionId());
-  VLOG(compiler) << "Successfully inlined " << PrettyMethod(method_index, outer_dex_file);
-  MaybeRecordStat(kInlinedInvoke);
   return true;
 }
 
diff --git a/compiler/optimizing/inliner.h b/compiler/optimizing/inliner.h
index 2b08d3d..1251977 100644
--- a/compiler/optimizing/inliner.h
+++ b/compiler/optimizing/inliner.h
@@ -46,6 +46,9 @@
 
  private:
   bool TryInline(HInvoke* invoke_instruction, uint32_t method_index, InvokeType invoke_type) const;
+  bool TryBuildAndInline(Handle<mirror::ArtMethod> resolved_method,
+                         HInvoke* invoke_instruction,
+                         uint32_t method_index) const;
 
   const DexCompilationUnit& outer_compilation_unit_;
   CompilerDriver* const compiler_driver_;
diff --git a/compiler/optimizing/locations.cc b/compiler/optimizing/locations.cc
index 4ac1fe8..a1ae670 100644
--- a/compiler/optimizing/locations.cc
+++ b/compiler/optimizing/locations.cc
@@ -56,6 +56,19 @@
       : Location::RequiresRegister();
 }
 
+Location Location::RegisterOrInt32LongConstant(HInstruction* instruction) {
+  if (!instruction->IsConstant() || !instruction->AsConstant()->IsLongConstant()) {
+    return Location::RequiresRegister();
+  }
+
+  // Does the long constant fit in a 32-bit int?
+  int64_t value = instruction->AsConstant()->AsLongConstant()->GetValue();
+
+  return IsInt<32>(value)
+      ? Location::ConstantLocation(instruction->AsConstant())
+      : Location::RequiresRegister();
+}
+
 Location Location::ByteRegisterOrConstant(int reg, HInstruction* instruction) {
   return instruction->IsConstant()
       ? Location::ConstantLocation(instruction->AsConstant())
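
Note: RegisterOrInt32LongConstant above only hands out a constant Location when the long value survives a 32-bit round trip, presumably because backends such as x86-64 sign-extend a 32-bit immediate in 64-bit ALU instructions. A minimal restatement of that check follows; the helper name is an assumption for illustration, not ART's API.

#include <cstdint>
#include <limits>

// A long constant can be used as a 32-bit immediate only if sign-extending
// its low 32 bits reproduces the original value, i.e. it lies in int32 range.
static bool EncodableAsImm32(int64_t value) {
  return value >= std::numeric_limits<int32_t>::min() &&
         value <= std::numeric_limits<int32_t>::max();
}

EncodableAsImm32(-2) and EncodableAsImm32(0x7FFFFFFF) hold, while EncodableAsImm32(1LL << 32) does not, so that operand falls back to RequiresRegister().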
diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h
index 566c0da..de876be 100644
--- a/compiler/optimizing/locations.h
+++ b/compiler/optimizing/locations.h
@@ -345,6 +345,7 @@
   }
 
   static Location RegisterOrConstant(HInstruction* instruction);
+  static Location RegisterOrInt32LongConstant(HInstruction* instruction);
   static Location ByteRegisterOrConstant(int reg, HInstruction* instruction);
 
   // The location of the first input to the instruction will be
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index a90ebce..4f6565d 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -185,7 +185,7 @@
   if (successor->IsLoopHeader()) {
     // If we split at a back edge boundary, make the new block the back edge.
     HLoopInformation* info = successor->GetLoopInformation();
-    if (info->IsBackEdge(block)) {
+    if (info->IsBackEdge(*block)) {
       info->RemoveBackEdge(block);
       info->AddBackEdge(new_block);
     }
@@ -287,19 +287,49 @@
   return true;
 }
 
+void HGraph::AddConstant(HConstant* instruction) {
+  HInstruction* last_instruction = entry_block_->GetLastInstruction();
+  if (last_instruction == nullptr || !last_instruction->IsControlFlow()) {
+    // Called from the builder. Insert at the end of the block.
+    entry_block_->AddInstruction(instruction);
+  } else {
+    // Entry block ends with control flow. Insert before the last instruction.
+    entry_block_->InsertInstructionBefore(instruction, last_instruction);
+  }
+}
+
 HNullConstant* HGraph::GetNullConstant() {
   if (cached_null_constant_ == nullptr) {
     cached_null_constant_ = new (arena_) HNullConstant();
-    entry_block_->InsertInstructionBefore(cached_null_constant_,
-                                          entry_block_->GetLastInstruction());
+    AddConstant(cached_null_constant_);
   }
   return cached_null_constant_;
 }
 
+HIntConstant* HGraph::GetIntConstant0() {
+  if (cached_int_constant0_ == nullptr) {
+    cached_int_constant0_ = new (arena_) HIntConstant(0);
+    AddConstant(cached_int_constant0_);
+  }
+  return cached_int_constant0_;
+}
+
+HIntConstant* HGraph::GetIntConstant1() {
+  if (cached_int_constant1_ == nullptr) {
+    cached_int_constant1_ = new (arena_) HIntConstant(1);
+    AddConstant(cached_int_constant1_);
+  }
+  return cached_int_constant1_;
+}
+
 void HLoopInformation::Add(HBasicBlock* block) {
   blocks_.SetBit(block->GetBlockId());
 }
 
+void HLoopInformation::Remove(HBasicBlock* block) {
+  blocks_.ClearBit(block->GetBlockId());
+}
+
 void HLoopInformation::PopulateRecursive(HBasicBlock* block) {
   if (blocks_.IsBitSet(block->GetBlockId())) {
     return;
@@ -621,7 +651,10 @@
 void HGraphVisitor::VisitInsertionOrder() {
   const GrowableArray<HBasicBlock*>& blocks = graph_->GetBlocks();
   for (size_t i = 0 ; i < blocks.Size(); i++) {
-    VisitBasicBlock(blocks.Get(i));
+    HBasicBlock* block = blocks.Get(i);
+    if (block != nullptr) {
+      VisitBasicBlock(block);
+    }
   }
 }
 
@@ -788,6 +821,25 @@
   return new_block;
 }
 
+bool HBasicBlock::IsSingleGoto() const {
+  HLoopInformation* loop_info = GetLoopInformation();
+  // TODO: Remove the null check (b/19084197).
+  return GetFirstInstruction() != nullptr
+         && GetPhis().IsEmpty()
+         && GetFirstInstruction() == GetLastInstruction()
+         && GetLastInstruction()->IsGoto()
+         // Back edges generate the suspend check.
+         && (loop_info == nullptr || !loop_info->IsBackEdge(*this));
+}
+
+bool HBasicBlock::EndsWithIf() const {
+  return !GetInstructions().IsEmpty() && GetLastInstruction()->IsIf();
+}
+
+bool HBasicBlock::HasSinglePhi() const {
+  return !GetPhis().IsEmpty() && GetFirstPhi()->GetNext() == nullptr;
+}
+
 void HInstructionList::SetBlockOfInstructions(HBasicBlock* block) const {
   for (HInstruction* current = first_instruction_;
        current != nullptr;
@@ -811,14 +863,35 @@
 }
 
 void HInstructionList::Add(const HInstructionList& instruction_list) {
-  DCHECK(!IsEmpty());
-  AddAfter(last_instruction_, instruction_list);
+  if (IsEmpty()) {
+    first_instruction_ = instruction_list.first_instruction_;
+    last_instruction_ = instruction_list.last_instruction_;
+  } else {
+    AddAfter(last_instruction_, instruction_list);
+  }
+}
+
+void HBasicBlock::DisconnectFromAll() {
+  DCHECK(dominated_blocks_.IsEmpty()) << "Unimplemented scenario";
+
+  for (size_t i = 0, e = predecessors_.Size(); i < e; ++i) {
+    predecessors_.Get(i)->successors_.Delete(this);
+  }
+  for (size_t i = 0, e = successors_.Size(); i < e; ++i) {
+    successors_.Get(i)->predecessors_.Delete(this);
+  }
+  dominator_->dominated_blocks_.Delete(this);
+
+  predecessors_.Reset();
+  successors_.Reset();
+  dominator_ = nullptr;
+  graph_ = nullptr;
 }
 
 void HBasicBlock::MergeWith(HBasicBlock* other) {
   DCHECK(successors_.IsEmpty()) << "Unimplemented block merge scenario";
-  DCHECK(dominated_blocks_.IsEmpty()) << "Unimplemented block merge scenario";
-  DCHECK(other->GetDominator()->IsEntryBlock() && other->GetGraph() != graph_)
+  DCHECK(dominated_blocks_.IsEmpty()
+         || (dominated_blocks_.Size() == 1 && dominated_blocks_.Get(0) == other))
       << "Unimplemented block merge scenario";
   DCHECK(other->GetPhis().IsEmpty());
 
@@ -1006,7 +1079,7 @@
     if (info != nullptr) {
       info->Add(to);
       to->SetLoopInformation(info);
-      if (info->IsBackEdge(at)) {
+      if (info->IsBackEdge(*at)) {
         // Only `at` can become a back edge, as the inlined blocks
         // are predecessors of `at`.
         DCHECK_EQ(1u, info->NumberOfBackEdges());
@@ -1020,6 +1093,53 @@
   invoke->GetBlock()->RemoveInstruction(invoke);
 }
 
+void HGraph::MergeEmptyBranches(HBasicBlock* start_block, HBasicBlock* end_block) {
+  // Find the two branches of an If.
+  DCHECK_EQ(start_block->GetSuccessors().Size(), 2u);
+  HBasicBlock* left_branch = start_block->GetSuccessors().Get(0);
+  HBasicBlock* right_branch = start_block->GetSuccessors().Get(1);
+
+  // Make sure this is a diamond control-flow path.
+  DCHECK_EQ(left_branch->GetSuccessors().Get(0), end_block);
+  DCHECK_EQ(right_branch->GetSuccessors().Get(0), end_block);
+  DCHECK_EQ(end_block->GetPredecessors().Size(), 2u);
+  DCHECK_EQ(start_block, end_block->GetDominator());
+
+  // Disconnect the branches and merge the two blocks. This will move
+  // all instructions from 'end_block' to 'start_block'.
+  DCHECK(left_branch->IsSingleGoto());
+  DCHECK(right_branch->IsSingleGoto());
+  left_branch->DisconnectFromAll();
+  right_branch->DisconnectFromAll();
+  start_block->RemoveInstruction(start_block->GetLastInstruction());
+  start_block->MergeWith(end_block);
+
+  // Delete the now redundant blocks from the graph.
+  blocks_.Put(left_branch->GetBlockId(), nullptr);
+  blocks_.Put(right_branch->GetBlockId(), nullptr);
+  blocks_.Put(end_block->GetBlockId(), nullptr);
+
+  // Update reverse post order.
+  reverse_post_order_.Delete(left_branch);
+  reverse_post_order_.Delete(right_branch);
+  reverse_post_order_.Delete(end_block);
+
+  // Update loops which contain the code.
+  for (HLoopInformationOutwardIterator it(*start_block); !it.Done(); it.Advance()) {
+    HLoopInformation* loop_info = it.Current();
+    DCHECK(loop_info->Contains(*left_branch));
+    DCHECK(loop_info->Contains(*right_branch));
+    DCHECK(loop_info->Contains(*end_block));
+    loop_info->Remove(left_branch);
+    loop_info->Remove(right_branch);
+    loop_info->Remove(end_block);
+    if (loop_info->IsBackEdge(*end_block)) {
+      loop_info->RemoveBackEdge(end_block);
+      loop_info->AddBackEdge(start_block);
+    }
+  }
+}
+
 std::ostream& operator<<(std::ostream& os, const ReferenceTypeInfo& rhs) {
   ScopedObjectAccess soa(Thread::Current());
   os << "["
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index a35fa1d..664cf18 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -128,6 +128,7 @@
   void SetExitBlock(HBasicBlock* block) { exit_block_ = block; }
 
   void AddBlock(HBasicBlock* block);
+  void AddConstant(HConstant* instruction);
 
   // Try building the SSA form of this graph, with dominance computation and loop
   // recognition. Returns whether it was successful in doing all these steps.
@@ -154,6 +155,8 @@
   // Inline this graph in `outer_graph`, replacing the given `invoke` instruction.
   void InlineInto(HGraph* outer_graph, HInvoke* invoke);
 
+  void MergeEmptyBranches(HBasicBlock* start_block, HBasicBlock* end_block);
+
   void SplitCriticalEdge(HBasicBlock* block, HBasicBlock* successor);
   void SimplifyLoop(HBasicBlock* header);
 
@@ -217,6 +220,8 @@
   bool IsDebuggable() const { return debuggable_; }
 
   HNullConstant* GetNullConstant();
+  HIntConstant* GetIntConstant0();
+  HIntConstant* GetIntConstant1();
 
  private:
   HBasicBlock* FindCommonDominator(HBasicBlock* first, HBasicBlock* second) const;
@@ -267,6 +272,10 @@
   // Cached null constant that might be created when building SSA form.
   HNullConstant* cached_null_constant_;
 
+  // Cached common constants often needed by optimization passes.
+  HIntConstant* cached_int_constant0_;
+  HIntConstant* cached_int_constant1_;
+
   ART_FRIEND_TEST(GraphTest, IfSuccessorSimpleJoinBlock1);
   DISALLOW_COPY_AND_ASSIGN(HGraph);
 };
@@ -300,9 +309,9 @@
     back_edges_.Delete(back_edge);
   }
 
-  bool IsBackEdge(HBasicBlock* block) {
+  bool IsBackEdge(const HBasicBlock& block) const {
     for (size_t i = 0, e = back_edges_.Size(); i < e; ++i) {
-      if (back_edges_.Get(i) == block) return true;
+      if (back_edges_.Get(i) == &block) return true;
     }
     return false;
   }
@@ -336,6 +345,7 @@
   const ArenaBitVector& GetBlocks() const { return blocks_; }
 
   void Add(HBasicBlock* block);
+  void Remove(HBasicBlock* block);
 
  private:
   // Internal recursive implementation of `Populate`.
@@ -391,6 +401,8 @@
     return graph_->GetExitBlock() == this;
   }
 
+  bool IsSingleGoto() const;
+
   void AddBackEdge(HBasicBlock* back_edge) {
     if (loop_information_ == nullptr) {
       loop_information_ = new (graph_->GetArena()) HLoopInformation(this, graph_);
@@ -512,8 +524,16 @@
   // of `this` are moved to `other`.
   // Note that this method does not update the graph, reverse post order, loop
   // information, nor make sure the blocks are consistent (for example ending
+  // with a control flow instruction).
   void ReplaceWith(HBasicBlock* other);
 
+  // Disconnects `this` from all its predecessors, successors and the dominator.
+  // It assumes that `this` does not dominate any blocks.
+  // Note that this method does not update the graph, reverse post order, loop
+  // information, nor make sure the blocks are consistent (for example ending
+  // with a control flow instruction).
+  void DisconnectFromAll();
+
   void AddInstruction(HInstruction* instruction);
   void InsertInstructionBefore(HInstruction* instruction, HInstruction* cursor);
   // Replace instruction `initial` with `replacement` within this block.
@@ -582,6 +602,9 @@
   bool IsCatchBlock() const { return is_catch_block_; }
   void SetIsCatchBlock() { is_catch_block_ = true; }
 
+  bool EndsWithIf() const;
+  bool HasSinglePhi() const;
+
  private:
   HGraph* graph_;
   GrowableArray<HBasicBlock*> predecessors_;
@@ -604,6 +627,31 @@
   DISALLOW_COPY_AND_ASSIGN(HBasicBlock);
 };
 
+// Iterates over the LoopInformation of all loops which contain `block`
+// from the innermost to the outermost.
+class HLoopInformationOutwardIterator : public ValueObject {
+ public:
+  explicit HLoopInformationOutwardIterator(const HBasicBlock& block)
+      : current_(block.GetLoopInformation()) {}
+
+  bool Done() const { return current_ == nullptr; }
+
+  void Advance() {
+    DCHECK(!Done());
+    current_ = current_->GetHeader()->GetDominator()->GetLoopInformation();
+  }
+
+  HLoopInformation* Current() const {
+    DCHECK(!Done());
+    return current_;
+  }
+
+ private:
+  HLoopInformation* current_;
+
+  DISALLOW_COPY_AND_ASSIGN(HLoopInformationOutwardIterator);
+};
+
 #define FOR_EACH_CONCRETE_INSTRUCTION(M)                                \
   M(Add, BinaryOperation)                                               \
   M(And, BinaryOperation)                                               \
@@ -1873,20 +1921,22 @@
   float GetValue() const { return value_; }
 
   bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
-    return bit_cast<float, int32_t>(other->AsFloatConstant()->value_) ==
-        bit_cast<float, int32_t>(value_);
+    return bit_cast<uint32_t, float>(other->AsFloatConstant()->value_) ==
+        bit_cast<uint32_t, float>(value_);
   }
 
   size_t ComputeHashCode() const OVERRIDE { return static_cast<size_t>(GetValue()); }
 
   bool IsMinusOne() const OVERRIDE {
-    return bit_cast<uint32_t>(AsFloatConstant()->GetValue()) == bit_cast<uint32_t>((-1.0f));
+    return bit_cast<uint32_t, float>(AsFloatConstant()->GetValue()) ==
+        bit_cast<uint32_t, float>((-1.0f));
   }
   bool IsZero() const OVERRIDE {
     return AsFloatConstant()->GetValue() == 0.0f;
   }
   bool IsOne() const OVERRIDE {
-    return bit_cast<uint32_t>(AsFloatConstant()->GetValue()) == bit_cast<uint32_t>(1.0f);
+    return bit_cast<uint32_t, float>(AsFloatConstant()->GetValue()) ==
+        bit_cast<uint32_t, float>(1.0f);
   }
 
   DECLARE_INSTRUCTION(FloatConstant);
@@ -1904,20 +1954,22 @@
   double GetValue() const { return value_; }
 
   bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
-    return bit_cast<double, int64_t>(other->AsDoubleConstant()->value_) ==
-        bit_cast<double, int64_t>(value_);
+    return bit_cast<uint64_t, double>(other->AsDoubleConstant()->value_) ==
+        bit_cast<uint64_t, double>(value_);
   }
 
   size_t ComputeHashCode() const OVERRIDE { return static_cast<size_t>(GetValue()); }
 
   bool IsMinusOne() const OVERRIDE {
-    return bit_cast<uint64_t>(AsDoubleConstant()->GetValue()) == bit_cast<uint64_t>((-1.0));
+    return bit_cast<uint64_t, double>(AsDoubleConstant()->GetValue()) ==
+        bit_cast<uint64_t, double>((-1.0));
   }
   bool IsZero() const OVERRIDE {
     return AsDoubleConstant()->GetValue() == 0.0;
   }
   bool IsOne() const OVERRIDE {
-    return bit_cast<uint64_t>(AsDoubleConstant()->GetValue()) == bit_cast<uint64_t>(1.0);
+    return bit_cast<uint64_t, double>(AsDoubleConstant()->GetValue()) ==
+        bit_cast<uint64_t, double>(1.0);
   }
 
   DECLARE_INSTRUCTION(DoubleConstant);
@@ -3459,7 +3511,10 @@
 
 class HReversePostOrderIterator : public ValueObject {
  public:
-  explicit HReversePostOrderIterator(const HGraph& graph) : graph_(graph), index_(0) {}
+  explicit HReversePostOrderIterator(const HGraph& graph) : graph_(graph), index_(0) {
+    // Check that the reverse post order of the graph has been built.
+    DCHECK(!graph.GetReversePostOrder().IsEmpty());
+  }
 
   bool Done() const { return index_ == graph_.GetReversePostOrder().Size(); }
   HBasicBlock* Current() const { return graph_.GetReversePostOrder().Get(index_); }
@@ -3475,7 +3530,10 @@
 class HPostOrderIterator : public ValueObject {
  public:
   explicit HPostOrderIterator(const HGraph& graph)
-      : graph_(graph), index_(graph_.GetReversePostOrder().Size()) {}
+      : graph_(graph), index_(graph_.GetReversePostOrder().Size()) {
+    // Check that the reverse post order of the graph has been built.
+    DCHECK(!graph.GetReversePostOrder().IsEmpty());
+  }
 
   bool Done() const { return index_ == 0; }
   HBasicBlock* Current() const { return graph_.GetReversePostOrder().Get(index_ - 1); }
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index b70f925..eaa30df 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -22,6 +22,7 @@
 #include "base/arena_allocator.h"
 #include "base/dumpable.h"
 #include "base/timing_logger.h"
+#include "boolean_simplifier.h"
 #include "bounds_check_elimination.h"
 #include "builder.h"
 #include "code_generator.h"
@@ -313,6 +314,7 @@
   HDeadCodeElimination dce(graph);
   HConstantFolding fold1(graph);
   InstructionSimplifier simplify1(graph, stats);
+  HBooleanSimplifier boolean_not(graph);
 
   HInliner inliner(graph, dex_compilation_unit, driver, stats);
 
@@ -331,6 +333,9 @@
     &dce,
     &fold1,
     &simplify1,
+    // BooleanSimplifier depends on the InstructionSimplifier removing redundant
+    // suspend checks to recognize empty blocks.
+    &boolean_not,
     &inliner,
     &fold2,
     &side_effects,
@@ -462,13 +467,22 @@
     return nullptr;
   }
 
+  // Implementation of the space filter: do not compile a code item whose size in
+  // code units is bigger than 256.
+  static constexpr size_t kSpaceFilterOptimizingThreshold = 256;
+  const CompilerOptions& compiler_options = compiler_driver->GetCompilerOptions();
+  if ((compiler_options.GetCompilerFilter() == CompilerOptions::kSpace)
+      && (code_item->insns_size_in_code_units_ > kSpaceFilterOptimizingThreshold)) {
+    compilation_stats_.RecordStat(MethodCompilationStat::kNotCompiledSpaceFilter);
+    return nullptr;
+  }
+
   DexCompilationUnit dex_compilation_unit(
     nullptr, class_loader, art::Runtime::Current()->GetClassLinker(), dex_file, code_item,
     class_def_idx, method_idx, access_flags,
     compiler_driver->GetVerifiedMethod(&dex_file, method_idx));
 
-  ArenaPool pool;
-  ArenaAllocator arena(&pool);
+  ArenaAllocator arena(Runtime::Current()->GetArenaPool());
   HGraph* graph = new (&arena) HGraph(
       &arena, compiler_driver->GetCompilerOptions().GetDebuggable());
 
diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h
index 3ebf0f8..22ec2a5 100644
--- a/compiler/optimizing/optimizing_compiler_stats.h
+++ b/compiler/optimizing/optimizing_compiler_stats.h
@@ -38,6 +38,7 @@
   kNotCompiledUnresolvedMethod,
   kNotCompiledUnresolvedField,
   kNotCompiledNonSequentialRegPair,
+  kNotCompiledSpaceFilter,
   kNotOptimizedTryCatch,
   kNotOptimizedDisabled,
   kNotCompiledCantAccesType,
@@ -96,6 +97,7 @@
       case kNotOptimizedDisabled : return "kNotOptimizedDisabled";
       case kNotOptimizedTryCatch : return "kNotOptimizedTryCatch";
       case kNotCompiledCantAccesType : return "kNotCompiledCantAccesType";
+      case kNotCompiledSpaceFilter : return "kNotCompiledSpaceFilter";
       case kNotOptimizedRegisterAllocator : return "kNotOptimizedRegisterAllocator";
       case kNotCompiledUnhandledInstruction : return "kNotCompiledUnhandledInstruction";
       case kRemovedCheckedCast: return "kRemovedCheckedCast";
diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc
index b757a3b..7a2d84b 100644
--- a/compiler/optimizing/register_allocator_test.cc
+++ b/compiler/optimizing/register_allocator_test.cc
@@ -596,6 +596,8 @@
   graph->AddBlock(exit);
   block->AddSuccessor(exit);
   exit->AddInstruction(new (allocator) HExit());
+
+  graph->BuildDominatorTree();
   return graph;
 }
 
@@ -658,6 +660,8 @@
   block->AddInstruction(*second_sub);
 
   block->AddInstruction(new (allocator) HExit());
+
+  graph->BuildDominatorTree();
   return graph;
 }
 
@@ -719,6 +723,8 @@
   block->AddInstruction(*div);
 
   block->AddInstruction(new (allocator) HExit());
+
+  graph->BuildDominatorTree();
   return graph;
 }
 
diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc
index ba11e90..ae6bf16 100644
--- a/compiler/optimizing/ssa_builder.cc
+++ b/compiler/optimizing/ssa_builder.cc
@@ -359,12 +359,12 @@
   if (result == nullptr) {
     HGraph* graph = constant->GetBlock()->GetGraph();
     ArenaAllocator* allocator = graph->GetArena();
-    result = new (allocator) HFloatConstant(bit_cast<int32_t, float>(constant->GetValue()));
+    result = new (allocator) HFloatConstant(bit_cast<float, int32_t>(constant->GetValue()));
     constant->GetBlock()->InsertInstructionBefore(result, constant->GetNext());
   } else {
     // If there is already a constant with the expected type, we know it is
     // the floating point equivalent of this constant.
-    DCHECK_EQ((bit_cast<float, int32_t>(result->GetValue())), constant->GetValue());
+    DCHECK_EQ((bit_cast<int32_t, float>(result->GetValue())), constant->GetValue());
   }
   return result;
 }
@@ -381,12 +381,12 @@
   if (result == nullptr) {
     HGraph* graph = constant->GetBlock()->GetGraph();
     ArenaAllocator* allocator = graph->GetArena();
-    result = new (allocator) HDoubleConstant(bit_cast<int64_t, double>(constant->GetValue()));
+    result = new (allocator) HDoubleConstant(bit_cast<double, int64_t>(constant->GetValue()));
     constant->GetBlock()->InsertInstructionBefore(result, constant->GetNext());
   } else {
     // If there is already a constant with the expected type, we know it is
     // the floating point equivalent of this constant.
-    DCHECK_EQ((bit_cast<double, int64_t>(result->GetValue())), constant->GetValue());
+    DCHECK_EQ((bit_cast<int64_t, double>(result->GetValue())), constant->GetValue());
   }
   return result;
 }
diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc
index c0d6f42..56ccd71 100644
--- a/compiler/optimizing/ssa_liveness_analysis.cc
+++ b/compiler/optimizing/ssa_liveness_analysis.cc
@@ -71,8 +71,8 @@
   //      for it.
   GrowableArray<uint32_t> forward_predecessors(graph_.GetArena(), graph_.GetBlocks().Size());
   forward_predecessors.SetSize(graph_.GetBlocks().Size());
-  for (size_t i = 0, e = graph_.GetBlocks().Size(); i < e; ++i) {
-    HBasicBlock* block = graph_.GetBlocks().Get(i);
+  for (HReversePostOrderIterator it(graph_); !it.Done(); it.Advance()) {
+    HBasicBlock* block = it.Current();
     size_t number_of_forward_predecessors = block->GetPredecessors().Size();
     if (block->IsLoopHeader()) {
       // We rely on having simplified the CFG.
diff --git a/compiler/optimizing/stack_map_stream.h b/compiler/optimizing/stack_map_stream.h
index e1a5afe..5818a37 100644
--- a/compiler/optimizing/stack_map_stream.h
+++ b/compiler/optimizing/stack_map_stream.h
@@ -17,7 +17,8 @@
 #ifndef ART_COMPILER_OPTIMIZING_STACK_MAP_STREAM_H_
 #define ART_COMPILER_OPTIMIZING_STACK_MAP_STREAM_H_
 
-#include "base/bit_vector.h"
+#include "base/arena_containers.h"
+#include "base/bit_vector-inl.h"
 #include "base/value_object.h"
 #include "memory_region.h"
 #include "nodes.h"
@@ -38,7 +39,10 @@
         dex_register_locations_(allocator, 10 * 4),
         inline_infos_(allocator, 2),
         stack_mask_max_(-1),
-        number_of_stack_maps_with_inline_info_(0) {}
+        dex_pc_max_(0),
+        native_pc_offset_max_(0),
+        number_of_stack_maps_with_inline_info_(0),
+        dex_map_hash_to_stack_map_indices_(std::less<uint32_t>(), allocator->Adapter()) {}
 
   // Compute bytes needed to encode a mask with the given maximum element.
   static uint32_t StackMaskEncodingSize(int max_element) {
@@ -57,6 +61,7 @@
     size_t dex_register_locations_start_index;
     size_t inline_infos_start_index;
     BitVector* live_dex_registers_mask;
+    uint32_t dex_register_map_hash;
   };
 
   struct InlineInfoEntry {
@@ -78,6 +83,7 @@
     entry.inlining_depth = inlining_depth;
     entry.dex_register_locations_start_index = dex_register_locations_.Size();
     entry.inline_infos_start_index = inline_infos_.Size();
+    entry.dex_register_map_hash = 0;
     if (num_dex_registers != 0) {
       entry.live_dex_registers_mask =
           new (allocator_) ArenaBitVector(allocator_, num_dex_registers, true);
@@ -92,6 +98,9 @@
     if (inlining_depth > 0) {
       number_of_stack_maps_with_inline_info_++;
     }
+
+    dex_pc_max_ = std::max(dex_pc_max_, dex_pc);
+    native_pc_offset_max_ = std::max(native_pc_offset_max_, native_pc_offset);
   }
 
   void AddInlineInfoEntry(uint32_t method_index) {
@@ -100,7 +109,7 @@
     inline_infos_.Add(entry);
   }
 
-  size_t ComputeNeededSize() const {
+  size_t ComputeNeededSize() {
     size_t size = CodeInfo::kFixedSize
         + ComputeStackMapsSize()
         + ComputeDexRegisterMapsSize()
@@ -113,8 +122,13 @@
     return StackMaskEncodingSize(stack_mask_max_);
   }
 
-  size_t ComputeStackMapsSize() const {
-    return stack_maps_.Size() * StackMap::ComputeStackMapSize(ComputeStackMaskSize());
+  size_t ComputeStackMapsSize() {
+    return stack_maps_.Size() * StackMap::ComputeStackMapSize(
+        ComputeStackMaskSize(),
+        ComputeInlineInfoSize(),
+        ComputeDexRegisterMapsSize(),
+        dex_pc_max_,
+        native_pc_offset_max_);
   }
 
   // Compute the size of the Dex register map of `entry`.
@@ -136,10 +150,13 @@
   }
 
   // Compute the size of all the Dex register maps.
-  size_t ComputeDexRegisterMapsSize() const {
+  size_t ComputeDexRegisterMapsSize() {
     size_t size = 0;
     for (size_t i = 0; i < stack_maps_.Size(); ++i) {
-      size += ComputeDexRegisterMapSize(stack_maps_.Get(i));
+      if (FindEntryWithTheSameDexMap(i) == kNoSameDexMapFound) {
+        // Entries with the same dex map will have the same offset.
+        size += ComputeDexRegisterMapSize(stack_maps_.Get(i));
+      }
     }
     return size;
   }
@@ -151,11 +168,11 @@
       + (number_of_stack_maps_with_inline_info_ * InlineInfo::kFixedSize);
   }
 
-  size_t ComputeDexRegisterMapsStart() const {
+  size_t ComputeDexRegisterMapsStart() {
     return CodeInfo::kFixedSize + ComputeStackMapsSize();
   }
 
-  size_t ComputeInlineInfoStart() const {
+  size_t ComputeInlineInfoStart() {
     return ComputeDexRegisterMapsStart() + ComputeDexRegisterMapsSize();
   }
 
@@ -165,16 +182,20 @@
     code_info.SetOverallSize(region.size());
 
     size_t stack_mask_size = ComputeStackMaskSize();
-    uint8_t* memory_start = region.start();
+
+    size_t dex_register_map_size = ComputeDexRegisterMapsSize();
+    size_t inline_info_size = ComputeInlineInfoSize();
 
     MemoryRegion dex_register_locations_region = region.Subregion(
       ComputeDexRegisterMapsStart(),
-      ComputeDexRegisterMapsSize());
+      dex_register_map_size);
 
     MemoryRegion inline_infos_region = region.Subregion(
       ComputeInlineInfoStart(),
-      ComputeInlineInfoSize());
+      inline_info_size);
 
+    code_info.SetEncoding(
+        inline_info_size, dex_register_map_size, dex_pc_max_, native_pc_offset_max_);
     code_info.SetNumberOfStackMaps(stack_maps_.Size());
     code_info.SetStackMaskSize(stack_mask_size);
     DCHECK_EQ(code_info.StackMapsSize(), ComputeStackMapsSize());
@@ -185,44 +206,54 @@
       StackMap stack_map = code_info.GetStackMapAt(i);
       StackMapEntry entry = stack_maps_.Get(i);
 
-      stack_map.SetDexPc(entry.dex_pc);
-      stack_map.SetNativePcOffset(entry.native_pc_offset);
-      stack_map.SetRegisterMask(entry.register_mask);
+      stack_map.SetDexPc(code_info, entry.dex_pc);
+      stack_map.SetNativePcOffset(code_info, entry.native_pc_offset);
+      stack_map.SetRegisterMask(code_info, entry.register_mask);
       if (entry.sp_mask != nullptr) {
-        stack_map.SetStackMask(*entry.sp_mask);
+        stack_map.SetStackMask(code_info, *entry.sp_mask);
       }
 
-      if (entry.num_dex_registers != 0) {
-        // Set the Dex register map.
-        MemoryRegion register_region =
-            dex_register_locations_region.Subregion(
-                next_dex_register_map_offset,
-                ComputeDexRegisterMapSize(entry));
-        next_dex_register_map_offset += register_region.size();
-        DexRegisterMap dex_register_map(register_region);
-        stack_map.SetDexRegisterMapOffset(register_region.start() - memory_start);
-
-        // Offset in `dex_register_map` where to store the next register entry.
-        size_t offset = DexRegisterMap::kFixedSize;
-        dex_register_map.SetLiveBitMask(offset,
-                                        entry.num_dex_registers,
-                                        *entry.live_dex_registers_mask);
-        offset += DexRegisterMap::LiveBitMaskSize(entry.num_dex_registers);
-        for (size_t dex_register_number = 0, index_in_dex_register_locations = 0;
-             dex_register_number < entry.num_dex_registers;
-             ++dex_register_number) {
-          if (entry.live_dex_registers_mask->IsBitSet(dex_register_number)) {
-            DexRegisterLocation dex_register_location = dex_register_locations_.Get(
-                entry.dex_register_locations_start_index + index_in_dex_register_locations);
-            dex_register_map.SetRegisterInfo(offset, dex_register_location);
-            offset += DexRegisterMap::EntrySize(dex_register_location);
-            ++index_in_dex_register_locations;
-          }
-        }
-        // Ensure we reached the end of the Dex registers region.
-        DCHECK_EQ(offset, register_region.size());
+      if (entry.num_dex_registers == 0) {
+        // No dex map available.
+        stack_map.SetDexRegisterMapOffset(code_info, StackMap::kNoDexRegisterMap);
       } else {
-        stack_map.SetDexRegisterMapOffset(StackMap::kNoDexRegisterMap);
+        // Search for an entry with the same dex map.
+        size_t entry_with_same_map = FindEntryWithTheSameDexMap(i);
+        if (entry_with_same_map != kNoSameDexMapFound) {
+          // If we have a hit, reuse the offset.
+          stack_map.SetDexRegisterMapOffset(code_info,
+              code_info.GetStackMapAt(entry_with_same_map).GetDexRegisterMapOffset(code_info));
+        } else {
+          // New dex register maps should be added to the stack map.
+          MemoryRegion register_region =
+              dex_register_locations_region.Subregion(
+                  next_dex_register_map_offset,
+                  ComputeDexRegisterMapSize(entry));
+          next_dex_register_map_offset += register_region.size();
+          DexRegisterMap dex_register_map(register_region);
+          stack_map.SetDexRegisterMapOffset(
+            code_info, register_region.start() - dex_register_locations_region.start());
+
+          // Offset in `dex_register_map` where to store the next register entry.
+          size_t offset = DexRegisterMap::kFixedSize;
+          dex_register_map.SetLiveBitMask(offset,
+                                          entry.num_dex_registers,
+                                          *entry.live_dex_registers_mask);
+          offset += DexRegisterMap::LiveBitMaskSize(entry.num_dex_registers);
+          for (size_t dex_register_number = 0, index_in_dex_register_locations = 0;
+               dex_register_number < entry.num_dex_registers;
+               ++dex_register_number) {
+            if (entry.live_dex_registers_mask->IsBitSet(dex_register_number)) {
+              DexRegisterLocation dex_register_location = dex_register_locations_.Get(
+                  entry.dex_register_locations_start_index + index_in_dex_register_locations);
+              dex_register_map.SetRegisterInfo(offset, dex_register_location);
+              offset += DexRegisterMap::EntrySize(dex_register_location);
+              ++index_in_dex_register_locations;
+            }
+          }
+          // Ensure we reached the end of the Dex registers region.
+          DCHECK_EQ(offset, register_region.size());
+        }
       }
 
       // Set the inlining info.
@@ -233,7 +264,9 @@
         next_inline_info_offset += inline_region.size();
         InlineInfo inline_info(inline_region);
 
-        stack_map.SetInlineDescriptorOffset(inline_region.start() - memory_start);
+        // Currently relative to the dex register map.
+        stack_map.SetInlineDescriptorOffset(
+            code_info, inline_region.start() - dex_register_locations_region.start());
 
         inline_info.SetDepth(entry.inlining_depth);
         for (size_t j = 0; j < entry.inlining_depth; ++j) {
@@ -241,7 +274,9 @@
           inline_info.SetMethodReferenceIndexAtDepth(j, inline_entry.method_index);
         }
       } else {
-        stack_map.SetInlineDescriptorOffset(StackMap::kNoInlineInfo);
+        if (inline_info_size != 0) {
+          stack_map.SetInlineDescriptorOffset(code_info, StackMap::kNoInlineInfo);
+        }
       }
     }
   }
@@ -252,18 +287,99 @@
       DCHECK(DexRegisterLocation::IsShortLocationKind(kind))
           << DexRegisterLocation::PrettyDescriptor(kind);
       dex_register_locations_.Add(DexRegisterLocation(kind, value));
-      stack_maps_.Get(stack_maps_.Size() - 1).live_dex_registers_mask->SetBit(dex_register);
+      StackMapEntry entry = stack_maps_.Get(stack_maps_.Size() - 1);
+      entry.live_dex_registers_mask->SetBit(dex_register);
+      entry.dex_register_map_hash += (1 << dex_register);
+      entry.dex_register_map_hash += static_cast<uint32_t>(value);
+      entry.dex_register_map_hash += static_cast<uint32_t>(kind);
+      stack_maps_.Put(stack_maps_.Size() - 1, entry);
     }
   }
 
  private:
+  // Returns the index of an entry with the same dex register map
+  // or kNoSameDexMapFound if no such entry exists.
+  size_t FindEntryWithTheSameDexMap(size_t entry_index) {
+    StackMapEntry entry = stack_maps_.Get(entry_index);
+    auto entries_it = dex_map_hash_to_stack_map_indices_.find(entry.dex_register_map_hash);
+    if (entries_it == dex_map_hash_to_stack_map_indices_.end()) {
+      // We don't have a perfect hash function, so we need a list to collect all stack maps
+      // which might have the same dex register map.
+      GrowableArray<uint32_t> stack_map_indices(allocator_, 1);
+      stack_map_indices.Add(entry_index);
+      dex_map_hash_to_stack_map_indices_.Put(entry.dex_register_map_hash, stack_map_indices);
+      return kNoSameDexMapFound;
+    }
+
+    // TODO: We don't need to add ourselves to the map if we can guarantee that
+    // FindEntryWithTheSameDexMap is called just once per stack map entry.
+    // A good way to do this is to cache the offset in the stack map entry. This
+    // is easier to do if we add markers when the stack map construction begins
+    // and when it ends.
+
+    // We might have collisions, so we need to check whether or not we should
+    // add the entry to the map. `needs_to_be_added` keeps track of this.
+    bool needs_to_be_added = true;
+    size_t result = kNoSameDexMapFound;
+    for (size_t i = 0; i < entries_it->second.Size(); i++) {
+      size_t test_entry_index = entries_it->second.Get(i);
+      if (test_entry_index == entry_index) {
+        needs_to_be_added = false;
+      } else if (HaveTheSameDexMaps(stack_maps_.Get(test_entry_index), entry)) {
+        result = test_entry_index;
+        needs_to_be_added = false;
+        break;
+      }
+    }
+    if (needs_to_be_added) {
+      entries_it->second.Add(entry_index);
+    }
+    return result;
+  }
+
+  bool HaveTheSameDexMaps(const StackMapEntry& a, const StackMapEntry& b) const {
+    if (a.live_dex_registers_mask == nullptr && b.live_dex_registers_mask == nullptr) {
+      return true;
+    }
+    if (a.live_dex_registers_mask == nullptr || b.live_dex_registers_mask == nullptr) {
+      return false;
+    }
+    if (a.num_dex_registers != b.num_dex_registers) {
+      return false;
+    }
+
+    int index_in_dex_register_locations = 0;
+    for (uint32_t i = 0; i < a.num_dex_registers; i++) {
+      if (a.live_dex_registers_mask->IsBitSet(i) != b.live_dex_registers_mask->IsBitSet(i)) {
+        return false;
+      }
+      if (a.live_dex_registers_mask->IsBitSet(i)) {
+        DexRegisterLocation a_loc = dex_register_locations_.Get(
+            a.dex_register_locations_start_index + index_in_dex_register_locations);
+        DexRegisterLocation b_loc = dex_register_locations_.Get(
+            b.dex_register_locations_start_index + index_in_dex_register_locations);
+        if (a_loc != b_loc) {
+          return false;
+        }
+        ++index_in_dex_register_locations;
+      }
+    }
+    return true;
+  }
+
   ArenaAllocator* allocator_;
   GrowableArray<StackMapEntry> stack_maps_;
   GrowableArray<DexRegisterLocation> dex_register_locations_;
   GrowableArray<InlineInfoEntry> inline_infos_;
   int stack_mask_max_;
+  uint32_t dex_pc_max_;
+  uint32_t native_pc_offset_max_;
   size_t number_of_stack_maps_with_inline_info_;
 
+  ArenaSafeMap<uint32_t, GrowableArray<uint32_t>> dex_map_hash_to_stack_map_indices_;
+
+  static constexpr uint32_t kNoSameDexMapFound = -1;
+
   ART_FRIEND_TEST(StackMapTest, Test1);
   ART_FRIEND_TEST(StackMapTest, Test2);
   ART_FRIEND_TEST(StackMapTest, TestNonLiveDexRegisters);
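
Note: FindEntryWithTheSameDexMap and HaveTheSameDexMaps above bucket stack map entries by a cheap additive hash of their dex register map and resolve collisions with a full comparison, so identical maps can share one offset. A minimal sketch of the same bucketing idea using standard containers instead of the arena-backed ones; all names are illustrative, and it assumes each entry index is queried exactly once, in insertion order.

#include <cstdint>
#include <unordered_map>
#include <vector>

struct Entry {
  uint32_t hash;                    // cheap, collision-prone fingerprint
  std::vector<uint32_t> locations;  // stands in for live mask + locations
};

// Returns the index of an earlier entry with an identical map, or -1.
static int FindSameMap(const std::vector<Entry>& entries,
                       std::unordered_map<uint32_t, std::vector<size_t>>& buckets,
                       size_t index) {
  std::vector<size_t>& bucket = buckets[entries[index].hash];  // created if absent
  for (size_t candidate : bucket) {
    if (entries[candidate].locations == entries[index].locations) {
      return static_cast<int>(candidate);  // duplicate: reuse its map offset
    }
  }
  bucket.push_back(index);  // first entry with this map keeps the real data
  return -1;
}

When a duplicate is found, the caller records the offset already emitted for the matching entry instead of writing a second dex register map, which is what FillIn does above via GetDexRegisterMapOffset.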
diff --git a/compiler/optimizing/stack_map_test.cc b/compiler/optimizing/stack_map_test.cc
index 87ac2e7..e5a9790 100644
--- a/compiler/optimizing/stack_map_test.cc
+++ b/compiler/optimizing/stack_map_test.cc
@@ -54,14 +54,14 @@
   StackMap stack_map = code_info.GetStackMapAt(0);
   ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(0)));
   ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(64)));
-  ASSERT_EQ(0u, stack_map.GetDexPc());
-  ASSERT_EQ(64u, stack_map.GetNativePcOffset());
-  ASSERT_EQ(0x3u, stack_map.GetRegisterMask());
+  ASSERT_EQ(0u, stack_map.GetDexPc(code_info));
+  ASSERT_EQ(64u, stack_map.GetNativePcOffset(code_info));
+  ASSERT_EQ(0x3u, stack_map.GetRegisterMask(code_info));
 
-  MemoryRegion stack_mask = stack_map.GetStackMask();
+  MemoryRegion stack_mask = stack_map.GetStackMask(code_info);
   ASSERT_TRUE(SameBits(stack_mask, sp_mask));
 
-  ASSERT_TRUE(stack_map.HasDexRegisterMap());
+  ASSERT_TRUE(stack_map.HasDexRegisterMap(code_info));
   DexRegisterMap dex_registers = code_info.GetDexRegisterMapOf(stack_map, number_of_dex_registers);
   ASSERT_EQ(7u, dex_registers.Size());
   DexRegisterLocation location0 = dex_registers.GetLocationKindAndValue(0, number_of_dex_registers);
@@ -73,7 +73,7 @@
   ASSERT_EQ(0, location0.GetValue());
   ASSERT_EQ(-2, location1.GetValue());
 
-  ASSERT_FALSE(stack_map.HasInlineInfo());
+  ASSERT_FALSE(stack_map.HasInlineInfo(code_info));
 }
 
 TEST(StackMapTest, Test2) {
@@ -112,14 +112,14 @@
     StackMap stack_map = code_info.GetStackMapAt(0);
     ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(0)));
     ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(64)));
-    ASSERT_EQ(0u, stack_map.GetDexPc());
-    ASSERT_EQ(64u, stack_map.GetNativePcOffset());
-    ASSERT_EQ(0x3u, stack_map.GetRegisterMask());
+    ASSERT_EQ(0u, stack_map.GetDexPc(code_info));
+    ASSERT_EQ(64u, stack_map.GetNativePcOffset(code_info));
+    ASSERT_EQ(0x3u, stack_map.GetRegisterMask(code_info));
 
-    MemoryRegion stack_mask = stack_map.GetStackMask();
+    MemoryRegion stack_mask = stack_map.GetStackMask(code_info);
     ASSERT_TRUE(SameBits(stack_mask, sp_mask1));
 
-    ASSERT_TRUE(stack_map.HasDexRegisterMap());
+    ASSERT_TRUE(stack_map.HasDexRegisterMap(code_info));
     DexRegisterMap dex_registers =
         code_info.GetDexRegisterMapOf(stack_map, number_of_dex_registers);
     ASSERT_EQ(7u, dex_registers.Size());
@@ -134,7 +134,7 @@
     ASSERT_EQ(0, location0.GetValue());
     ASSERT_EQ(-2, location1.GetValue());
 
-    ASSERT_TRUE(stack_map.HasInlineInfo());
+    ASSERT_TRUE(stack_map.HasInlineInfo(code_info));
     InlineInfo inline_info = code_info.GetInlineInfoOf(stack_map);
     ASSERT_EQ(2u, inline_info.GetDepth());
     ASSERT_EQ(42u, inline_info.GetMethodReferenceIndexAtDepth(0));
@@ -146,14 +146,14 @@
     StackMap stack_map = code_info.GetStackMapAt(1);
     ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(1u)));
     ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(128u)));
-    ASSERT_EQ(1u, stack_map.GetDexPc());
-    ASSERT_EQ(128u, stack_map.GetNativePcOffset());
-    ASSERT_EQ(0xFFu, stack_map.GetRegisterMask());
+    ASSERT_EQ(1u, stack_map.GetDexPc(code_info));
+    ASSERT_EQ(128u, stack_map.GetNativePcOffset(code_info));
+    ASSERT_EQ(0xFFu, stack_map.GetRegisterMask(code_info));
 
-    MemoryRegion stack_mask = stack_map.GetStackMask();
+    MemoryRegion stack_mask = stack_map.GetStackMask(code_info);
     ASSERT_TRUE(SameBits(stack_mask, sp_mask2));
 
-    ASSERT_TRUE(stack_map.HasDexRegisterMap());
+    ASSERT_TRUE(stack_map.HasDexRegisterMap(code_info));
     DexRegisterMap dex_registers =
         code_info.GetDexRegisterMapOf(stack_map, number_of_dex_registers);
     ASSERT_EQ(3u, dex_registers.Size());
@@ -168,7 +168,7 @@
     ASSERT_EQ(18, location0.GetValue());
     ASSERT_EQ(3, location1.GetValue());
 
-    ASSERT_FALSE(stack_map.HasInlineInfo());
+    ASSERT_FALSE(stack_map.HasInlineInfo(code_info));
   }
 }
 
@@ -190,14 +190,95 @@
 
   CodeInfo code_info(region);
   StackMap stack_map = code_info.GetStackMapAt(0);
-  ASSERT_TRUE(stack_map.HasDexRegisterMap());
+  ASSERT_TRUE(stack_map.HasDexRegisterMap(code_info));
   DexRegisterMap dex_registers = code_info.GetDexRegisterMapOf(stack_map, 2);
   ASSERT_EQ(DexRegisterLocation::Kind::kNone,
             dex_registers.GetLocationKind(0, number_of_dex_registers));
   ASSERT_EQ(DexRegisterLocation::Kind::kConstant,
             dex_registers.GetLocationKind(1, number_of_dex_registers));
   ASSERT_EQ(-2, dex_registers.GetConstant(1, number_of_dex_registers));
-  ASSERT_FALSE(stack_map.HasInlineInfo());
+  ASSERT_FALSE(stack_map.HasInlineInfo(code_info));
+}
+
+// Generate a stack map whose dex register offset is
+// StackMap::kNoDexRegisterMapSmallEncoding, and ensure we do
+// not treat it as kNoDexRegisterMap.
+TEST(StackMapTest, DexRegisterMapOffsetOverflow) {
+  ArenaPool pool;
+  ArenaAllocator arena(&pool);
+  StackMapStream stream(&arena);
+
+  ArenaBitVector sp_mask(&arena, 0, false);
+  uint32_t number_of_dex_registers = 0xEA;
+  stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
+  for (uint32_t i = 0; i < number_of_dex_registers - 9; ++i) {
+    stream.AddDexRegisterEntry(i, DexRegisterLocation::Kind::kConstant, 0);
+  }
+  stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
+  for (uint32_t i = 0; i < number_of_dex_registers; ++i) {
+    stream.AddDexRegisterEntry(i, DexRegisterLocation::Kind::kConstant, 0);
+  }
+
+  size_t size = stream.ComputeNeededSize();
+  void* memory = arena.Alloc(size, kArenaAllocMisc);
+  MemoryRegion region(memory, size);
+  stream.FillIn(region);
+
+  CodeInfo code_info(region);
+  StackMap stack_map = code_info.GetStackMapAt(1);
+  ASSERT_TRUE(stack_map.HasDexRegisterMap(code_info));
+  ASSERT_NE(stack_map.GetDexRegisterMapOffset(code_info), StackMap::kNoDexRegisterMap);
+  ASSERT_EQ(stack_map.GetDexRegisterMapOffset(code_info), StackMap::kNoDexRegisterMapSmallEncoding);
+}
+
+TEST(StackMapTest, TestShareDexRegisterMap) {
+  ArenaPool pool;
+  ArenaAllocator arena(&pool);
+  StackMapStream stream(&arena);
+
+  ArenaBitVector sp_mask(&arena, 0, false);
+  uint32_t number_of_dex_registers = 2;
+  // First stack map.
+  stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
+  stream.AddDexRegisterEntry(0, DexRegisterLocation::Kind::kInRegister, 0);
+  stream.AddDexRegisterEntry(1, DexRegisterLocation::Kind::kConstant, -2);
+  // Second stack map, which should share the same dex register map.
+  stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
+  stream.AddDexRegisterEntry(0, DexRegisterLocation::Kind::kInRegister, 0);
+  stream.AddDexRegisterEntry(1, DexRegisterLocation::Kind::kConstant, -2);
+  // Third stack map (doesn't share the dex register map).
+  stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
+  stream.AddDexRegisterEntry(0, DexRegisterLocation::Kind::kInRegister, 2);
+  stream.AddDexRegisterEntry(1, DexRegisterLocation::Kind::kConstant, -2);
+
+  size_t size = stream.ComputeNeededSize();
+  void* memory = arena.Alloc(size, kArenaAllocMisc);
+  MemoryRegion region(memory, size);
+  stream.FillIn(region);
+
+  CodeInfo ci(region);
+  // Verify first stack map.
+  StackMap sm0 = ci.GetStackMapAt(0);
+  DexRegisterMap dex_registers0 = ci.GetDexRegisterMapOf(sm0, number_of_dex_registers);
+  ASSERT_EQ(0, dex_registers0.GetMachineRegister(0, number_of_dex_registers));
+  ASSERT_EQ(-2, dex_registers0.GetConstant(1, number_of_dex_registers));
+
+  // Verify second stack map.
+  StackMap sm1 = ci.GetStackMapAt(1);
+  DexRegisterMap dex_registers1 = ci.GetDexRegisterMapOf(sm1, number_of_dex_registers);
+  ASSERT_EQ(0, dex_registers1.GetMachineRegister(0, number_of_dex_registers));
+  ASSERT_EQ(-2, dex_registers1.GetConstant(1, number_of_dex_registers));
+
+  // Verify third stack map.
+  StackMap sm2 = ci.GetStackMapAt(2);
+  DexRegisterMap dex_registers2 = ci.GetDexRegisterMapOf(sm2, number_of_dex_registers);
+  ASSERT_EQ(2, dex_registers2.GetMachineRegister(0, number_of_dex_registers));
+  ASSERT_EQ(-2, dex_registers2.GetConstant(1, number_of_dex_registers));
+
+  // Verify dex register map offsets.
+  ASSERT_EQ(sm0.GetDexRegisterMapOffset(ci), sm1.GetDexRegisterMapOffset(ci));
+  ASSERT_NE(sm0.GetDexRegisterMapOffset(ci), sm2.GetDexRegisterMapOffset(ci));
+  ASSERT_NE(sm1.GetDexRegisterMapOffset(ci), sm2.GetDexRegisterMapOffset(ci));
 }
 
 }  // namespace art
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index f2704b7..bd155ed 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -1277,6 +1277,14 @@
 }
 
 
+void X86_64Assembler::orq(CpuRegister dst, const Immediate& imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  CHECK(imm.is_int32());  // orq only supports 32b immediate.
+  EmitRex64(dst);
+  EmitComplex(1, Operand(dst), imm);
+}
+
+
 void X86_64Assembler::orq(CpuRegister dst, CpuRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitRex64(dst, src);
@@ -1548,27 +1556,30 @@
 
 
 void X86_64Assembler::imulq(CpuRegister reg, const Immediate& imm) {
+  imulq(reg, reg, imm);
+}
+
+void X86_64Assembler::imulq(CpuRegister dst, CpuRegister reg, const Immediate& imm) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   CHECK(imm.is_int32());  // imulq only supports 32b immediate.
 
-  EmitRex64(reg, reg);
+  EmitRex64(dst, reg);
 
   // See whether imm can be represented as a sign-extended 8bit value.
   int64_t v64 = imm.value();
   if (IsInt<8>(v64)) {
     // Sign-extension works.
     EmitUint8(0x6B);
-    EmitOperand(reg.LowBits(), Operand(reg));
+    EmitOperand(dst.LowBits(), Operand(reg));
     EmitUint8(static_cast<uint8_t>(v64 & 0xFF));
   } else {
     // Not representable, use full immediate.
     EmitUint8(0x69);
-    EmitOperand(reg.LowBits(), Operand(reg));
+    EmitOperand(dst.LowBits(), Operand(reg));
     EmitImmediate(imm);
   }
 }
 
-
 void X86_64Assembler::imulq(CpuRegister reg, const Address& address) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitRex64(reg, address);
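
Note: the new three-operand imulq above chooses between the two x86-64 IMUL-by-immediate encodings: opcode 0x6B takes a sign-extended 8-bit immediate, 0x69 a full 32-bit one. A small sketch of that selection rule (the helper name is illustrative, not the assembler's API):

#include <cstdint>

// Prefer the short encoding whenever the immediate survives an int8 round trip.
static uint8_t SelectImulOpcode(int64_t imm) {
  bool fits_int8 = imm >= INT8_MIN && imm <= INT8_MAX;
  return fits_int8 ? 0x6B : 0x69;  // 0x6B: reg, r/m, imm8; 0x69: reg, r/m, imm32
}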
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index 5dfcf45..495f74f 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -429,6 +429,7 @@
   void orl(CpuRegister dst, CpuRegister src);
   void orl(CpuRegister reg, const Address& address);
   void orq(CpuRegister dst, CpuRegister src);
+  void orq(CpuRegister dst, const Immediate& imm);
 
   void xorl(CpuRegister dst, CpuRegister src);
   void xorl(CpuRegister dst, const Immediate& imm);
@@ -467,6 +468,7 @@
   void imulq(CpuRegister dst, CpuRegister src);
   void imulq(CpuRegister reg, const Immediate& imm);
   void imulq(CpuRegister reg, const Address& address);
+  void imulq(CpuRegister dst, CpuRegister reg, const Immediate& imm);
 
   void imull(CpuRegister reg);
   void imull(const Address& address);
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index a372179..dfea783 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -109,9 +109,14 @@
 
   UsageError("Usage: dex2oat [options]...");
   UsageError("");
-  UsageError("  --dex-file=<dex-file>: specifies a .dex file to compile.");
+  UsageError("  --dex-file=<dex-file>: specifies a .dex, .jar, or .apk file to compile.");
   UsageError("      Example: --dex-file=/system/framework/core.jar");
   UsageError("");
+  UsageError("  --dex-location=<dex-location>: specifies an alternative dex location to");
+  UsageError("      encode in the oat file for the corresponding --dex-file argument.");
+  UsageError("      Example: --dex-file=/home/build/out/system/framework/core.jar");
+  UsageError("               --dex-location=/system/framework/core.jar");
+  UsageError("");
   UsageError("  --zip-fd=<file-descriptor>: specifies a file descriptor of a zip file");
   UsageError("      containing a classes.dex file to compile.");
   UsageError("      Example: --zip-fd=5");
@@ -614,7 +619,6 @@
           Usage("Unknown compiler backend: %s", backend_str.data());
         }
       } else if (option.starts_with("--compiler-filter=")) {
-        requested_specific_compiler = true;
         compiler_filter_string = option.substr(strlen("--compiler-filter=")).data();
       } else if (option == "--compile-pic") {
         compile_pic = true;
@@ -877,15 +881,7 @@
     }
 
     if (compiler_filter_string == nullptr) {
-      if (instruction_set_ == kMips &&
-          reinterpret_cast<const MipsInstructionSetFeatures*>(instruction_set_features_.get())->
-          IsR6()) {
-        // For R6, only interpreter mode is working.
-        // TODO: fix compiler for Mips32r6.
-        compiler_filter_string = "interpret-only";
-      } else {
-        compiler_filter_string = "speed";
-      }
+      compiler_filter_string = "speed";
     }
 
     CHECK(compiler_filter_string != nullptr);
@@ -894,6 +890,8 @@
       compiler_filter = CompilerOptions::kVerifyNone;
     } else if (strcmp(compiler_filter_string, "interpret-only") == 0) {
       compiler_filter = CompilerOptions::kInterpretOnly;
+    } else if (strcmp(compiler_filter_string, "verify-at-runtime") == 0) {
+      compiler_filter = CompilerOptions::kVerifyAtRuntime;
     } else if (strcmp(compiler_filter_string, "space") == 0) {
       compiler_filter = CompilerOptions::kSpace;
     } else if (strcmp(compiler_filter_string, "balanced") == 0) {
diff --git a/disassembler/disassembler_mips.cc b/disassembler/disassembler_mips.cc
index 3d8a567..b27b555 100644
--- a/disassembler/disassembler_mips.cc
+++ b/disassembler/disassembler_mips.cc
@@ -71,10 +71,14 @@
   { kRTypeMask, 17, "mthi", "S", },
   { kRTypeMask, 18, "mflo", "D", },
   { kRTypeMask, 19, "mtlo", "S", },
-  { kRTypeMask, 24, "mult", "ST", },
-  { kRTypeMask, 25, "multu", "ST", },
-  { kRTypeMask, 26, "div", "ST", },
-  { kRTypeMask, 27, "divu", "ST", },
+  { kRTypeMask | (0x1f << 6), 24, "mult", "ST", },
+  { kRTypeMask | (0x1f << 6), 25, "multu", "ST", },
+  { kRTypeMask | (0x1f << 6), 26, "div", "ST", },
+  { kRTypeMask | (0x1f << 6), 27, "divu", "ST", },
+  { kRTypeMask | (0x1f << 6), 24 + (2 << 6), "mul", "DST", },
+  { kRTypeMask | (0x1f << 6), 24 + (3 << 6), "muh", "DST", },
+  { kRTypeMask | (0x1f << 6), 26 + (2 << 6), "div", "DST", },
+  { kRTypeMask | (0x1f << 6), 26 + (3 << 6), "mod", "DST", },
   { kRTypeMask, 32, "add", "DST", },
   { kRTypeMask, 33, "addu", "DST", },
   { kRTypeMask, 34, "sub", "DST", },
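
Note: the widened masks above fold the R-type shift-amount field (bits 10..6) into the match, because MIPS32r6 reuses the legacy MULT/MULTU/DIV/DIVU function codes and distinguishes its new three-operand forms only by that field. A hedged sketch of the decode rule for the funct = 24 family; the helper is illustrative, not the disassembler's API.

#include <cstdint>

// MIPS R-type layout: op[31:26] rs[25:21] rt[20:16] rd[15:11] sa[10:6] funct[5:0].
static const char* DecodeFunct24(uint32_t instruction) {
  uint32_t funct = instruction & 0x3f;      // bits 5..0
  uint32_t sa = (instruction >> 6) & 0x1f;  // bits 10..6
  if (funct != 24) return "not in the funct=24 family";
  switch (sa) {
    case 0:  return "mult";  // legacy two-operand multiply into HI/LO
    case 2:  return "mul";   // r6 three-operand multiply, low half
    case 3:  return "muh";   // r6 three-operand multiply, high half
    default: return "unknown";
  }
}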
diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc
index c27b3d4..daca971 100644
--- a/oatdump/oatdump.cc
+++ b/oatdump/oatdump.cc
@@ -1039,62 +1039,11 @@
     }
   }
 
-  void DumpRegisterMapping(std::ostream& os,
-                           size_t dex_register_num,
-                           DexRegisterLocation::Kind kind,
-                           int32_t value,
-                           const std::string& prefix = "v",
-                           const std::string& suffix = "") {
-    os << "      " << prefix << dex_register_num << ": "
-       << DexRegisterLocation::PrettyDescriptor(kind)
-       << " (" << value << ")" << suffix << '\n';
-  }
-
-  void DumpStackMapHeader(std::ostream& os, const CodeInfo& code_info, size_t stack_map_num) {
-    StackMap stack_map = code_info.GetStackMapAt(stack_map_num);
-    os << "    StackMap " << stack_map_num
-       << std::hex
-       << " (dex_pc=0x" << stack_map.GetDexPc()
-       << ", native_pc_offset=0x" << stack_map.GetNativePcOffset()
-       << ", register_mask=0x" << stack_map.GetRegisterMask()
-       << std::dec
-       << ", stack_mask=0b";
-    MemoryRegion stack_mask = stack_map.GetStackMask();
-    for (size_t i = 0, e = stack_mask.size_in_bits(); i < e; ++i) {
-      os << stack_mask.LoadBit(e - i - 1);
-    }
-    os << ")\n";
-  };
-
   // Display a CodeInfo object emitted by the optimizing compiler.
   void DumpCodeInfo(std::ostream& os,
                     const CodeInfo& code_info,
                     const DexFile::CodeItem& code_item) {
-    uint16_t number_of_dex_registers = code_item.registers_size_;
-    uint32_t code_info_size = code_info.GetOverallSize();
-    size_t number_of_stack_maps = code_info.GetNumberOfStackMaps();
-    os << "  Optimized CodeInfo (size=" << code_info_size
-       << ", number_of_dex_registers=" << number_of_dex_registers
-       << ", number_of_stack_maps=" << number_of_stack_maps << ")\n";
-
-    // Display stack maps along with Dex register maps.
-    for (size_t i = 0; i < number_of_stack_maps; ++i) {
-      StackMap stack_map = code_info.GetStackMapAt(i);
-      DumpStackMapHeader(os, code_info, i);
-      if (stack_map.HasDexRegisterMap()) {
-        DexRegisterMap dex_register_map =
-            code_info.GetDexRegisterMapOf(stack_map, number_of_dex_registers);
-        // TODO: Display the bit mask of live Dex registers.
-        for (size_t j = 0; j < number_of_dex_registers; ++j) {
-          if (dex_register_map.IsDexRegisterLive(j)) {
-            DexRegisterLocation location =
-                dex_register_map.GetLocationKindAndValue(j, number_of_dex_registers);
-            DumpRegisterMapping(os, j, location.GetInternalKind(), location.GetValue());
-          }
-        }
-      }
-    }
-    // TODO: Dump the stack map's inline information.
+    code_info.Dump(os, code_item.registers_size_);
   }
 
   // Display a vmap table.
@@ -1504,7 +1453,9 @@
     std::string error_msg;
     const OatFile* oat_file = class_linker->FindOpenedOatFileFromOatLocation(oat_location);
     if (oat_file == nullptr) {
-      oat_file = OatFile::Open(oat_location, oat_location, nullptr, nullptr, false, &error_msg);
+      oat_file = OatFile::Open(oat_location, oat_location,
+                               nullptr, nullptr, false, nullptr,
+                               &error_msg);
       if (oat_file == nullptr) {
         os << "NOT FOUND: " << error_msg << "\n";
         return false;
@@ -2244,7 +2195,7 @@
                    std::ostream* os) {
   std::string error_msg;
   OatFile* oat_file = OatFile::Open(oat_filename, oat_filename, nullptr, nullptr, false,
-                                    &error_msg);
+                                    nullptr, &error_msg);
   if (oat_file == nullptr) {
     fprintf(stderr, "Failed to open oat file from '%s': %s\n", oat_filename, error_msg.c_str());
     return EXIT_FAILURE;
@@ -2260,7 +2211,7 @@
 static int SymbolizeOat(const char* oat_filename, std::string& output_name) {
   std::string error_msg;
   OatFile* oat_file = OatFile::Open(oat_filename, oat_filename, nullptr, nullptr, false,
-                                    &error_msg);
+                                    nullptr, &error_msg);
   if (oat_file == nullptr) {
     fprintf(stderr, "Failed to open oat file from '%s': %s\n", oat_filename, error_msg.c_str());
     return EXIT_FAILURE;
diff --git a/runtime/Android.mk b/runtime/Android.mk
index 8f20381..5548d94 100644
--- a/runtime/Android.mk
+++ b/runtime/Android.mk
@@ -79,6 +79,7 @@
   intern_table.cc \
   interpreter/interpreter.cc \
   interpreter/interpreter_common.cc \
+  interpreter/interpreter_goto_table_impl.cc \
   interpreter/interpreter_switch_impl.cc \
   interpreter/unstarted_runtime.cc \
   java_vm_ext.cc \
@@ -150,6 +151,7 @@
   runtime_options.cc \
   signal_catcher.cc \
   stack.cc \
+  stack_map.cc \
   thread.cc \
   thread_list.cc \
   thread_pool.cc \
@@ -202,10 +204,6 @@
   entrypoints/quick/quick_throw_entrypoints.cc \
   entrypoints/quick/quick_trampoline_entrypoints.cc
 
-# Source files that only compile with GCC.
-LIBART_GCC_ONLY_SRC_FILES := \
-  interpreter/interpreter_goto_table_impl.cc
-
 LIBART_TARGET_LDFLAGS :=
 LIBART_HOST_LDFLAGS :=
 
@@ -436,19 +434,7 @@
     $$(eval $$(call set-target-local-cflags-vars,$(2)))
     LOCAL_CFLAGS_$(DEX2OAT_TARGET_ARCH) += -DART_DEFAULT_INSTRUCTION_SET_FEATURES="$(LIBART_TARGET_DEFAULT_INSTRUCTION_SET_FEATURES)"
     LOCAL_CFLAGS_$(2ND_DEX2OAT_TARGET_ARCH) += -DART_DEFAULT_INSTRUCTION_SET_FEATURES="$(2ND_LIBART_TARGET_DEFAULT_INSTRUCTION_SET_FEATURES)"
-
-    # TODO: Loop with ifeq, ART_TARGET_CLANG
-    ifneq ($$(ART_TARGET_CLANG_$$(TARGET_ARCH)),true)
-      LOCAL_SRC_FILES_$$(TARGET_ARCH) += $$(LIBART_GCC_ONLY_SRC_FILES)
-    endif
-    ifneq ($$(ART_TARGET_CLANG_$$(TARGET_2ND_ARCH)),true)
-      LOCAL_SRC_FILES_$$(TARGET_2ND_ARCH) += $$(LIBART_GCC_ONLY_SRC_FILES)
-    endif
   else # host
-    ifneq ($$(ART_HOST_CLANG),true)
-      # Add files only built with GCC on the host.
-      LOCAL_SRC_FILES += $$(LIBART_GCC_ONLY_SRC_FILES)
-    endif
     LOCAL_CLANG := $$(ART_HOST_CLANG)
     LOCAL_LDLIBS := $$(ART_HOST_LDLIBS)
     LOCAL_LDLIBS += -ldl -lpthread
@@ -534,7 +520,6 @@
 # Clear locally defined variables.
 LOCAL_PATH :=
 LIBART_COMMON_SRC_FILES :=
-LIBART_GCC_ONLY_SRC_FILES :=
 LIBART_HOST_DEFAULT_INSTRUCTION_SET_FEATURES :=
 LIBART_TARGET_DEFAULT_INSTRUCTION_SET_FEATURES :=
 2ND_LIBART_TARGET_DEFAULT_INSTRUCTION_SET_FEATURES :=
diff --git a/runtime/arch/instruction_set_features.cc b/runtime/arch/instruction_set_features.cc
index 1fd1dea..db4b0b1 100644
--- a/runtime/arch/instruction_set_features.cc
+++ b/runtime/arch/instruction_set_features.cc
@@ -250,7 +250,11 @@
     }
     first = true;
   }
-  DCHECK_EQ(use_default, features.empty());
+  // Expectation: "default" is standalone, with no other flags. But the features vector can also be
+  // empty after processing if the handled flags (at the moment only smp) are the only ones in the
+  // list. So logically, we check "default -> features.empty."
+  DCHECK(!use_default || features.empty());
+
   return AddFeaturesFromSplitString(smp, features, error_msg);
 }
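
The relaxed assertion above encodes the implication "use_default -> features.empty()" as its logical equivalent `!use_default || features.empty()`. A minimal standalone sketch of that pattern, with hypothetical names, just to make the truth table explicit:

```cpp
#include <cassert>
#include <string>
#include <vector>

// Hypothetical stand-in for the parser state above; names are illustrative only.
static bool DefaultImpliesNoExtraFeatures(bool use_default,
                                          const std::vector<std::string>& features) {
  // "A implies B" is written as "!A || B": the check only fails when
  // use_default is set and unhandled features remain in the list.
  return !use_default || features.empty();
}

int main() {
  assert(DefaultImpliesNoExtraFeatures(true, {}));        // "default" alone: OK.
  assert(DefaultImpliesNoExtraFeatures(false, {"div"}));  // explicit feature list: OK.
  assert(!DefaultImpliesNoExtraFeatures(true, {"div"}));  // "default,div" would violate the expectation.
  return 0;
}
```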
 
diff --git a/runtime/arch/mips/instruction_set_features_mips.cc b/runtime/arch/mips/instruction_set_features_mips.cc
index 00ab613..93d79b7 100644
--- a/runtime/arch/mips/instruction_set_features_mips.cc
+++ b/runtime/arch/mips/instruction_set_features_mips.cc
@@ -24,13 +24,56 @@
 
 namespace art {
 
+// An enum for the Mips revision.
+enum class MipsLevel {
+  kBase,
+  kR2,
+  kR5,
+  kR6
+};
+
+#if defined(_MIPS_ARCH_MIPS32R6)
+static constexpr MipsLevel kRuntimeMipsLevel = MipsLevel::kR6;
+#elif defined(_MIPS_ARCH_MIPS32R5)
+static constexpr MipsLevel kRuntimeMipsLevel = MipsLevel::kR5;
+#elif defined(_MIPS_ARCH_MIPS32R2)
+static constexpr MipsLevel kRuntimeMipsLevel = MipsLevel::kR2;
+#else
+static constexpr MipsLevel kRuntimeMipsLevel = MipsLevel::kBase;
+#endif
+
+static void GetFlagsFromCppDefined(bool* mips_isa_gte2, bool* r6, bool* fpu_32bit) {
+  // Override defaults based on compiler flags.
+  if (kRuntimeMipsLevel >= MipsLevel::kR2) {
+    *mips_isa_gte2 = true;
+  } else {
+    *mips_isa_gte2 = false;
+  }
+
+  if (kRuntimeMipsLevel >= MipsLevel::kR5) {
+    *fpu_32bit = false;
+  } else {
+    *fpu_32bit = true;
+  }
+
+  if (kRuntimeMipsLevel >= MipsLevel::kR6) {
+    *r6 = true;
+  } else {
+    *r6 = false;
+  }
+}
+
 const MipsInstructionSetFeatures* MipsInstructionSetFeatures::FromVariant(
     const std::string& variant, std::string* error_msg ATTRIBUTE_UNUSED) {
 
   bool smp = true;  // Conservative default.
-  bool fpu_32bit = true;
-  bool mips_isa_gte2 = false;
-  bool r6 = false;
+
+  // Override defaults based on compiler flags.
+  // This is needed when running ART tests where the variant is not defined.
+  bool fpu_32bit;
+  bool mips_isa_gte2;
+  bool r6;
+  GetFlagsFromCppDefined(&mips_isa_gte2, &r6, &fpu_32bit);
 
   // Override defaults based on variant string.
   // Only care if it is R1, R2 or R6 and we assume all CPUs will have a FP unit.
@@ -67,19 +110,11 @@
 const MipsInstructionSetFeatures* MipsInstructionSetFeatures::FromCppDefines() {
   // Assume conservative defaults.
   const bool smp = true;
-  bool fpu_32bit = true;
-  bool mips_isa_gte2 = false;
-  bool r6 = false;
 
-  // Override defaults based on compiler flags.
-#if (_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS32R5) || defined(_MIPS_ARCH_MIPS32R6)
-  mips_isa_gte2 = true;
-#endif
-
-#if defined(_MIPS_ARCH_MIPS32R6)
-  r6 = true;
-  fpu_32bit = false;
-#endif
+  bool fpu_32bit;
+  bool mips_isa_gte2;
+  bool r6;
+  GetFlagsFromCppDefined(&mips_isa_gte2, &r6, &fpu_32bit);
 
   return new MipsInstructionSetFeatures(smp, fpu_32bit, mips_isa_gte2, r6);
 }
@@ -89,19 +124,11 @@
   // the kernel puts the appropriate feature flags in here.  Sometimes it doesn't.
   // Assume conservative defaults.
   bool smp = false;
-  bool fpu_32bit = true;
-  bool mips_isa_gte2 = false;
-  bool r6 = false;
 
-  // Override defaults based on compiler flags.
-#if (_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS32R5) || defined(_MIPS_ARCH_MIPS32R6)
-  mips_isa_gte2 = true;
-#endif
-
-#if defined(_MIPS_ARCH_MIPS32R6)
-  r6 = true;
-  fpu_32bit = false;
-#endif
+  bool fpu_32bit;
+  bool mips_isa_gte2;
+  bool r6;
+  GetFlagsFromCppDefined(&mips_isa_gte2, &r6, &fpu_32bit);
 
   std::ifstream in("/proc/cpuinfo");
   if (!in.fail()) {
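
GetFlagsFromCppDefined gives FromVariant, FromCppDefines and the /proc/cpuinfo path a single source of truth for the baseline flags instead of three copies of the old #if ladder. A hedged, self-contained sketch of the same threshold mapping (returning a struct instead of writing through out-parameters, purely for brevity):

```cpp
#include <iostream>

enum class MipsLevel { kBase, kR2, kR5, kR6 };

struct MipsFlags {
  bool mips_isa_gte2;  // true for R2 and above
  bool fpu_32bit;      // false for R5 and above
  bool r6;             // true only for R6
};

// Same thresholds as GetFlagsFromCppDefined, expressed as comparisons on the level.
MipsFlags FlagsForLevel(MipsLevel level) {
  return MipsFlags{
      /*mips_isa_gte2=*/ level >= MipsLevel::kR2,
      /*fpu_32bit=*/     level <  MipsLevel::kR5,
      /*r6=*/            level >= MipsLevel::kR6,
  };
}

int main() {
  MipsFlags r5 = FlagsForLevel(MipsLevel::kR5);
  std::cout << r5.mips_isa_gte2 << r5.fpu_32bit << r5.r6 << '\n';  // prints "100"
  return 0;
}
```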
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 0f874a4..5edcd96 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -883,7 +883,44 @@
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_bump_pointer_instrumented, BumpPointerInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_bump_pointer_instrumented, BumpPointerInstrumented)
 
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB)
+DEFINE_FUNCTION art_quick_alloc_object_tlab
+    // Fast path tlab allocation.
+    // RDI: uint32_t type_idx, RSI: ArtMethod*
+    // RDX, RCX, R8, R9: free. RAX: return val.
+    movl MIRROR_ART_METHOD_DEX_CACHE_TYPES_OFFSET(%rsi), %edx  // Load dex cache resolved types array
+                                                               // Load the class
+    movl MIRROR_OBJECT_ARRAY_DATA_OFFSET(%rdx, %rdi, MIRROR_OBJECT_ARRAY_COMPONENT_SIZE), %edx
+    testl %edx, %edx                                           // Check null class
+    jz   .Lart_quick_alloc_object_tlab_slow_path
+                                                               // Check class status.
+    cmpl LITERAL(MIRROR_CLASS_STATUS_INITIALIZED), MIRROR_CLASS_STATUS_OFFSET(%rdx)
+    jne  .Lart_quick_alloc_object_tlab_slow_path
+                                                               // Check access flags has kAccClassIsFinalizable
+    testl LITERAL(ACCESS_FLAGS_CLASS_IS_FINALIZABLE), MIRROR_CLASS_ACCESS_FLAGS_OFFSET(%rdx)
+    jnz  .Lart_quick_alloc_object_tlab_slow_path
+    movl MIRROR_CLASS_OBJECT_SIZE_OFFSET(%rdx), %ecx           // Load the object size.
+    addl LITERAL(OBJECT_ALIGNMENT_MASK), %ecx                  // Align the size by 8. (addr + 7) & ~7.
+    andl LITERAL(OBJECT_ALIGNMENT_MASK_TOGGLED), %ecx
+    movq %gs:THREAD_SELF_OFFSET, %r8                           // r8 = thread
+    movq THREAD_LOCAL_POS_OFFSET(%r8), %rax                    // Load thread_local_pos.
+    addq %rax, %rcx                                            // Add the object size.
+    cmpq THREAD_LOCAL_END_OFFSET(%r8), %rcx                    // Check if it fits.
+    ja   .Lart_quick_alloc_object_tlab_slow_path
+    movq %rcx, THREAD_LOCAL_POS_OFFSET(%r8)                    // Update thread_local_pos.
+    addq LITERAL(1), THREAD_LOCAL_OBJECTS_OFFSET(%r8)          // Increment thread_local_objects.
+                                                               // Store the class pointer in the header.
+                                                               // No fence needed for x86.
+    movl %edx, MIRROR_OBJECT_CLASS_OFFSET(%rax)
+    ret                                                        // Fast path succeeded.
+.Lart_quick_alloc_object_tlab_slow_path:
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    // save ref containing registers for GC
+    // Outgoing argument set up
+    movq %gs:THREAD_SELF_OFFSET, %rdx    // pass Thread::Current()
+    call SYMBOL(artAllocObjectFromCodeTLAB)      // cxx_name(arg0, arg1, Thread*)
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
+    RETURN_IF_RESULT_IS_NON_ZERO         // return or deliver exception
+END_FUNCTION art_quick_alloc_object_tlab
+
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab, TLAB)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_tlab, TLAB)
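
The hand-written art_quick_alloc_object_tlab above is a bump-pointer fast path: resolve the class from the dex cache, bail to the slow path for an unresolved, uninitialized or finalizable class, round the object size up to 8 bytes, and claim the memory by advancing thread_local_pos if the result still fits below thread_local_end. A rough C++ rendering of that control flow, using placeholder types rather than ART's Thread and mirror::Class (a sketch of the logic, not the runtime's actual allocator):

```cpp
#include <cstddef>
#include <cstdint>

// Placeholder structures; the field names only mirror the offsets used in the assembly.
struct SketchClass {
  bool initialized;
  bool finalizable;
  uint32_t object_size;
};

struct SketchThread {
  uint8_t* thread_local_pos;
  uint8_t* thread_local_end;
  size_t thread_local_objects;
};

// Returns the address of the new object, or nullptr to mean "take the slow path".
void* AllocObjectTlabFastPath(SketchThread* self, const SketchClass* klass) {
  if (klass == nullptr || !klass->initialized || klass->finalizable) {
    return nullptr;  // Unresolved, uninitialized or finalizable class: slow path.
  }
  size_t size = (klass->object_size + 7u) & ~static_cast<size_t>(7);  // Align to 8 bytes.
  uint8_t* obj = self->thread_local_pos;
  if (self->thread_local_end - obj < static_cast<ptrdiff_t>(size)) {
    return nullptr;  // Does not fit in the remaining TLAB space: slow path.
  }
  self->thread_local_pos = obj + size;  // Bump the pointer; no fence needed on x86.
  ++self->thread_local_objects;
  // The real entrypoint also stores the class pointer into the object header here.
  return obj;
}
```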
diff --git a/runtime/asm_support.h b/runtime/asm_support.h
index 92f4ebe..b1dbf6f 100644
--- a/runtime/asm_support.h
+++ b/runtime/asm_support.h
@@ -103,6 +103,16 @@
 ADD_TEST_EQ(THREAD_SELF_OFFSET,
             art::Thread::SelfOffset<__SIZEOF_POINTER__>().Int32Value())
 
+#define THREAD_LOCAL_POS_OFFSET (THREAD_CARD_TABLE_OFFSET + 125 * __SIZEOF_POINTER__)
+ADD_TEST_EQ(THREAD_LOCAL_POS_OFFSET,
+            art::Thread::ThreadLocalPosOffset<__SIZEOF_POINTER__>().Int32Value())
+#define THREAD_LOCAL_END_OFFSET (THREAD_LOCAL_POS_OFFSET + __SIZEOF_POINTER__)
+ADD_TEST_EQ(THREAD_LOCAL_END_OFFSET,
+            art::Thread::ThreadLocalEndOffset<__SIZEOF_POINTER__>().Int32Value())
+#define THREAD_LOCAL_OBJECTS_OFFSET (THREAD_LOCAL_POS_OFFSET + 2 * __SIZEOF_POINTER__)
+ADD_TEST_EQ(THREAD_LOCAL_OBJECTS_OFFSET,
+            art::Thread::ThreadLocalObjectsOffset<__SIZEOF_POINTER__>().Int32Value())
+
 // Offsets within java.lang.Object.
 #define MIRROR_OBJECT_CLASS_OFFSET 0
 ADD_TEST_EQ(MIRROR_OBJECT_CLASS_OFFSET, art::mirror::Object::ClassOffset().Int32Value())
@@ -120,6 +130,22 @@
 #define MIRROR_CLASS_COMPONENT_TYPE_OFFSET (4 + MIRROR_OBJECT_HEADER_SIZE)
 ADD_TEST_EQ(MIRROR_CLASS_COMPONENT_TYPE_OFFSET,
             art::mirror::Class::ComponentTypeOffset().Int32Value())
+#define MIRROR_CLASS_ACCESS_FLAGS_OFFSET (52 + MIRROR_OBJECT_HEADER_SIZE)
+ADD_TEST_EQ(MIRROR_CLASS_ACCESS_FLAGS_OFFSET,
+            art::mirror::Class::AccessFlagsOffset().Int32Value())
+#define MIRROR_CLASS_OBJECT_SIZE_OFFSET (80 + MIRROR_OBJECT_HEADER_SIZE)
+ADD_TEST_EQ(MIRROR_CLASS_OBJECT_SIZE_OFFSET,
+            art::mirror::Class::ObjectSizeOffset().Int32Value())
+#define MIRROR_CLASS_STATUS_OFFSET (92 + MIRROR_OBJECT_HEADER_SIZE)
+ADD_TEST_EQ(MIRROR_CLASS_STATUS_OFFSET,
+            art::mirror::Class::StatusOffset().Int32Value())
+
+#define MIRROR_CLASS_STATUS_INITIALIZED 10
+ADD_TEST_EQ(static_cast<uint32_t>(MIRROR_CLASS_STATUS_INITIALIZED),
+            static_cast<uint32_t>(art::mirror::Class::kStatusInitialized))
+#define ACCESS_FLAGS_CLASS_IS_FINALIZABLE 0x80000000
+ADD_TEST_EQ(static_cast<uint32_t>(ACCESS_FLAGS_CLASS_IS_FINALIZABLE),
+            static_cast<uint32_t>(kAccClassIsFinalizable))
 
 // Array offsets.
 #define MIRROR_ARRAY_LENGTH_OFFSET      MIRROR_OBJECT_HEADER_SIZE
@@ -134,6 +160,10 @@
     art::mirror::Array::DataOffset(
         sizeof(art::mirror::HeapReference<art::mirror::Object>)).Int32Value())
 
+#define MIRROR_OBJECT_ARRAY_COMPONENT_SIZE 4
+ADD_TEST_EQ(static_cast<size_t>(MIRROR_OBJECT_ARRAY_COMPONENT_SIZE),
+            sizeof(art::mirror::HeapReference<art::mirror::Object>))
+
 // Offsets within java.lang.String.
 #define MIRROR_STRING_VALUE_OFFSET  MIRROR_OBJECT_HEADER_SIZE
 ADD_TEST_EQ(MIRROR_STRING_VALUE_OFFSET, art::mirror::String::ValueOffset().Int32Value())
@@ -149,6 +179,10 @@
 ADD_TEST_EQ(MIRROR_ART_METHOD_DEX_CACHE_METHODS_OFFSET,
             art::mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value())
 
+#define MIRROR_ART_METHOD_DEX_CACHE_TYPES_OFFSET (8 + MIRROR_OBJECT_HEADER_SIZE)
+ADD_TEST_EQ(MIRROR_ART_METHOD_DEX_CACHE_TYPES_OFFSET,
+            art::mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value())
+
 #define MIRROR_ART_METHOD_QUICK_CODE_OFFSET_32        (36 + MIRROR_OBJECT_HEADER_SIZE)
 ADD_TEST_EQ(MIRROR_ART_METHOD_QUICK_CODE_OFFSET_32,
             art::mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(4).Int32Value())
@@ -178,6 +212,13 @@
 #define LOCK_WORD_THIN_LOCK_COUNT_ONE 65536
 ADD_TEST_EQ(LOCK_WORD_THIN_LOCK_COUNT_ONE, static_cast<int32_t>(art::LockWord::kThinLockCountOne))
 
+#define OBJECT_ALIGNMENT_MASK 7
+ADD_TEST_EQ(static_cast<size_t>(OBJECT_ALIGNMENT_MASK), art::kObjectAlignment - 1)
+
+#define OBJECT_ALIGNMENT_MASK_TOGGLED 0xFFFFFFF8
+ADD_TEST_EQ(static_cast<uint32_t>(OBJECT_ALIGNMENT_MASK_TOGGLED),
+            ~static_cast<uint32_t>(art::kObjectAlignment - 1))
+
 #if defined(__cplusplus)
 }  // End of CheckAsmSupportOffsets.
 #endif
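
OBJECT_ALIGNMENT_MASK and OBJECT_ALIGNMENT_MASK_TOGGLED are the two halves of the round-up idiom the TLAB entrypoint performs with addl/andl ((size + 7) & ~7), and ADD_TEST_EQ pins each hand-written constant against its real C++ counterpart at compile time. A small worked sketch of the same arithmetic, with static_asserts standing in for the ADD_TEST_EQ checks:

```cpp
#include <cstdint>

constexpr uint32_t kObjectAlignment = 8;                     // 8-byte object alignment, as assumed above.
constexpr uint32_t kAlignmentMask = kObjectAlignment - 1;    // 7          == OBJECT_ALIGNMENT_MASK
constexpr uint32_t kAlignmentMaskToggled = ~kAlignmentMask;  // 0xFFFFFFF8 == OBJECT_ALIGNMENT_MASK_TOGGLED

constexpr uint32_t RoundUpObjectSize(uint32_t size) {
  // Same addl/andl pair as the assembly: (size + 7) & ~7.
  return (size + kAlignmentMask) & kAlignmentMaskToggled;
}

// Compile-time cross-checks in the spirit of ADD_TEST_EQ.
static_assert(kAlignmentMaskToggled == 0xFFFFFFF8u, "toggled mask mismatch");
static_assert(RoundUpObjectSize(1) == 8, "1 rounds up to 8");
static_assert(RoundUpObjectSize(8) == 8, "aligned sizes are unchanged");
static_assert(RoundUpObjectSize(13) == 16, "13 rounds up to 16");
```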
diff --git a/runtime/base/hash_set.h b/runtime/base/hash_set.h
index 992e5b1..ab63ddd 100644
--- a/runtime/base/hash_set.h
+++ b/runtime/base/hash_set.h
@@ -50,91 +50,101 @@
 };
 
 // Low memory version of a hash set, uses less memory than std::unordered_set since elements aren't
-// boxed. Uses linear probing.
-// EmptyFn needs to implement two functions MakeEmpty(T& item) and IsEmpty(const T& item)
+// boxed. Uses linear probing to resolve collisions.
+// EmptyFn needs to implement two functions MakeEmpty(T& item) and IsEmpty(const T& item).
+// TODO: We could get rid of this requirement by using a bitmap, though maybe this would be slower
+// and more complicated.
 template <class T, class EmptyFn = DefaultEmptyFn<T>, class HashFn = std::hash<T>,
     class Pred = std::equal_to<T>, class Alloc = std::allocator<T>>
 class HashSet {
- public:
-  static constexpr double kDefaultMinLoadFactor = 0.5;
-  static constexpr double kDefaultMaxLoadFactor = 0.9;
-  static constexpr size_t kMinBuckets = 1000;
-
-  class Iterator {
+  template <class Elem, class HashSetType>
+  class BaseIterator {
    public:
-    Iterator(const Iterator&) = default;
-    Iterator(HashSet* hash_set, size_t index) : hash_set_(hash_set), index_(index) {
+    BaseIterator(const BaseIterator&) = default;
+    BaseIterator(BaseIterator&&) = default;
+    BaseIterator(HashSetType* hash_set, size_t index) : index_(index), hash_set_(hash_set) {
     }
-    Iterator& operator=(const Iterator&) = default;
-    bool operator==(const Iterator& other) const {
-      return hash_set_ == other.hash_set_ && index_ == other.index_;
+    BaseIterator& operator=(const BaseIterator&) = default;
+    BaseIterator& operator=(BaseIterator&&) = default;
+
+    bool operator==(const BaseIterator& other) const {
+      return hash_set_ == other.hash_set_ && this->index_ == other.index_;
     }
-    bool operator!=(const Iterator& other) const {
+
+    bool operator!=(const BaseIterator& other) const {
       return !(*this == other);
     }
-    Iterator operator++() {  // Value after modification.
-      index_ = NextNonEmptySlot(index_);
+
+    BaseIterator operator++() {  // Value after modification.
+      this->index_ = this->NextNonEmptySlot(this->index_, hash_set_);
       return *this;
     }
-    Iterator operator++(int) {
+
+    BaseIterator operator++(int) {
       Iterator temp = *this;
-      index_ = NextNonEmptySlot(index_);
+      this->index_ = this->NextNonEmptySlot(this->index_, hash_set_);
       return temp;
     }
-    T& operator*() {
-      DCHECK(!hash_set_->IsFreeSlot(GetIndex()));
-      return hash_set_->ElementForIndex(index_);
+
+    Elem& operator*() const {
+      DCHECK(!hash_set_->IsFreeSlot(this->index_));
+      return hash_set_->ElementForIndex(this->index_);
     }
-    const T& operator*() const {
-      DCHECK(!hash_set_->IsFreeSlot(GetIndex()));
-      return hash_set_->ElementForIndex(index_);
-    }
-    T* operator->() {
+
+    Elem* operator->() const {
       return &**this;
     }
-    const T* operator->() const {
-      return &**this;
-    }
+
     // TODO: Operator -- --(int)
 
    private:
-    HashSet* hash_set_;
     size_t index_;
+    HashSetType* hash_set_;
 
-    size_t GetIndex() const {
-      return index_;
-    }
-    size_t NextNonEmptySlot(size_t index) const {
-      const size_t num_buckets = hash_set_->NumBuckets();
+    size_t NextNonEmptySlot(size_t index, const HashSet* hash_set) const {
+      const size_t num_buckets = hash_set->NumBuckets();
       DCHECK_LT(index, num_buckets);
       do {
         ++index;
-      } while (index < num_buckets && hash_set_->IsFreeSlot(index));
+      } while (index < num_buckets && hash_set->IsFreeSlot(index));
       return index;
     }
 
     friend class HashSet;
   };
 
+ public:
+  static constexpr double kDefaultMinLoadFactor = 0.5;
+  static constexpr double kDefaultMaxLoadFactor = 0.9;
+  static constexpr size_t kMinBuckets = 1000;
+
+  typedef BaseIterator<T, HashSet> Iterator;
+  typedef BaseIterator<const T, const HashSet> ConstIterator;
+
   void Clear() {
     DeallocateStorage();
     AllocateStorage(1);
     num_elements_ = 0;
     elements_until_expand_ = 0;
   }
+
   HashSet() : num_elements_(0), num_buckets_(0), data_(nullptr),
       min_load_factor_(kDefaultMinLoadFactor), max_load_factor_(kDefaultMaxLoadFactor) {
     Clear();
   }
+
   HashSet(const HashSet& other) : num_elements_(0), num_buckets_(0), data_(nullptr) {
     *this = other;
   }
+
   HashSet(HashSet&& other) : num_elements_(0), num_buckets_(0), data_(nullptr) {
     *this = std::move(other);
   }
+
   ~HashSet() {
     DeallocateStorage();
   }
+
   HashSet& operator=(HashSet&& other) {
     std::swap(data_, other.data_);
     std::swap(num_buckets_, other.num_buckets_);
@@ -144,6 +154,7 @@
     std::swap(max_load_factor_, other.max_load_factor_);
     return *this;
   }
+
   HashSet& operator=(const HashSet& other) {
     DeallocateStorage();
     AllocateStorage(other.NumBuckets());
@@ -156,21 +167,25 @@
     max_load_factor_ = other.max_load_factor_;
     return *this;
   }
+
   // Lower case for c++11 for each.
   Iterator begin() {
     Iterator ret(this, 0);
-    if (num_buckets_ != 0 && IsFreeSlot(ret.GetIndex())) {
+    if (num_buckets_ != 0 && IsFreeSlot(ret.index_)) {
       ++ret;  // Skip all the empty slots.
     }
     return ret;
   }
+
   // Lower case for c++11 for each.
   Iterator end() {
     return Iterator(this, NumBuckets());
   }
+
   bool Empty() {
-    return begin() == end();
+    return Size() == 0;
   }
+
   // Erase algorithm:
   // Make an empty slot where the iterator is pointing.
   // Scan forwards until we hit another empty slot.
@@ -181,7 +196,7 @@
   // element to its actual location/index.
   Iterator Erase(Iterator it) {
     // empty_index is the index that will become empty.
-    size_t empty_index = it.GetIndex();
+    size_t empty_index = it.index_;
     DCHECK(!IsFreeSlot(empty_index));
     size_t next_index = empty_index;
     bool filled = false;  // True if we filled the empty index.
@@ -224,33 +239,36 @@
     }
     return it;
   }
+
   // Find an element, returns end() if not found.
-  // Allows custom K types, example of when this is useful.
+  // Allows custom key (K) types, example of when this is useful:
   // Set of Class* sorted by name, want to find a class with a name but can't allocate a dummy
   // object in the heap for performance reasons.
   template <typename K>
   Iterator Find(const K& element) {
     return FindWithHash(element, hashfn_(element));
   }
+
+  template <typename K>
+  ConstIterator Find(const K& element) const {
+    return FindWithHash(element, hashfn_(element));
+  }
+
   template <typename K>
   Iterator FindWithHash(const K& element, size_t hash) {
-    DCHECK_EQ(hashfn_(element), hash);
-    size_t index = IndexForHash(hash);
-    while (true) {
-      T& slot = ElementForIndex(index);
-      if (emptyfn_.IsEmpty(slot)) {
-        return end();
-      }
-      if (pred_(slot, element)) {
-        return Iterator(this, index);
-      }
-      index = NextIndex(index);
-    }
+    return Iterator(this, FindIndex(element, hash));
   }
+
+  template <typename K>
+  ConstIterator FindWithHash(const K& element, size_t hash) const {
+    return ConstIterator(this, FindIndex(element, hash));
+  }
+
   // Insert an element, allows duplicates.
   void Insert(const T& element) {
     InsertWithHash(element, hashfn_(element));
   }
+
   void InsertWithHash(const T& element, size_t hash) {
     DCHECK_EQ(hash, hashfn_(element));
     if (num_elements_ >= elements_until_expand_) {
@@ -261,12 +279,15 @@
     data_[index] = element;
     ++num_elements_;
   }
+
   size_t Size() const {
     return num_elements_;
   }
+
   void ShrinkToMaximumLoad() {
     Resize(Size() / max_load_factor_);
   }
+
   // Total distance that inserted elements were probed. Used for measuring how good hash functions
   // are.
   size_t TotalProbeDistance() const {
@@ -284,10 +305,12 @@
     }
     return total;
   }
+
   // Calculate the current load factor and return it.
   double CalculateLoadFactor() const {
     return static_cast<double>(Size()) / static_cast<double>(NumBuckets());
   }
+
   // Make sure that everything reinserts in the right spot. Returns the number of errors.
   size_t Verify() {
     size_t errors = 0;
@@ -314,14 +337,17 @@
     DCHECK(data_ != nullptr);
     return data_[index];
   }
+
   const T& ElementForIndex(size_t index) const {
     DCHECK_LT(index, NumBuckets());
     DCHECK(data_ != nullptr);
     return data_[index];
   }
+
   size_t IndexForHash(size_t hash) const {
     return hash % num_buckets_;
   }
+
   size_t NextIndex(size_t index) const {
     if (UNLIKELY(++index >= num_buckets_)) {
       DCHECK_EQ(index, NumBuckets());
@@ -329,12 +355,33 @@
     }
     return index;
   }
+
+  // Find the hash table slot for an element, or return NumBuckets() if not found.
+  // This value for not found is important so that Iterator(this, FindIndex(...)) == end().
+  template <typename K>
+  size_t FindIndex(const K& element, size_t hash) const {
+    DCHECK_EQ(hashfn_(element), hash);
+    size_t index = IndexForHash(hash);
+    while (true) {
+      const T& slot = ElementForIndex(index);
+      if (emptyfn_.IsEmpty(slot)) {
+        return NumBuckets();
+      }
+      if (pred_(slot, element)) {
+        return index;
+      }
+      index = NextIndex(index);
+    }
+  }
+
   bool IsFreeSlot(size_t index) const {
     return emptyfn_.IsEmpty(ElementForIndex(index));
   }
+
   size_t NumBuckets() const {
     return num_buckets_;
   }
+
   // Allocate a number of buckets.
   void AllocateStorage(size_t num_buckets) {
     num_buckets_ = num_buckets;
@@ -344,6 +391,7 @@
       emptyfn_.MakeEmpty(data_[i]);
     }
   }
+
   void DeallocateStorage() {
     if (num_buckets_ != 0) {
       for (size_t i = 0; i < NumBuckets(); ++i) {
@@ -354,6 +402,7 @@
       num_buckets_ = 0;
     }
   }
+
   // Expand the set based on the load factors.
   void Expand() {
     size_t min_index = static_cast<size_t>(Size() / min_load_factor_);
@@ -365,6 +414,7 @@
     // When we hit elements_until_expand_, we are at the max load factor and must expand again.
     elements_until_expand_ = NumBuckets() * max_load_factor_;
   }
+
   // Expand / shrink the table to the new specified size.
   void Resize(size_t new_size) {
     DCHECK_GE(new_size, Size());
@@ -381,6 +431,7 @@
     }
     allocfn_.deallocate(old_data, old_num_buckets);
   }
+
   ALWAYS_INLINE size_t FirstAvailableSlot(size_t index) const {
     while (!emptyfn_.IsEmpty(data_[index])) {
       index = NextIndex(index);
@@ -398,8 +449,6 @@
   T* data_;  // Backing storage.
   double min_load_factor_;
   double max_load_factor_;
-
-  friend class Iterator;
 };
 
 }  // namespace art
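
The reworked container keeps the EmptyFn contract (MakeEmpty/IsEmpty mark a free slot in place, instead of boxing elements) and gains ConstIterator plus const overloads of Find/FindWithHash built on the shared BaseIterator. A hedged usage sketch against the interface shown above; reserving 0 as the empty marker is a choice of this example, not something the header requires beyond the EmptyFn contract:

```cpp
#include <cstddef>

#include "base/hash_set.h"

// EmptyFn for plain ints: this example treats 0 as the free-slot marker.
struct IntEmptyFn {
  void MakeEmpty(int& item) const { item = 0; }
  bool IsEmpty(const int& item) const { return item == 0; }
};

void HashSetSketch() {
  art::HashSet<int, IntEmptyFn> set;
  set.Insert(42);
  set.Insert(7);

  auto it = set.Find(42);
  if (it != set.end()) {
    *it = 43;       // Elements are stored unboxed, so they can be updated in place.
    set.Erase(it);  // Erase back-fills the probe chain as described in the comment above.
  }
  size_t remaining = set.Size();  // 1
  (void)remaining;
}
```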
diff --git a/runtime/base/hash_set_test.cc b/runtime/base/hash_set_test.cc
index 5f498d9..e88637f 100644
--- a/runtime/base/hash_set_test.cc
+++ b/runtime/base/hash_set_test.cc
@@ -21,7 +21,7 @@
 #include <string>
 #include <unordered_set>
 
-#include "common_runtime_test.h"
+#include <gtest/gtest.h>
 #include "hash_map.h"
 
 namespace art {
@@ -35,7 +35,7 @@
   }
 };
 
-class HashSetTest : public CommonRuntimeTest {
+class HashSetTest : public testing::Test {
  public:
   HashSetTest() : seed_(97421), unique_number_(0) {
   }
diff --git a/runtime/check_reference_map_visitor.h b/runtime/check_reference_map_visitor.h
index 0ec0295..204546d 100644
--- a/runtime/check_reference_map_visitor.h
+++ b/runtime/check_reference_map_visitor.h
@@ -69,8 +69,8 @@
     uint16_t number_of_dex_registers = m->GetCodeItem()->registers_size_;
     DexRegisterMap dex_register_map =
         code_info.GetDexRegisterMapOf(stack_map, number_of_dex_registers);
-    MemoryRegion stack_mask = stack_map.GetStackMask();
-    uint32_t register_mask = stack_map.GetRegisterMask();
+    MemoryRegion stack_mask = stack_map.GetStackMask(code_info);
+    uint32_t register_mask = stack_map.GetRegisterMask(code_info);
     for (int i = 0; i < number_of_references; ++i) {
       int reg = registers[i];
       CHECK(reg < m->GetCodeItem()->registers_size_);
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index 700e1ad..67872d7 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -282,7 +282,7 @@
   CHECK(java_lang_Object.Get() != nullptr);
   // backfill Object as the super class of Class.
   java_lang_Class->SetSuperClass(java_lang_Object.Get());
-  java_lang_Object->SetStatus(mirror::Class::kStatusLoaded, self);
+  mirror::Class::SetStatus(java_lang_Object, mirror::Class::kStatusLoaded, self);
 
   // Object[] next to hold class roots.
   Handle<mirror::Class> object_array_class(hs.NewHandle(
@@ -311,14 +311,14 @@
       AllocClass(self, java_lang_Class.Get(), mirror::String::ClassSize())));
   mirror::String::SetClass(java_lang_String.Get());
   java_lang_String->SetObjectSize(mirror::String::InstanceSize());
-  java_lang_String->SetStatus(mirror::Class::kStatusResolved, self);
+  mirror::Class::SetStatus(java_lang_String, mirror::Class::kStatusResolved, self);
 
   // Setup Reference.
   Handle<mirror::Class> java_lang_ref_Reference(hs.NewHandle(
       AllocClass(self, java_lang_Class.Get(), mirror::Reference::ClassSize())));
   mirror::Reference::SetClass(java_lang_ref_Reference.Get());
   java_lang_ref_Reference->SetObjectSize(mirror::Reference::InstanceSize());
-  java_lang_ref_Reference->SetStatus(mirror::Class::kStatusResolved, self);
+  mirror::Class::SetStatus(java_lang_ref_Reference, mirror::Class::kStatusResolved, self);
 
   // Create storage for root classes, save away our work so far (requires descriptors).
   class_roots_ = GcRoot<mirror::ObjectArray<mirror::Class> >(
@@ -360,7 +360,7 @@
       AllocClass(self, java_lang_Class.Get(), mirror::DexCache::ClassSize())));
   SetClassRoot(kJavaLangDexCache, java_lang_DexCache.Get());
   java_lang_DexCache->SetObjectSize(mirror::DexCache::InstanceSize());
-  java_lang_DexCache->SetStatus(mirror::Class::kStatusResolved, self);
+  mirror::Class::SetStatus(java_lang_DexCache, mirror::Class::kStatusResolved, self);
 
   // Constructor, Field, Method, and AbstractMethod are necessary so
   // that FindClass can link members.
@@ -369,7 +369,7 @@
   CHECK(java_lang_reflect_ArtField.Get() != nullptr);
   java_lang_reflect_ArtField->SetObjectSize(mirror::ArtField::InstanceSize());
   SetClassRoot(kJavaLangReflectArtField, java_lang_reflect_ArtField.Get());
-  java_lang_reflect_ArtField->SetStatus(mirror::Class::kStatusResolved, self);
+  mirror::Class::SetStatus(java_lang_reflect_ArtField, mirror::Class::kStatusResolved, self);
   mirror::ArtField::SetClass(java_lang_reflect_ArtField.Get());
 
   Handle<mirror::Class> java_lang_reflect_ArtMethod(hs.NewHandle(
@@ -378,7 +378,7 @@
   size_t pointer_size = GetInstructionSetPointerSize(Runtime::Current()->GetInstructionSet());
   java_lang_reflect_ArtMethod->SetObjectSize(mirror::ArtMethod::InstanceSize(pointer_size));
   SetClassRoot(kJavaLangReflectArtMethod, java_lang_reflect_ArtMethod.Get());
-  java_lang_reflect_ArtMethod->SetStatus(mirror::Class::kStatusResolved, self);
+  mirror::Class::SetStatus(java_lang_reflect_ArtMethod, mirror::Class::kStatusResolved, self);
   mirror::ArtMethod::SetClass(java_lang_reflect_ArtMethod.Get());
 
   // Set up array classes for string, field, method
@@ -434,18 +434,18 @@
   }
 
   // Object, String and DexCache need to be rerun through FindSystemClass to finish init
-  java_lang_Object->SetStatus(mirror::Class::kStatusNotReady, self);
+  mirror::Class::SetStatus(java_lang_Object, mirror::Class::kStatusNotReady, self);
   mirror::Class* Object_class = FindSystemClass(self, "Ljava/lang/Object;");
   CHECK_EQ(java_lang_Object.Get(), Object_class);
   CHECK_EQ(java_lang_Object->GetObjectSize(), mirror::Object::InstanceSize());
-  java_lang_String->SetStatus(mirror::Class::kStatusNotReady, self);
+  mirror::Class::SetStatus(java_lang_String, mirror::Class::kStatusNotReady, self);
   mirror::Class* String_class = FindSystemClass(self, "Ljava/lang/String;");
   std::ostringstream os1, os2;
   java_lang_String->DumpClass(os1, mirror::Class::kDumpClassFullDetail);
   String_class->DumpClass(os2, mirror::Class::kDumpClassFullDetail);
   CHECK_EQ(java_lang_String.Get(), String_class) << os1.str() << "\n\n" << os2.str();
   CHECK_EQ(java_lang_String->GetObjectSize(), mirror::String::InstanceSize());
-  java_lang_DexCache->SetStatus(mirror::Class::kStatusNotReady, self);
+  mirror::Class::SetStatus(java_lang_DexCache, mirror::Class::kStatusNotReady, self);
   mirror::Class* DexCache_class = FindSystemClass(self, "Ljava/lang/DexCache;");
   CHECK_EQ(java_lang_String.Get(), String_class);
   CHECK_EQ(java_lang_DexCache.Get(), DexCache_class);
@@ -505,11 +505,11 @@
   mirror::Class* Class_class = FindSystemClass(self, "Ljava/lang/Class;");
   CHECK_EQ(java_lang_Class.Get(), Class_class);
 
-  java_lang_reflect_ArtMethod->SetStatus(mirror::Class::kStatusNotReady, self);
+  mirror::Class::SetStatus(java_lang_reflect_ArtMethod, mirror::Class::kStatusNotReady, self);
   mirror::Class* Art_method_class = FindSystemClass(self, "Ljava/lang/reflect/ArtMethod;");
   CHECK_EQ(java_lang_reflect_ArtMethod.Get(), Art_method_class);
 
-  java_lang_reflect_ArtField->SetStatus(mirror::Class::kStatusNotReady, self);
+  mirror::Class::SetStatus(java_lang_reflect_ArtField, mirror::Class::kStatusNotReady, self);
   mirror::Class* Art_field_class = FindSystemClass(self, "Ljava/lang/reflect/ArtField;");
   CHECK_EQ(java_lang_reflect_ArtField.Get(), Art_field_class);
 
@@ -533,7 +533,7 @@
 
   // java.lang.ref classes need to be specially flagged, but otherwise are normal classes
   // finish initializing Reference class
-  java_lang_ref_Reference->SetStatus(mirror::Class::kStatusNotReady, self);
+  mirror::Class::SetStatus(java_lang_ref_Reference, mirror::Class::kStatusNotReady, self);
   mirror::Class* Reference_class = FindSystemClass(self, "Ljava/lang/ref/Reference;");
   CHECK_EQ(java_lang_ref_Reference.Get(), Reference_class);
   CHECK_EQ(java_lang_ref_Reference->GetObjectSize(), mirror::Reference::InstanceSize());
@@ -1213,7 +1213,7 @@
     // Check for circular dependencies between classes.
     if (!h_class->IsResolved() && h_class->GetClinitThreadId() == self->GetTid()) {
       ThrowClassCircularityError(h_class.Get());
-      h_class->SetStatus(mirror::Class::kStatusError, self);
+      mirror::Class::SetStatus(h_class, mirror::Class::kStatusError, self);
       return nullptr;
     }
     // Wait for the pending initialization to complete.
@@ -1489,7 +1489,7 @@
     // An exception occurred during load, set status to erroneous while holding klass' lock in case
     // notification is necessary.
     if (!klass->IsErroneous()) {
-      klass->SetStatus(mirror::Class::kStatusError, self);
+      mirror::Class::SetStatus(klass, mirror::Class::kStatusError, self);
     }
     return nullptr;
   }
@@ -1508,7 +1508,7 @@
   if (!LoadSuperAndInterfaces(klass, dex_file)) {
     // Loading failed.
     if (!klass->IsErroneous()) {
-      klass->SetStatus(mirror::Class::kStatusError, self);
+      mirror::Class::SetStatus(klass, mirror::Class::kStatusError, self);
     }
     return nullptr;
   }
@@ -1522,7 +1522,7 @@
   if (!LinkClass(self, descriptor, klass, interfaces, &new_class)) {
     // Linking failed.
     if (!klass->IsErroneous()) {
-      klass->SetStatus(mirror::Class::kStatusError, self);
+      mirror::Class::SetStatus(klass, mirror::Class::kStatusError, self);
     }
     return nullptr;
   }
@@ -1894,7 +1894,7 @@
   klass->SetAccessFlags(access_flags);
   klass->SetClassLoader(class_loader);
   DCHECK_EQ(klass->GetPrimitiveType(), Primitive::kPrimNot);
-  klass->SetStatus(mirror::Class::kStatusIdx, nullptr);
+  mirror::Class::SetStatus(klass, mirror::Class::kStatusIdx, nullptr);
 
   klass->SetDexClassDefIndex(dex_file.GetIndexForClassDef(dex_class_def));
   klass->SetDexTypeIndex(dex_class_def.class_idx_);
@@ -2223,14 +2223,14 @@
   StackHandleScope<1> hs(self);
   Handle<mirror::Class> h_class(hs.NewHandle(primitive_class));
   ObjectLock<mirror::Class> lock(self, h_class);
-  primitive_class->SetAccessFlags(kAccPublic | kAccFinal | kAccAbstract);
-  primitive_class->SetPrimitiveType(type);
-  primitive_class->SetStatus(mirror::Class::kStatusInitialized, self);
+  h_class->SetAccessFlags(kAccPublic | kAccFinal | kAccAbstract);
+  h_class->SetPrimitiveType(type);
+  mirror::Class::SetStatus(h_class, mirror::Class::kStatusInitialized, self);
   const char* descriptor = Primitive::Descriptor(type);
-  mirror::Class* existing = InsertClass(descriptor, primitive_class,
+  mirror::Class* existing = InsertClass(descriptor, h_class.Get(),
                                         ComputeModifiedUtf8Hash(descriptor));
   CHECK(existing == nullptr) << "InitPrimitiveClass(" << type << ") failed";
-  return primitive_class;
+  return h_class.Get();
 }
 
 // Create an array class (i.e. the class object for the array, not the
@@ -2336,13 +2336,13 @@
   new_class->SetVTable(java_lang_Object->GetVTable());
   new_class->SetPrimitiveType(Primitive::kPrimNot);
   new_class->SetClassLoader(component_type->GetClassLoader());
-  new_class->SetStatus(mirror::Class::kStatusLoaded, self);
+  mirror::Class::SetStatus(new_class, mirror::Class::kStatusLoaded, self);
   {
     StackHandleScope<mirror::Class::kImtSize> hs2(self,
                                                   Runtime::Current()->GetImtUnimplementedMethod());
     new_class->PopulateEmbeddedImtAndVTable(&hs2);
   }
-  new_class->SetStatus(mirror::Class::kStatusInitialized, self);
+  mirror::Class::SetStatus(new_class, mirror::Class::kStatusInitialized, self);
   // don't need to set new_class->SetObjectSize(..)
   // because Object::SizeOf delegates to Array::SizeOf
 
@@ -2672,17 +2672,17 @@
   }
 
   if (klass->GetStatus() == mirror::Class::kStatusResolved) {
-    klass->SetStatus(mirror::Class::kStatusVerifying, self);
+    mirror::Class::SetStatus(klass, mirror::Class::kStatusVerifying, self);
   } else {
     CHECK_EQ(klass->GetStatus(), mirror::Class::kStatusRetryVerificationAtRuntime)
         << PrettyClass(klass.Get());
     CHECK(!Runtime::Current()->IsAotCompiler());
-    klass->SetStatus(mirror::Class::kStatusVerifyingAtRuntime, self);
+    mirror::Class::SetStatus(klass, mirror::Class::kStatusVerifyingAtRuntime, self);
   }
 
   // Skip verification if disabled.
   if (!Runtime::Current()->IsVerificationEnabled()) {
-    klass->SetStatus(mirror::Class::kStatusVerified, self);
+    mirror::Class::SetStatus(klass, mirror::Class::kStatusVerified, self);
     EnsurePreverifiedMethods(klass);
     return;
   }
@@ -2715,7 +2715,7 @@
       if (Runtime::Current()->IsAotCompiler()) {
         Runtime::Current()->GetCompilerCallbacks()->ClassRejected(ref);
       }
-      klass->SetStatus(mirror::Class::kStatusError, self);
+      mirror::Class::SetStatus(klass, mirror::Class::kStatusError, self);
       return;
     }
   }
@@ -2730,7 +2730,7 @@
         << klass->GetDexCache()->GetLocation()->ToModifiedUtf8();
     ThrowVerifyError(klass.Get(), "Rejecting class %s because it failed compile-time verification",
                      PrettyDescriptor(klass.Get()).c_str());
-    klass->SetStatus(mirror::Class::kStatusError, self);
+    mirror::Class::SetStatus(klass, mirror::Class::kStatusError, self);
     return;
   }
   verifier::MethodVerifier::FailureKind verifier_failure = verifier::MethodVerifier::kNoFailure;
@@ -2753,10 +2753,10 @@
       // Even though there were no verifier failures we need to respect whether the super-class
       // was verified or requiring runtime reverification.
       if (super.Get() == nullptr || super->IsVerified()) {
-        klass->SetStatus(mirror::Class::kStatusVerified, self);
+        mirror::Class::SetStatus(klass, mirror::Class::kStatusVerified, self);
       } else {
         CHECK_EQ(super->GetStatus(), mirror::Class::kStatusRetryVerificationAtRuntime);
-        klass->SetStatus(mirror::Class::kStatusRetryVerificationAtRuntime, self);
+        mirror::Class::SetStatus(klass, mirror::Class::kStatusRetryVerificationAtRuntime, self);
         // Pretend a soft failure occurred so that we don't consider the class verified below.
         verifier_failure = verifier::MethodVerifier::kSoftFailure;
       }
@@ -2766,9 +2766,9 @@
       // failures at runtime will be handled by slow paths in the generated
       // code. Set status accordingly.
       if (Runtime::Current()->IsAotCompiler()) {
-        klass->SetStatus(mirror::Class::kStatusRetryVerificationAtRuntime, self);
+        mirror::Class::SetStatus(klass, mirror::Class::kStatusRetryVerificationAtRuntime, self);
       } else {
-        klass->SetStatus(mirror::Class::kStatusVerified, self);
+        mirror::Class::SetStatus(klass, mirror::Class::kStatusVerified, self);
         // As this is a fake verified status, make sure the methods are _not_ marked preverified
         // later.
         klass->SetPreverified();
@@ -2780,7 +2780,7 @@
         << " because: " << error_msg;
     self->AssertNoPendingException();
     ThrowVerifyError(klass.Get(), "%s", error_msg.c_str());
-    klass->SetStatus(mirror::Class::kStatusError, self);
+    mirror::Class::SetStatus(klass, mirror::Class::kStatusError, self);
   }
   if (preverified || verifier_failure == verifier::MethodVerifier::kNoFailure) {
     // Class is verified so we don't need to do any access check on its methods.
@@ -2947,7 +2947,7 @@
   klass->SetName(soa.Decode<mirror::String*>(name));
   mirror::Class* proxy_class = GetClassRoot(kJavaLangReflectProxy);
   klass->SetDexCache(proxy_class->GetDexCache());
-  klass->SetStatus(mirror::Class::kStatusIdx, self);
+  mirror::Class::SetStatus(klass, mirror::Class::kStatusIdx, self);
 
   // Instance fields are inherited, but we add a couple of static fields...
   {
@@ -3022,7 +3022,7 @@
   }
 
   klass->SetSuperClass(proxy_class);  // The super class is java.lang.reflect.Proxy
-  klass->SetStatus(mirror::Class::kStatusLoaded, self);  // Now effectively in the loaded state.
+  mirror::Class::SetStatus(klass, mirror::Class::kStatusLoaded, self);  // Now effectively in the loaded state.
   self->AssertNoPendingException();
 
   std::string descriptor(GetDescriptorForProxy(klass.Get()));
@@ -3034,7 +3034,7 @@
     Handle<mirror::ObjectArray<mirror::Class> > h_interfaces(
         hs.NewHandle(soa.Decode<mirror::ObjectArray<mirror::Class>*>(interfaces)));
     if (!LinkClass(self, descriptor.c_str(), klass, h_interfaces, &new_class)) {
-      klass->SetStatus(mirror::Class::kStatusError, self);
+      mirror::Class::SetStatus(klass, mirror::Class::kStatusError, self);
       return nullptr;
     }
   }
@@ -3053,7 +3053,7 @@
   {
     // Lock on klass is released. Lock new class object.
     ObjectLock<mirror::Class> initialization_lock(self, klass);
-    klass->SetStatus(mirror::Class::kStatusInitialized, self);
+    mirror::Class::SetStatus(klass, mirror::Class::kStatusInitialized, self);
   }
 
   // sanity checks
@@ -3310,7 +3310,7 @@
     }
 
     if (!ValidateSuperClassDescriptors(klass)) {
-      klass->SetStatus(mirror::Class::kStatusError, self);
+      mirror::Class::SetStatus(klass, mirror::Class::kStatusError, self);
       return false;
     }
     self->AllowThreadSuspension();
@@ -3320,7 +3320,7 @@
     // From here out other threads may observe that we're initializing and so changes of state
     // require a notification.
     klass->SetClinitThreadId(self->GetTid());
-    klass->SetStatus(mirror::Class::kStatusInitializing, self);
+    mirror::Class::SetStatus(klass, mirror::Class::kStatusInitializing, self);
 
     t0 = NanoTime();
   }
@@ -3345,7 +3345,7 @@
             << (self->GetException() != nullptr ? self->GetException()->Dump() : "");
         ObjectLock<mirror::Class> lock(self, klass);
         // Initialization failed because the super-class is erroneous.
-        klass->SetStatus(mirror::Class::kStatusError, self);
+        mirror::Class::SetStatus(klass, mirror::Class::kStatusError, self);
         return false;
       }
     }
@@ -3410,7 +3410,7 @@
 
     if (self->IsExceptionPending()) {
       WrapExceptionInInitializer(klass);
-      klass->SetStatus(mirror::Class::kStatusError, self);
+      mirror::Class::SetStatus(klass, mirror::Class::kStatusError, self);
       success = false;
     } else if (Runtime::Current()->IsTransactionAborted()) {
       // The exception thrown when the transaction aborted has been caught and cleared
@@ -3418,7 +3418,7 @@
       VLOG(compiler) << "Return from class initializer of " << PrettyDescriptor(klass.Get())
                      << " without exception while transaction was aborted: re-throw it now.";
       Runtime::Current()->ThrowInternalErrorForAbortedTransaction(self);
-      klass->SetStatus(mirror::Class::kStatusError, self);
+      mirror::Class::SetStatus(klass, mirror::Class::kStatusError, self);
       success = false;
     } else {
       RuntimeStats* global_stats = Runtime::Current()->GetStats();
@@ -3428,7 +3428,7 @@
       global_stats->class_init_time_ns += (t1 - t0);
       thread_stats->class_init_time_ns += (t1 - t0);
       // Set the class as initialized except if failed to initialize static fields.
-      klass->SetStatus(mirror::Class::kStatusInitialized, self);
+      mirror::Class::SetStatus(klass, mirror::Class::kStatusInitialized, self);
       if (VLOG_IS_ON(class_linker)) {
         std::string temp;
         LOG(INFO) << "Initialized class " << klass->GetDescriptor(&temp) << " from " <<
@@ -3454,7 +3454,7 @@
     // we were not using WaitIgnoringInterrupts), bail out.
     if (self->IsExceptionPending()) {
       WrapExceptionInInitializer(klass);
-      klass->SetStatus(mirror::Class::kStatusError, self);
+      mirror::Class::SetStatus(klass, mirror::Class::kStatusError, self);
       return false;
     }
     // Spurious wakeup? Go back to waiting.
@@ -3688,7 +3688,7 @@
 
     // This will notify waiters on klass that saw the not yet resolved
     // class in the class_table_ during EnsureResolved.
-    klass->SetStatus(mirror::Class::kStatusResolved, self);
+    mirror::Class::SetStatus(klass, mirror::Class::kStatusResolved, self);
     *new_class = klass.Get();
   } else {
     CHECK(!klass->IsResolved());
@@ -3696,7 +3696,7 @@
     *new_class = klass->CopyOf(self, class_size, &imt_handle_scope);
     if (UNLIKELY(*new_class == nullptr)) {
       CHECK(self->IsExceptionPending());  // Expect an OOME.
-      klass->SetStatus(mirror::Class::kStatusError, self);
+      mirror::Class::SetStatus(klass, mirror::Class::kStatusError, self);
       return false;
     }
 
@@ -3713,12 +3713,12 @@
 
     // This will notify waiters on temp class that saw the not yet resolved class in the
     // class_table_ during EnsureResolved.
-    klass->SetStatus(mirror::Class::kStatusRetired, self);
+    mirror::Class::SetStatus(klass, mirror::Class::kStatusRetired, self);
 
     CHECK_EQ(new_class_h->GetStatus(), mirror::Class::kStatusResolving);
     // This will notify waiters on new_class that saw the not yet resolved
     // class in the class_table_ during EnsureResolved.
-    new_class_h->SetStatus(mirror::Class::kStatusResolved, self);
+    mirror::Class::SetStatus(new_class_h, mirror::Class::kStatusResolved, self);
   }
   return true;
 }
@@ -3933,7 +3933,7 @@
     }
   }
   // Mark the class as loaded.
-  klass->SetStatus(mirror::Class::kStatusLoaded, nullptr);
+  mirror::Class::SetStatus(klass, mirror::Class::kStatusLoaded, nullptr);
   return true;
 }
 
@@ -5255,7 +5255,7 @@
 }
 
 bool ClassLinker::ClassDescriptorHashEquals::operator()(const GcRoot<mirror::Class>& a,
-                                                        const GcRoot<mirror::Class>& b) {
+                                                        const GcRoot<mirror::Class>& b) const {
   if (a.Read()->GetClassLoader() != b.Read()->GetClassLoader()) {
     return false;
   }
@@ -5269,7 +5269,7 @@
 }
 
 bool ClassLinker::ClassDescriptorHashEquals::operator()(
-    const GcRoot<mirror::Class>& a, const std::pair<const char*, mirror::ClassLoader*>& b) {
+    const GcRoot<mirror::Class>& a, const std::pair<const char*, mirror::ClassLoader*>& b) const {
   if (a.Read()->GetClassLoader() != b.second) {
     return false;
   }
@@ -5277,7 +5277,7 @@
 }
 
 bool ClassLinker::ClassDescriptorHashEquals::operator()(const GcRoot<mirror::Class>& a,
-                                                        const char* descriptor) {
+                                                        const char* descriptor) const {
   return a.Read()->DescriptorEquals(descriptor);
 }
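
Every SetStatus call site above changes shape the same way: the instance call klass->SetStatus(status, self) becomes a static call mirror::Class::SetStatus(klass, status, self) that receives the Handle itself. A minimal sketch of that call-site shape with placeholder types (illustrative only; it does not reproduce mirror::Class or ART's Handle):

```cpp
// Placeholder Handle and class; only the call-site shape mirrors the change above.
template <typename T>
class Handle {
 public:
  explicit Handle(T* obj) : obj_(obj) {}
  T* Get() const { return obj_; }
  T* operator->() const { return obj_; }

 private:
  T* obj_;
};

class FakeClass {
 public:
  enum Status { kStatusNotReady = 0, kStatusLoaded = 1, kStatusResolved = 2 };

  // New style: a static member that is handed the Handle wrapping the object,
  // matching the mirror::Class::SetStatus(klass, status, self) calls in the diff.
  static void SetStatus(Handle<FakeClass> klass, Status new_status) {
    klass->status_ = new_status;
  }

 private:
  Status status_ = kStatusNotReady;
};

void Example(Handle<FakeClass> klass) {
  // Before: klass->SetStatus(kStatusLoaded, self);
  // After:
  FakeClass::SetStatus(klass, FakeClass::kStatusLoaded);
}
```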
 
diff --git a/runtime/class_linker.h b/runtime/class_linker.h
index 75fbdf3..4ebce3e 100644
--- a/runtime/class_linker.h
+++ b/runtime/class_linker.h
@@ -661,16 +661,16 @@
    public:
     // Same class loader and descriptor.
     std::size_t operator()(const GcRoot<mirror::Class>& root) const NO_THREAD_SAFETY_ANALYSIS;
-    bool operator()(const GcRoot<mirror::Class>& a, const GcRoot<mirror::Class>& b)
+    bool operator()(const GcRoot<mirror::Class>& a, const GcRoot<mirror::Class>& b) const
         NO_THREAD_SAFETY_ANALYSIS;
     // Same class loader and descriptor.
     std::size_t operator()(const std::pair<const char*, mirror::ClassLoader*>& element) const
         NO_THREAD_SAFETY_ANALYSIS;
     bool operator()(const GcRoot<mirror::Class>& a,
-                    const std::pair<const char*, mirror::ClassLoader*>& b)
+                    const std::pair<const char*, mirror::ClassLoader*>& b) const
         NO_THREAD_SAFETY_ANALYSIS;
     // Same descriptor.
-    bool operator()(const GcRoot<mirror::Class>& a, const char* descriptor)
+    bool operator()(const GcRoot<mirror::Class>& a, const char* descriptor) const
         NO_THREAD_SAFETY_ANALYSIS;
     std::size_t operator()(const char* descriptor) const NO_THREAD_SAFETY_ANALYSIS;
   };
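
The const qualifiers added to ClassDescriptorHashEquals::operator() are what allow the hash and equality functors to be invoked through a const container, matching the ConstIterator / const Find path introduced in hash_set.h. A small sketch of the requirement, with hypothetical names:

```cpp
#include <cstddef>
#include <cstring>

// A functor used as hash and equality predicate must be callable on a const
// instance, because a const container only exposes const access to its members.
struct DescriptorHashEquals {
  std::size_t operator()(const char* descriptor) const {
    std::size_t hash = 5381;  // djb2-style string hash, illustrative only.
    for (const char* p = descriptor; *p != '\0'; ++p) {
      hash = hash * 33 + static_cast<unsigned char>(*p);
    }
    return hash;
  }
  bool operator()(const char* a, const char* b) const {
    return std::strcmp(a, b) == 0;
  }
};

// Compiles only because both operator() overloads are const-qualified.
bool EqualViaConstRef(const DescriptorHashEquals& pred, const char* a, const char* b) {
  return pred(a, b);
}
```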
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index 6296cf5..7144577 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -307,7 +307,6 @@
 // Runtime JDWP state.
 static JDWP::JdwpState* gJdwpState = nullptr;
 static bool gDebuggerConnected;  // debugger or DDMS is connected.
-static bool gDebuggerActive;     // debugger is making requests.
 static bool gDisposed;           // debugger called VirtualMachine.Dispose, so we should drop the connection.
 
 static bool gDdmThreadNotification = false;
@@ -319,6 +318,7 @@
 static Dbg::HpsgWhen gDdmNhsgWhen = Dbg::HPSG_WHEN_NEVER;
 static Dbg::HpsgWhat gDdmNhsgWhat;
 
+bool Dbg::gDebuggerActive = false;
 ObjectRegistry* Dbg::gRegistry = nullptr;
 
 // Recent allocation tracking.
@@ -331,7 +331,6 @@
 // Deoptimization support.
 std::vector<DeoptimizationRequest> Dbg::deoptimization_requests_;
 size_t Dbg::full_deoptimization_event_count_ = 0;
-size_t Dbg::delayed_full_undeoptimization_count_ = 0;
 
 // Instrumentation event reference counters.
 size_t Dbg::dex_pc_change_event_ref_count_ = 0;
@@ -620,7 +619,7 @@
   // Enable all debugging features, including scans for breakpoints.
   // This is a no-op if we're already active.
   // Only called from the JDWP handler thread.
-  if (gDebuggerActive) {
+  if (IsDebuggerActive()) {
     return;
   }
 
@@ -634,7 +633,6 @@
     MutexLock mu(Thread::Current(), *Locks::deoptimization_lock_);
     CHECK_EQ(deoptimization_requests_.size(), 0U);
     CHECK_EQ(full_deoptimization_event_count_, 0U);
-    CHECK_EQ(delayed_full_undeoptimization_count_, 0U);
     CHECK_EQ(dex_pc_change_event_ref_count_, 0U);
     CHECK_EQ(method_enter_event_ref_count_, 0U);
     CHECK_EQ(method_exit_event_ref_count_, 0U);
@@ -673,7 +671,7 @@
   ThreadState old_state = self->SetStateUnsafe(kRunnable);
 
   // Debugger may not be active at this point.
-  if (gDebuggerActive) {
+  if (IsDebuggerActive()) {
     {
       // Since we're going to disable deoptimization, we clear the deoptimization requests queue.
       // This prevents us from having any pending deoptimization request when the debugger attaches
@@ -681,7 +679,6 @@
       MutexLock mu(Thread::Current(), *Locks::deoptimization_lock_);
       deoptimization_requests_.clear();
       full_deoptimization_event_count_ = 0U;
-      delayed_full_undeoptimization_count_ = 0U;
     }
     if (instrumentation_events_ != 0) {
       runtime->GetInstrumentation()->RemoveListener(&gDebugInstrumentationListener,
@@ -704,10 +701,6 @@
   gDebuggerConnected = false;
 }
 
-bool Dbg::IsDebuggerActive() {
-  return gDebuggerActive;
-}
-
 void Dbg::ConfigureJdwp(const JDWP::JdwpOptions& jdwp_options) {
   CHECK_NE(jdwp_options.transport, JDWP::kJdwpTransportUnknown);
   gJdwpOptions = jdwp_options;
@@ -3020,29 +3013,6 @@
   }
 }
 
-void Dbg::DelayFullUndeoptimization() {
-  if (RequiresDeoptimization()) {
-    MutexLock mu(Thread::Current(), *Locks::deoptimization_lock_);
-    ++delayed_full_undeoptimization_count_;
-    DCHECK_LE(delayed_full_undeoptimization_count_, full_deoptimization_event_count_);
-  }
-}
-
-void Dbg::ProcessDelayedFullUndeoptimizations() {
-  // TODO: avoid taking the lock twice (once here and once in ManageDeoptimization).
-  {
-    MutexLock mu(Thread::Current(), *Locks::deoptimization_lock_);
-    while (delayed_full_undeoptimization_count_ > 0) {
-      DeoptimizationRequest req;
-      req.SetKind(DeoptimizationRequest::kFullUndeoptimization);
-      req.SetMethod(nullptr);
-      RequestDeoptimizationLocked(req);
-      --delayed_full_undeoptimization_count_;
-    }
-  }
-  ManageDeoptimization();
-}
-
 void Dbg::RequestDeoptimization(const DeoptimizationRequest& req) {
   if (req.GetKind() == DeoptimizationRequest::kNothing) {
     // Nothing to do.
@@ -3352,6 +3322,125 @@
   }
 }
 
+bool Dbg::IsForcedInterpreterNeededForCallingImpl(Thread* thread, mirror::ArtMethod* m) {
+  const SingleStepControl* const ssc = thread->GetSingleStepControl();
+  if (ssc == nullptr) {
+    // If we are not single-stepping, then we don't have to force interpreter.
+    return false;
+  }
+  if (Runtime::Current()->GetInstrumentation()->InterpretOnly()) {
+    // If we are in interpreter only mode, then we don't have to force interpreter.
+    return false;
+  }
+
+  if (!m->IsNative() && !m->IsProxyMethod()) {
+    // If we want to step into a method, then we have to force interpreter on that call.
+    if (ssc->GetStepDepth() == JDWP::SD_INTO) {
+      return true;
+    }
+  }
+  return false;
+}
+
+bool Dbg::IsForcedInterpreterNeededForResolutionImpl(Thread* thread, mirror::ArtMethod* m) {
+  instrumentation::Instrumentation* const instrumentation =
+      Runtime::Current()->GetInstrumentation();
+  // If we are in interpreter only mode, then we don't have to force interpreter.
+  if (instrumentation->InterpretOnly()) {
+    return false;
+  }
+  // We can only interpret pure Java methods.
+  if (m->IsNative() || m->IsProxyMethod()) {
+    return false;
+  }
+  const SingleStepControl* const ssc = thread->GetSingleStepControl();
+  if (ssc != nullptr) {
+    // If we want to step into a method, then we have to force interpreter on that call.
+    if (ssc->GetStepDepth() == JDWP::SD_INTO) {
+      return true;
+    }
+    // If we are stepping out from a static initializer, by issuing a step
+    // in or step over, that was implicitly invoked by calling a static method,
+    // then we need to step into that method. Having a lower stack depth than
+    // the one the single step control has indicates that the step originates
+    // from the static initializer.
+    if (ssc->GetStepDepth() != JDWP::SD_OUT &&
+        ssc->GetStackDepth() > GetStackDepth(thread)) {
+      return true;
+    }
+  }
+  // There are cases where we have to force interpreter on deoptimized methods,
+  // because in some cases the call will not be performed by invoking an entry
+  // point that has been replaced by the deoptimization, but instead by directly
+  // invoking the compiled code of the method, for example.
+  return instrumentation->IsDeoptimized(m);
+}
+
+bool Dbg::IsForcedInstrumentationNeededForResolutionImpl(Thread* thread, mirror::ArtMethod* m) {
+  // The upcall can be nullptr and in that case we don't need to do anything.
+  if (m == nullptr) {
+    return false;
+  }
+  instrumentation::Instrumentation* const instrumentation =
+      Runtime::Current()->GetInstrumentation();
+  // If we are in interpreter only mode, then we don't have to force interpreter.
+  if (instrumentation->InterpretOnly()) {
+    return false;
+  }
+  // We can only interpret pure Java methods.
+  if (m->IsNative() || m->IsProxyMethod()) {
+    return false;
+  }
+  const SingleStepControl* const ssc = thread->GetSingleStepControl();
+  if (ssc != nullptr) {
+    // If we are stepping out from a static initializer, by issuing a step
+    // out, that was implicitly invoked by calling a static method, then we
+    // need to step into the caller of that method. Having a lower stack
+    // depth than the one the single step control has indicates that the
+    // step originates from the static initializer.
+    if (ssc->GetStepDepth() == JDWP::SD_OUT &&
+        ssc->GetStackDepth() > GetStackDepth(thread)) {
+      return true;
+    }
+  }
+  // If we are returning from a static initializer, that was implicitly
+  // invoked by calling a static method and the caller is deoptimized,
+  // then we have to deoptimize the stack without forcing interpreter
+  // on the static method that was called originally. This problem can
+  // be solved easily by forcing instrumentation on the called method,
+  // because the instrumentation exit hook will recognise the need for
+  // stack deoptimization by calling IsForcedInterpreterNeededForUpcall.
+  return instrumentation->IsDeoptimized(m);
+}
+
+bool Dbg::IsForcedInterpreterNeededForUpcallImpl(Thread* thread, mirror::ArtMethod* m) {
+  // The upcall can be nullptr and in that case we don't need to do anything.
+  if (m == nullptr) {
+    return false;
+  }
+  instrumentation::Instrumentation* const instrumentation =
+      Runtime::Current()->GetInstrumentation();
+  // If we are in interpreter only mode, then we don't have to force interpreter.
+  if (instrumentation->InterpretOnly()) {
+    return false;
+  }
+  // We can only interpret pure Java methods.
+  if (m->IsNative() || m->IsProxyMethod()) {
+    return false;
+  }
+  const SingleStepControl* const ssc = thread->GetSingleStepControl();
+  if (ssc != nullptr) {
+    // The debugger is not interested in what is happening below the level
+    // of the step, so we only force the interpreter when we are not below
+    // the step level.
+    if (ssc->GetStackDepth() >= GetStackDepth(thread)) {
+      return true;
+    }
+  }
+  // We have to require stack deoptimization if the upcall is deoptimized.
+  return instrumentation->IsDeoptimized(m);
+}
+
 // Scoped utility class to suspend a thread so that we may do tasks such as walk its stack. Doesn't
 // cause suspension if the thread is the current thread.
 class ScopedThreadSuspension {
diff --git a/runtime/debugger.h b/runtime/debugger.h
index 01c9d5d..d015294 100644
--- a/runtime/debugger.h
+++ b/runtime/debugger.h
@@ -243,7 +243,9 @@
 
   // Returns true if we're actually debugging with a real debugger, false if it's
   // just DDMS (or nothing at all).
-  static bool IsDebuggerActive();
+  static bool IsDebuggerActive() {
+    return gDebuggerActive;
+  }
 
   // Configures JDWP with parsed command-line options.
   static void ConfigureJdwp(const JDWP::JdwpOptions& jdwp_options);
@@ -543,13 +545,6 @@
       LOCKS_EXCLUDED(Locks::deoptimization_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  // Support delayed full undeoptimization requests. This is currently only used for single-step
-  // events.
-  static void DelayFullUndeoptimization() LOCKS_EXCLUDED(Locks::deoptimization_lock_);
-  static void ProcessDelayedFullUndeoptimizations()
-      LOCKS_EXCLUDED(Locks::deoptimization_lock_)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
   // Manage deoptimization after updating JDWP events list. Suspends all threads, processes each
   // request and finally resumes all threads.
   static void ManageDeoptimization()
@@ -564,6 +559,53 @@
       LOCKS_EXCLUDED(Locks::breakpoint_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  /*
+   * Forced interpreter checkers for single-step and continue support.
+   */
+
+  // Indicates whether we need to force the use of the interpreter to invoke a method.
+  // This allows single-stepping or continuing into the called method.
+  static bool IsForcedInterpreterNeededForCalling(Thread* thread, mirror::ArtMethod* m)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    if (!IsDebuggerActive()) {
+      return false;
+    }
+    return IsForcedInterpreterNeededForCallingImpl(thread, m);
+  }
+
+  // Indicates whether we need to force the use of the interpreter entrypoint when calling a
+  // method through the resolution trampoline. This allows single-stepping or continuing into
+  // the called method.
+  static bool IsForcedInterpreterNeededForResolution(Thread* thread, mirror::ArtMethod* m)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    if (!IsDebuggerActive()) {
+      return false;
+    }
+    return IsForcedInterpreterNeededForResolutionImpl(thread, m);
+  }
+
+  // Indicates whether we need to force the use of the instrumentation entrypoint when calling
+  // a method through the resolution trampoline. This allows the stack to be deoptimized for
+  // debugging when we return from the called method.
+  static bool IsForcedInstrumentationNeededForResolution(Thread* thread, mirror::ArtMethod* m)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    if (!IsDebuggerActive()) {
+      return false;
+    }
+    return IsForcedInstrumentationNeededForResolutionImpl(thread, m);
+  }
+
+  // Indicates whether we need to force the use of the interpreter when returning from the
+  // interpreter into the runtime. This allows the stack to be deoptimized so that execution
+  // can continue in the interpreter for debugging.
+  static bool IsForcedInterpreterNeededForUpcall(Thread* thread, mirror::ArtMethod* m)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    if (!IsDebuggerActive()) {
+      return false;
+    }
+    return IsForcedInterpreterNeededForUpcallImpl(thread, m);
+  }
+
   // Single-stepping.
   static JDWP::JdwpError ConfigureStep(JDWP::ObjectId thread_id, JDWP::JdwpStepSize size,
                                        JDWP::JdwpStepDepth depth)
@@ -690,11 +732,27 @@
       EXCLUSIVE_LOCKS_REQUIRED(Locks::deoptimization_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  static bool IsForcedInterpreterNeededForCallingImpl(Thread* thread, mirror::ArtMethod* m)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  static bool IsForcedInterpreterNeededForResolutionImpl(Thread* thread, mirror::ArtMethod* m)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  static bool IsForcedInstrumentationNeededForResolutionImpl(Thread* thread, mirror::ArtMethod* m)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  static bool IsForcedInterpreterNeededForUpcallImpl(Thread* thread, mirror::ArtMethod* m)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   static AllocRecord* recent_allocation_records_ PT_GUARDED_BY(Locks::alloc_tracker_lock_);
   static size_t alloc_record_max_ GUARDED_BY(Locks::alloc_tracker_lock_);
   static size_t alloc_record_head_ GUARDED_BY(Locks::alloc_tracker_lock_);
   static size_t alloc_record_count_ GUARDED_BY(Locks::alloc_tracker_lock_);
 
+  // Indicates whether the debugger is making requests.
+  static bool gDebuggerActive;
+
+  // The registry mapping objects to JDWP ids.
   static ObjectRegistry* gRegistry;
 
   // Deoptimization requests to be processed each time the event list is updated. This is used when
@@ -709,10 +767,6 @@
   // undeoptimize when the last event is unregistered (when the counter is set to 0).
   static size_t full_deoptimization_event_count_ GUARDED_BY(Locks::deoptimization_lock_);
 
-  // Count the number of full undeoptimization requests delayed to next resume or end of debug
-  // session.
-  static size_t delayed_full_undeoptimization_count_ GUARDED_BY(Locks::deoptimization_lock_);
-
   static size_t* GetReferenceCounterForEvent(uint32_t instrumentation_event);
 
   // Weak global type cache, TODO improve this.
diff --git a/runtime/dex_file.h b/runtime/dex_file.h
index c8ede48..da39573 100644
--- a/runtime/dex_file.h
+++ b/runtime/dex_file.h
@@ -421,15 +421,26 @@
     }
   }
 
-  std::string GetBaseLocation() const {
-    size_t pos = location_.rfind(kMultiDexSeparator);
+  static std::string GetBaseLocation(const std::string& location) {
+    return GetBaseLocation(location.c_str());
+  }
+
+  // Returns the ':classes*.dex' part of the dex location. Returns an empty
+  // string if there is no multidex suffix for the given location.
+  // The kMultiDexSeparator is included in the returned suffix.
+  static std::string GetMultiDexSuffix(const std::string& location) {
+    size_t pos = location.rfind(kMultiDexSeparator);
     if (pos == std::string::npos) {
-      return location_;
+      return "";
     } else {
-      return location_.substr(0, pos);
+      return location.substr(pos);
     }
   }
 
+  std::string GetBaseLocation() const {
+    return GetBaseLocation(location_);
+  }
+
   // For DexFiles directly from .dex files, this is the checksum from the DexFile::Header.
   // For DexFiles opened from a zip files, this will be the ZipEntry CRC32 of classes.dex.
   uint32_t GetLocationChecksum() const {
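For reference, a minimal standalone sketch of the split performed by the new static helpers, assuming kMultiDexSeparator is ':' (the separator the tests added below use); the names here are local stand-ins, not the ART declarations:

#include <cassert>
#include <string>

std::string GetBaseLocation(const std::string& location) {
  size_t pos = location.rfind(':');  // kMultiDexSeparator assumed to be ':'
  return (pos == std::string::npos) ? location : location.substr(0, pos);
}

std::string GetMultiDexSuffix(const std::string& location) {
  size_t pos = location.rfind(':');
  return (pos == std::string::npos) ? "" : location.substr(pos);  // keeps the ':'
}

int main() {
  // Base location and multidex suffix always recombine into the original location.
  const std::string loc = "/foo/bar/baz.jar:classes2.dex";
  assert(GetBaseLocation(loc) + GetMultiDexSuffix(loc) == loc);
  assert(GetMultiDexSuffix("/foo/bar/baz.jar").empty());
  return 0;
}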
diff --git a/runtime/dex_file_test.cc b/runtime/dex_file_test.cc
index 7f5a181..09ef3ee 100644
--- a/runtime/dex_file_test.cc
+++ b/runtime/dex_file_test.cc
@@ -377,4 +377,13 @@
   ASSERT_EQ(0, unlink(dex_location_sym.c_str()));
 }
 
+TEST(DexFileUtilsTest, GetBaseLocationAndMultiDexSuffix) {
+  EXPECT_EQ("/foo/bar/baz.jar", DexFile::GetBaseLocation("/foo/bar/baz.jar"));
+  EXPECT_EQ("/foo/bar/baz.jar", DexFile::GetBaseLocation("/foo/bar/baz.jar:classes2.dex"));
+  EXPECT_EQ("/foo/bar/baz.jar", DexFile::GetBaseLocation("/foo/bar/baz.jar:classes8.dex"));
+  EXPECT_EQ("", DexFile::GetMultiDexSuffix("/foo/bar/baz.jar"));
+  EXPECT_EQ(":classes2.dex", DexFile::GetMultiDexSuffix("/foo/bar/baz.jar:classes2.dex"));
+  EXPECT_EQ(":classes8.dex", DexFile::GetMultiDexSuffix("/foo/bar/baz.jar:classes8.dex"));
+}
+
 }  // namespace art
diff --git a/runtime/dex_instruction_list.h b/runtime/dex_instruction_list.h
index a90f424..f8f85f9 100644
--- a/runtime/dex_instruction_list.h
+++ b/runtime/dex_instruction_list.h
@@ -133,7 +133,7 @@
   V(0x70, INVOKE_DIRECT, "invoke-direct", k35c, false, kMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgNonZero) \
   V(0x71, INVOKE_STATIC, "invoke-static", k35c, false, kMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArg) \
   V(0x72, INVOKE_INTERFACE, "invoke-interface", k35c, false, kMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgNonZero) \
-  V(0x73, RETURN_VOID_BARRIER, "return-void-barrier", k10x, false, kNone, kReturn, kVerifyNone) \
+  V(0x73, RETURN_VOID_NO_BARRIER, "return-void-no-barrier", k10x, false, kNone, kReturn, kVerifyNone) \
   V(0x74, INVOKE_VIRTUAL_RANGE, "invoke-virtual/range", k3rc, false, kMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgRangeNonZero) \
   V(0x75, INVOKE_SUPER_RANGE, "invoke-super/range", k3rc, false, kMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgRangeNonZero) \
   V(0x76, INVOKE_DIRECT_RANGE, "invoke-direct/range", k3rc, false, kMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgRangeNonZero) \
diff --git a/runtime/dex_instruction_utils.h b/runtime/dex_instruction_utils.h
index 1a671c5..f892f98 100644
--- a/runtime/dex_instruction_utils.h
+++ b/runtime/dex_instruction_utils.h
@@ -55,7 +55,7 @@
 
 constexpr bool IsInstructionInvoke(Instruction::Code opcode) {
   return Instruction::INVOKE_VIRTUAL <= opcode && opcode <= Instruction::INVOKE_INTERFACE_RANGE &&
-      opcode != Instruction::RETURN_VOID_BARRIER;
+      opcode != Instruction::RETURN_VOID_NO_BARRIER;
 }
 
 constexpr bool IsInstructionQuickInvoke(Instruction::Code opcode) {
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index 70ee042..8351e22 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -30,6 +30,7 @@
 #include "mirror/object_array-inl.h"
 #include "runtime.h"
 #include "scoped_thread_state_change.h"
+#include "debugger.h"
 
 namespace art {
 
@@ -639,6 +640,14 @@
     JValue result = interpreter::EnterInterpreterFromEntryPoint(self, code_item, shadow_frame);
     // Pop transition.
     self->PopManagedStackFragment(fragment);
+
+    // Request a stack deoptimization if needed
+    mirror::ArtMethod* caller = QuickArgumentVisitor::GetCallingMethod(sp);
+    if (UNLIKELY(Dbg::IsForcedInterpreterNeededForUpcall(self, caller))) {
+      self->SetException(Thread::GetDeoptimizationException());
+      self->SetDeoptimizationReturnValue(result);
+    }
+
     // No need to restore the args since the method has already been run by the interpreter.
     return result.GetJ();
   }
@@ -950,14 +959,37 @@
         called->GetDexCache()->SetResolvedMethod(called_dex_method_idx, called);
       }
     }
+
     // Ensure that the called method's class is initialized.
     StackHandleScope<1> hs(soa.Self());
     Handle<mirror::Class> called_class(hs.NewHandle(called->GetDeclaringClass()));
     linker->EnsureInitialized(soa.Self(), called_class, true, true);
     if (LIKELY(called_class->IsInitialized())) {
-      code = called->GetEntryPointFromQuickCompiledCode();
+      if (UNLIKELY(Dbg::IsForcedInterpreterNeededForResolution(self, called))) {
+        // If we are single-stepping or the called method is deoptimized (by a
+        // breakpoint, for example), then we have to execute the called method
+        // with the interpreter.
+        code = GetQuickToInterpreterBridge();
+      } else if (UNLIKELY(Dbg::IsForcedInstrumentationNeededForResolution(self, caller))) {
+        // If the caller is deoptimized (by a breakpoint, for example), we have to
+        // continue its execution with interpreter when returning from the called
+        // method. Because we do not want to execute the called method with the
+        // interpreter, we wrap its execution into the instrumentation stubs.
+        // When the called method returns, it will execute the instrumentation
+        // exit hook that will determine the need for the interpreter with a call
+        // to Dbg::IsForcedInterpreterNeededForUpcall and deoptimize the stack if
+        // it is needed.
+        code = GetQuickInstrumentationEntryPoint();
+      } else {
+        code = called->GetEntryPointFromQuickCompiledCode();
+      }
     } else if (called_class->IsInitializing()) {
-      if (invoke_type == kStatic) {
+      if (UNLIKELY(Dbg::IsForcedInterpreterNeededForResolution(self, called))) {
+        // If we are single-stepping or the called method is deoptimized (by a
+        // breakpoint, for example), then we have to execute the called method
+        // with the interpreter.
+        code = GetQuickToInterpreterBridge();
+      } else if (invoke_type == kStatic) {
         // Class is still initializing, go to oat and grab code (trampoline must be left in place
         // until class is initialized to stop races between threads).
         code = linker->GetQuickOatCodeFor(called);
@@ -1152,7 +1184,7 @@
       gpr_index_--;
       if (kMultiGPRegistersWidened) {
         DCHECK_EQ(sizeof(uintptr_t), sizeof(int64_t));
-        PushGpr(static_cast<int64_t>(bit_cast<uint32_t, int32_t>(val)));
+        PushGpr(static_cast<int64_t>(bit_cast<int32_t, uint32_t>(val)));
       } else {
         PushGpr(val);
       }
@@ -1160,7 +1192,7 @@
       stack_entries_++;
       if (kMultiGPRegistersWidened) {
         DCHECK_EQ(sizeof(uintptr_t), sizeof(int64_t));
-        PushStack(static_cast<int64_t>(bit_cast<uint32_t, int32_t>(val)));
+        PushStack(static_cast<int64_t>(bit_cast<int32_t, uint32_t>(val)));
       } else {
         PushStack(val);
       }
@@ -1220,16 +1252,16 @@
 
   void AdvanceFloat(float val) {
     if (kNativeSoftFloatAbi) {
-      AdvanceInt(bit_cast<float, uint32_t>(val));
+      AdvanceInt(bit_cast<uint32_t, float>(val));
     } else {
       if (HaveFloatFpr()) {
         fpr_index_--;
         if (kRegistersNeededForDouble == 1) {
           if (kMultiFPRegistersWidened) {
-            PushFpr8(bit_cast<double, uint64_t>(val));
+            PushFpr8(bit_cast<uint64_t, double>(val));
           } else {
             // No widening, just use the bits.
-            PushFpr8(bit_cast<float, uint64_t>(val));
+            PushFpr8(static_cast<uint64_t>(bit_cast<uint32_t, float>(val)));
           }
         } else {
           PushFpr4(val);
@@ -1240,9 +1272,9 @@
           // Need to widen before storing: Note the "double" in the template instantiation.
           // Note: We need to jump through those hoops to make the compiler happy.
           DCHECK_EQ(sizeof(uintptr_t), sizeof(uint64_t));
-          PushStack(static_cast<uintptr_t>(bit_cast<double, uint64_t>(val)));
+          PushStack(static_cast<uintptr_t>(bit_cast<uint64_t, double>(val)));
         } else {
-          PushStack(bit_cast<float, uintptr_t>(val));
+          PushStack(static_cast<uintptr_t>(bit_cast<uint32_t, float>(val)));
         }
         fpr_index_ = 0;
       }
@@ -1876,8 +1908,8 @@
       case 'F': {
         if (kRuntimeISA == kX86) {
           // Convert back the result to float.
-          double d = bit_cast<uint64_t, double>(result_f);
-          return bit_cast<float, uint32_t>(static_cast<float>(d));
+          double d = bit_cast<double, uint64_t>(result_f);
+          return bit_cast<uint32_t, float>(static_cast<float>(d));
         } else {
           return result_f;
         }
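The bit_cast call sites above are switched to a destination-type-first argument order. A simplified, self-contained stand-in for that convention (the real helper lives in base/casts.h; this sketch only illustrates the argument order and a memcpy-based implementation):

#include <cassert>
#include <cstdint>
#include <cstring>

// Destination type first, source type second, mirroring static_cast syntax.
template <typename Dest, typename Source>
Dest bit_cast(const Source& source) {
  static_assert(sizeof(Dest) == sizeof(Source), "sizes must match");
  Dest dest;
  std::memcpy(&dest, &source, sizeof(dest));
  return dest;
}

int main() {
  // Reinterpret the bits of 1.0f as an unsigned integer (0x3f800000).
  assert((bit_cast<uint32_t, float>(1.0f)) == 0x3f800000u);
  // Round-trip back to the original float value.
  assert((bit_cast<float, uint32_t>(0x3f800000u)) == 1.0f);
  return 0;
}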
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index dff8f4d..51cf558 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -2061,7 +2061,6 @@
   MutexLock mu(self, zygote_creation_lock_);
   // Try to see if we have any Zygote spaces.
   if (HasZygoteSpace()) {
-    LOG(WARNING) << __FUNCTION__ << " called when we already have a zygote space.";
     return;
   }
   Runtime::Current()->GetInternTable()->SwapPostZygoteWithPreZygote();
diff --git a/runtime/gc/space/image_space.cc b/runtime/gc/space/image_space.cc
index 14f770d..1fb3252 100644
--- a/runtime/gc/space/image_space.cc
+++ b/runtime/gc/space/image_space.cc
@@ -788,7 +788,8 @@
 
   OatFile* oat_file = OatFile::Open(oat_filename, oat_filename, image_header.GetOatDataBegin(),
                                     image_header.GetOatFileBegin(),
-                                    !Runtime::Current()->IsAotCompiler(), error_msg);
+                                    !Runtime::Current()->IsAotCompiler(),
+                                    nullptr, error_msg);
   if (oat_file == NULL) {
     *error_msg = StringPrintf("Failed to open oat file '%s' referenced from image %s: %s",
                               oat_filename.c_str(), GetName(), error_msg->c_str());
diff --git a/runtime/globals.h b/runtime/globals.h
index 0845475..ac8751c 100644
--- a/runtime/globals.h
+++ b/runtime/globals.h
@@ -109,6 +109,13 @@
 static constexpr bool kPoisonHeapReferences = false;
 #endif
 
+// If true, enable the TLAB allocator by default.
+#ifdef ART_USE_TLAB
+static constexpr bool kUseTlab = true;
+#else
+static constexpr bool kUseTlab = false;
+#endif
+
 // Kinds of tracing clocks.
 enum class TraceClockSource {
   kThreadCpu,
diff --git a/runtime/instrumentation.cc b/runtime/instrumentation.cc
index b53b8cd..9adb4ac 100644
--- a/runtime/instrumentation.cc
+++ b/runtime/instrumentation.cc
@@ -1030,7 +1030,8 @@
   NthCallerVisitor visitor(self, 1, true);
   visitor.WalkStack(true);
   bool deoptimize = (visitor.caller != nullptr) &&
-                    (interpreter_stubs_installed_ || IsDeoptimized(visitor.caller));
+                    (interpreter_stubs_installed_ || IsDeoptimized(visitor.caller) ||
+                    Dbg::IsForcedInterpreterNeededForUpcall(self, visitor.caller));
   if (deoptimize) {
     if (kVerboseInstrumentation) {
       LOG(INFO) << StringPrintf("Deoptimizing %s by returning from %s with result %#" PRIx64 " in ",
diff --git a/runtime/interpreter/interpreter_common.cc b/runtime/interpreter/interpreter_common.cc
index 26ab602..a3ab026 100644
--- a/runtime/interpreter/interpreter_common.cc
+++ b/runtime/interpreter/interpreter_common.cc
@@ -18,6 +18,7 @@
 
 #include <cmath>
 
+#include "debugger.h"
 #include "mirror/array-inl.h"
 #include "unstarted_runtime.h"
 
@@ -616,8 +617,14 @@
           << PrettyMethod(new_shadow_frame->GetMethod());
       UNREACHABLE();
     }
-    (new_shadow_frame->GetMethod()->GetEntryPointFromInterpreter())(self, code_item,
-                                                                    new_shadow_frame, result);
+    // Force the use of the interpreter when it is required by the debugger.
+    mirror::EntryPointFromInterpreter* entry;
+    if (UNLIKELY(Dbg::IsForcedInterpreterNeededForCalling(self, new_shadow_frame->GetMethod()))) {
+      entry = &art::artInterpreterToInterpreterBridge;
+    } else {
+      entry = new_shadow_frame->GetMethod()->GetEntryPointFromInterpreter();
+    }
+    entry(self, code_item, new_shadow_frame, result);
   } else {
     UnstartedRuntimeInvoke(self, code_item, new_shadow_frame, result, first_dest_reg);
   }
diff --git a/runtime/interpreter/interpreter_common.h b/runtime/interpreter/interpreter_common.h
index 15396d6..7d413c5 100644
--- a/runtime/interpreter/interpreter_common.h
+++ b/runtime/interpreter/interpreter_common.h
@@ -317,7 +317,10 @@
   int32_t test_val = shadow_frame.GetVReg(inst->VRegA_31t(inst_data));
   DCHECK_EQ(switch_data[0], static_cast<uint16_t>(Instruction::kSparseSwitchSignature));
   uint16_t size = switch_data[1];
-  DCHECK_GT(size, 0);
+  // If the payload is empty, return the length of the SPARSE_SWITCH instruction so it is skipped.
+  if (size == 0) {
+    return 3;
+  }
   const int32_t* keys = reinterpret_cast<const int32_t*>(&switch_data[2]);
   DCHECK(IsAligned<4>(keys));
   const int32_t* entries = keys + size;
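A standalone sketch of the lookup this data feeds, under the assumption (consistent with the code above) that the payload stores a sorted key array followed by a parallel target array; with an empty payload the interpreter simply advances past the 3-code-unit SPARSE_SWITCH instruction, which is what the early return above achieves:

#include <cassert>
#include <cstdint>

int32_t SparseSwitchOffset(const int32_t* keys, const int32_t* targets,
                           uint16_t size, int32_t test_val, int32_t fallthrough) {
  if (size == 0) {
    return fallthrough;  // empty payload: fall through to the next instruction
  }
  int lo = 0;
  int hi = size - 1;
  while (lo <= hi) {  // binary search over the sorted keys
    int mid = (lo + hi) / 2;
    if (keys[mid] == test_val) {
      return targets[mid];
    } else if (keys[mid] < test_val) {
      lo = mid + 1;
    } else {
      hi = mid - 1;
    }
  }
  return fallthrough;
}

int main() {
  const int32_t keys[] = {-3, 10, 42};
  const int32_t targets[] = {100, 200, 300};
  assert(SparseSwitchOffset(keys, targets, 3, 10, 3) == 200);
  assert(SparseSwitchOffset(keys, targets, 3, 7, 3) == 3);     // no match
  assert(SparseSwitchOffset(nullptr, nullptr, 0, 7, 3) == 3);  // empty payload
  return 0;
}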
diff --git a/runtime/interpreter/interpreter_goto_table_impl.cc b/runtime/interpreter/interpreter_goto_table_impl.cc
index 5f97f94..9c48df6 100644
--- a/runtime/interpreter/interpreter_goto_table_impl.cc
+++ b/runtime/interpreter/interpreter_goto_table_impl.cc
@@ -14,6 +14,9 @@
  * limitations under the License.
  */
 
+#if !defined(__clang__)
+// Clang 3.4 fails to build the goto interpreter implementation.
+
 #include "interpreter_common.h"
 #include "safe_math.h"
 
@@ -252,14 +255,8 @@
   }
   HANDLE_INSTRUCTION_END();
 
-  HANDLE_INSTRUCTION_START(RETURN_VOID) {
+  HANDLE_INSTRUCTION_START(RETURN_VOID_NO_BARRIER) {
     JValue result;
-    if (do_access_check) {
-      // If access checks are required then the dex-to-dex compiler and analysis of
-      // whether the class has final fields hasn't been performed. Conservatively
-      // perform the memory barrier now.
-      QuasiAtomic::ThreadFenceForConstructor();
-    }
     self->AllowThreadSuspension();
     instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
     if (UNLIKELY(instrumentation->HasMethodExitListeners())) {
@@ -274,7 +271,7 @@
   }
   HANDLE_INSTRUCTION_END();
 
-  HANDLE_INSTRUCTION_START(RETURN_VOID_BARRIER) {
+  HANDLE_INSTRUCTION_START(RETURN_VOID) {
     QuasiAtomic::ThreadFenceForConstructor();
     JValue result;
     self->AllowThreadSuspension();
@@ -2437,7 +2434,7 @@
 #define INSTRUMENTATION_INSTRUCTION_HANDLER(o, code, n, f, r, i, a, v)                            \
   alt_op_##code: {                                                                                \
     if (Instruction::code != Instruction::RETURN_VOID &&                                          \
-        Instruction::code != Instruction::RETURN_VOID_BARRIER &&                                  \
+        Instruction::code != Instruction::RETURN_VOID_NO_BARRIER &&                               \
         Instruction::code != Instruction::RETURN &&                                               \
         Instruction::code != Instruction::RETURN_WIDE &&                                          \
         Instruction::code != Instruction::RETURN_OBJECT) {                                        \
@@ -2477,3 +2474,5 @@
 
 }  // namespace interpreter
 }  // namespace art
+
+#endif
diff --git a/runtime/interpreter/interpreter_switch_impl.cc b/runtime/interpreter/interpreter_switch_impl.cc
index 9313c75..609faf5 100644
--- a/runtime/interpreter/interpreter_switch_impl.cc
+++ b/runtime/interpreter/interpreter_switch_impl.cc
@@ -170,14 +170,8 @@
         inst = inst->Next_1xx();
         break;
       }
-      case Instruction::RETURN_VOID: {
+      case Instruction::RETURN_VOID_NO_BARRIER: {
         JValue result;
-        if (do_access_check) {
-          // If access checks are required then the dex-to-dex compiler and analysis of
-          // whether the class has final fields hasn't been performed. Conservatively
-          // perform the memory barrier now.
-          QuasiAtomic::ThreadFenceForConstructor();
-        }
         self->AllowThreadSuspension();
         if (UNLIKELY(instrumentation->HasMethodExitListeners())) {
           instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
@@ -189,7 +183,7 @@
         }
         return result;
       }
-      case Instruction::RETURN_VOID_BARRIER: {
+      case Instruction::RETURN_VOID: {
         QuasiAtomic::ThreadFenceForConstructor();
         JValue result;
         self->AllowThreadSuspension();
diff --git a/runtime/interpreter/unstarted_runtime.cc b/runtime/interpreter/unstarted_runtime.cc
index fbbc863..98dfdbd 100644
--- a/runtime/interpreter/unstarted_runtime.cc
+++ b/runtime/interpreter/unstarted_runtime.cc
@@ -460,7 +460,7 @@
 static void UnstartedDoubleDoubleToRawLongBits(
     Thread* self ATTRIBUTE_UNUSED, ShadowFrame* shadow_frame, JValue* result, size_t arg_offset) {
   double in = shadow_frame->GetVRegDouble(arg_offset);
-  result->SetJ(bit_cast<int64_t>(in));
+  result->SetJ(bit_cast<int64_t, double>(in));
 }
 
 static mirror::Object* GetDexFromDexCache(Thread* self, mirror::DexCache* dex_cache)
diff --git a/runtime/java_vm_ext.cc b/runtime/java_vm_ext.cc
index e68616f..09bfbf3 100644
--- a/runtime/java_vm_ext.cc
+++ b/runtime/java_vm_ext.cc
@@ -631,20 +631,20 @@
 
   Locks::mutator_lock_->AssertNotHeld(self);
   const char* path_str = path.empty() ? nullptr : path.c_str();
-  void* handle = dlopen(path_str, RTLD_LAZY);
+  void* handle = dlopen(path_str, RTLD_NOW);
   bool needs_native_bridge = false;
   if (handle == nullptr) {
     if (android::NativeBridgeIsSupported(path_str)) {
-      handle = android::NativeBridgeLoadLibrary(path_str, RTLD_LAZY);
+      handle = android::NativeBridgeLoadLibrary(path_str, RTLD_NOW);
       needs_native_bridge = true;
     }
   }
 
-  VLOG(jni) << "[Call to dlopen(\"" << path << "\", RTLD_LAZY) returned " << handle << "]";
+  VLOG(jni) << "[Call to dlopen(\"" << path << "\", RTLD_NOW) returned " << handle << "]";
 
   if (handle == nullptr) {
     *error_msg = dlerror();
-    LOG(ERROR) << "dlopen(\"" << path << "\", RTLD_LAZY) failed: " << *error_msg;
+    VLOG(jni) << "dlopen(\"" << path << "\", RTLD_NOW) failed: " << *error_msg;
     return false;
   }
 
diff --git a/runtime/jdwp/jdwp_event.cc b/runtime/jdwp/jdwp_event.cc
index 4bf7142..c9a4483 100644
--- a/runtime/jdwp/jdwp_event.cc
+++ b/runtime/jdwp/jdwp_event.cc
@@ -133,7 +133,6 @@
       case EK_METHOD_ENTRY:
       case EK_METHOD_EXIT:
       case EK_METHOD_EXIT_WITH_RETURN_VALUE:
-      case EK_SINGLE_STEP:
       case EK_FIELD_ACCESS:
       case EK_FIELD_MODIFICATION:
         return true;
@@ -278,16 +277,7 @@
         Dbg::UnconfigureStep(pMod->step.threadId);
       }
     }
-    if (pEvent->eventKind == EK_SINGLE_STEP) {
-      // Special case for single-steps where we want to avoid the slow pattern deoptimize/undeoptimize
-      // loop between each single-step. In a IDE, this would happens each time the user click on the
-      // "single-step" button. Here we delay the full undeoptimization to the next resume
-      // (VM.Resume or ThreadReference.Resume) or the end of the debugging session (VM.Dispose or
-      // runtime shutdown).
-      // Therefore, in a singles-stepping sequence, only the first single-step will trigger a full
-      // deoptimization and only the last single-step will trigger a full undeoptimization.
-      Dbg::DelayFullUndeoptimization();
-    } else if (NeedsFullDeoptimization(pEvent->eventKind)) {
+    if (NeedsFullDeoptimization(pEvent->eventKind)) {
       CHECK_EQ(req.GetKind(), DeoptimizationRequest::kNothing);
       CHECK(req.Method() == nullptr);
       req.SetKind(DeoptimizationRequest::kFullUndeoptimization);
diff --git a/runtime/jdwp/jdwp_handler.cc b/runtime/jdwp/jdwp_handler.cc
index c7083dc..add1394 100644
--- a/runtime/jdwp/jdwp_handler.cc
+++ b/runtime/jdwp/jdwp_handler.cc
@@ -295,7 +295,6 @@
  */
 static JdwpError VM_Resume(JdwpState*, Request*, ExpandBuf*)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  Dbg::ProcessDelayedFullUndeoptimizations();
   Dbg::ResumeVM();
   return ERR_NONE;
 }
@@ -989,8 +988,6 @@
     return ERR_NONE;
   }
 
-  Dbg::ProcessDelayedFullUndeoptimizations();
-
   Dbg::ResumeThread(thread_id);
   return ERR_NONE;
 }
diff --git a/runtime/jdwp/jdwp_main.cc b/runtime/jdwp/jdwp_main.cc
index 3d69796..e2b88a5 100644
--- a/runtime/jdwp/jdwp_main.cc
+++ b/runtime/jdwp/jdwp_main.cc
@@ -322,8 +322,6 @@
     CHECK(event_list_ == nullptr);
   }
 
-  Dbg::ProcessDelayedFullUndeoptimizations();
-
   /*
    * Should not have one of these in progress.  If the debugger went away
    * mid-request, though, we could see this.
diff --git a/runtime/jit/jit.cc b/runtime/jit/jit.cc
index 729791f..13c1f81 100644
--- a/runtime/jit/jit.cc
+++ b/runtime/jit/jit.cc
@@ -32,10 +32,8 @@
 namespace jit {
 
 JitOptions* JitOptions::CreateFromRuntimeArguments(const RuntimeArgumentMap& options) {
-  if (!options.GetOrDefault(RuntimeArgumentMap::UseJIT)) {
-    return nullptr;
-  }
   auto* jit_options = new JitOptions;
+  jit_options->use_jit_ = options.GetOrDefault(RuntimeArgumentMap::UseJIT);
   jit_options->code_cache_capacity_ =
       options.GetOrDefault(RuntimeArgumentMap::JITCodeCacheCapacity);
   jit_options->compile_threshold_ =
diff --git a/runtime/jit/jit.h b/runtime/jit/jit.h
index 6b206d1..3e80aef 100644
--- a/runtime/jit/jit.h
+++ b/runtime/jit/jit.h
@@ -100,13 +100,21 @@
   bool DumpJitInfoOnShutdown() const {
     return dump_info_on_shutdown_;
   }
+  bool UseJIT() const {
+    return use_jit_;
+  }
+  void SetUseJIT(bool b) {
+    use_jit_ = b;
+  }
 
  private:
+  bool use_jit_;
   size_t code_cache_capacity_;
   size_t compile_threshold_;
   bool dump_info_on_shutdown_;
 
-  JitOptions() : code_cache_capacity_(0), compile_threshold_(0), dump_info_on_shutdown_(false) {
+  JitOptions() : use_jit_(false), code_cache_capacity_(0), compile_threshold_(0),
+      dump_info_on_shutdown_(false) {
   }
 };
 
diff --git a/runtime/jit/jit_code_cache.h b/runtime/jit/jit_code_cache.h
index aa8c717..8a20e39 100644
--- a/runtime/jit/jit_code_cache.h
+++ b/runtime/jit/jit_code_cache.h
@@ -47,33 +47,50 @@
   static constexpr size_t kMaxCapacity = 1 * GB;
   static constexpr size_t kDefaultCapacity = 2 * MB;
 
+  // Create the code cache with a code + data capacity equal to "capacity". On failure, an error
+  // message is passed back in the out arg error_msg.
   static JitCodeCache* Create(size_t capacity, std::string* error_msg);
 
   const uint8_t* CodeCachePtr() const {
     return code_cache_ptr_;
   }
+
   size_t CodeCacheSize() const {
     return code_cache_ptr_ - code_cache_begin_;
   }
+
   size_t CodeCacheRemain() const {
     return code_cache_end_ - code_cache_ptr_;
   }
+
+  const uint8_t* DataCachePtr() const {
+    return data_cache_ptr_;
+  }
+
   size_t DataCacheSize() const {
     return data_cache_ptr_ - data_cache_begin_;
   }
+
   size_t DataCacheRemain() const {
     return data_cache_end_ - data_cache_ptr_;
   }
+
   size_t NumMethods() const {
     return num_methods_;
   }
 
+  // Return true if the code cache contains the code pointer that is the entrypoint of the method.
   bool ContainsMethod(mirror::ArtMethod* method) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  // Return true if the given code pointer is within the code cache.
   bool ContainsCodePtr(const void* ptr) const;
 
+  // Reserve a code region of at least "size" bytes. Returns nullptr if there is no more room.
   uint8_t* ReserveCode(Thread* self, size_t size) LOCKS_EXCLUDED(lock_);
 
+  // Add a data array of size (end - begin) with the associated contents. Returns nullptr if there
+  // is no more room.
   uint8_t* AddDataArray(Thread* self, const uint8_t* begin, const uint8_t* end)
       LOCKS_EXCLUDED(lock_);
 
@@ -81,14 +98,19 @@
   const void* GetCodeFor(mirror::ArtMethod* method)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) LOCKS_EXCLUDED(lock_);
 
+  // Save the compiled code for a method so that GetCodeFor(method) will return old_code_ptr if the
+  // entrypoint isn't within the cache.
   void SaveCompiledCode(mirror::ArtMethod* method, const void* old_code_ptr)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) LOCKS_EXCLUDED(lock_);
 
  private:
   // Takes ownership of code_mem_map.
   explicit JitCodeCache(MemMap* code_mem_map);
+
+  // Unimplemented. TODO: Determine whether it is necessary.
   void FlushInstructionCache();
 
+  // Lock which guards the code cache state, including method_code_map_.
   Mutex lock_;
   // Mem map which holds code and data. We do this since we need to have 32 bit offsets from method
   // headers in code cache which point to things in the data cache. If the maps are more than 4GB
@@ -106,7 +128,7 @@
   // TODO: This relies on methods not moving.
   // This map holds code for methods if they were deoptimized by the instrumentation stubs. This is
   // required since we have to implement ClassLinker::GetQuickOatCodeFor for walking stacks.
-  SafeMap<mirror::ArtMethod*, const void*> method_code_map_;
+  SafeMap<mirror::ArtMethod*, const void*> method_code_map_ GUARDED_BY(lock_);
 
   DISALLOW_COPY_AND_ASSIGN(JitCodeCache);
 };
diff --git a/runtime/jit/jit_code_cache_test.cc b/runtime/jit/jit_code_cache_test.cc
new file mode 100644
index 0000000..2155552
--- /dev/null
+++ b/runtime/jit/jit_code_cache_test.cc
@@ -0,0 +1,103 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "common_runtime_test.h"
+
+#include "class_linker.h"
+#include "jit_code_cache.h"
+#include "mirror/art_method-inl.h"
+#include "scoped_thread_state_change.h"
+#include "thread-inl.h"
+#include "utils.h"
+
+namespace art {
+namespace jit {
+
+class JitCodeCacheTest : public CommonRuntimeTest {
+ public:
+};
+
+TEST_F(JitCodeCacheTest, TestCoverage) {
+  std::string error_msg;
+  constexpr size_t kSize = 1 * MB;
+  std::unique_ptr<JitCodeCache> code_cache(
+      JitCodeCache::Create(kSize, &error_msg));
+  ASSERT_TRUE(code_cache.get() != nullptr) << error_msg;
+  ASSERT_TRUE(code_cache->CodeCachePtr() != nullptr);
+  ASSERT_EQ(code_cache->CodeCacheSize(), 0u);
+  ASSERT_GT(code_cache->CodeCacheRemain(), 0u);
+  ASSERT_TRUE(code_cache->DataCachePtr() != nullptr);
+  ASSERT_EQ(code_cache->DataCacheSize(), 0u);
+  ASSERT_GT(code_cache->DataCacheRemain(), 0u);
+  ASSERT_EQ(code_cache->CodeCacheRemain() + code_cache->DataCacheRemain(), kSize);
+  ASSERT_EQ(code_cache->NumMethods(), 0u);
+  ScopedObjectAccess soa(Thread::Current());
+  StackHandleScope<1> hs(soa.Self());
+  uint8_t* const reserved_code = code_cache->ReserveCode(soa.Self(), 4 * KB);
+  ASSERT_TRUE(reserved_code != nullptr);
+  ASSERT_TRUE(code_cache->ContainsCodePtr(reserved_code));
+  ASSERT_EQ(code_cache->NumMethods(), 1u);
+  ClassLinker* const cl = Runtime::Current()->GetClassLinker();
+  auto h_method = hs.NewHandle(cl->AllocArtMethod(soa.Self()));
+  ASSERT_FALSE(code_cache->ContainsMethod(h_method.Get()));
+  h_method->SetEntryPointFromQuickCompiledCode(reserved_code);
+  ASSERT_TRUE(code_cache->ContainsMethod(h_method.Get()));
+  ASSERT_EQ(code_cache->GetCodeFor(h_method.Get()), reserved_code);
+  // Save the code and then change it.
+  code_cache->SaveCompiledCode(h_method.Get(), reserved_code);
+  h_method->SetEntryPointFromQuickCompiledCode(nullptr);
+  ASSERT_EQ(code_cache->GetCodeFor(h_method.Get()), reserved_code);
+  const uint8_t data_arr[] = {1, 2, 3, 4, 5};
+  uint8_t* data_ptr = code_cache->AddDataArray(soa.Self(), data_arr, data_arr + sizeof(data_arr));
+  ASSERT_TRUE(data_ptr != nullptr);
+  ASSERT_EQ(memcmp(data_ptr, data_arr, sizeof(data_arr)), 0);
+}
+
+TEST_F(JitCodeCacheTest, TestOverflow) {
+  std::string error_msg;
+  constexpr size_t kSize = 1 * MB;
+  std::unique_ptr<JitCodeCache> code_cache(
+      JitCodeCache::Create(kSize, &error_msg));
+  ASSERT_TRUE(code_cache.get() != nullptr) << error_msg;
+  ASSERT_TRUE(code_cache->CodeCachePtr() != nullptr);
+  size_t code_bytes = 0;
+  size_t data_bytes = 0;
+  constexpr size_t kCodeArrSize = 4 * KB;
+  constexpr size_t kDataArrSize = 4 * KB;
+  uint8_t data_arr[kDataArrSize] = {53};
+  // Add code and data until we are full.
+  uint8_t* code_ptr = nullptr;
+  uint8_t* data_ptr = nullptr;
+  do {
+    code_ptr = code_cache->ReserveCode(Thread::Current(), kCodeArrSize);
+    data_ptr = code_cache->AddDataArray(Thread::Current(), data_arr, data_arr + kDataArrSize);
+    if (code_ptr != nullptr) {
+      code_bytes += kCodeArrSize;
+    }
+    if (data_ptr != nullptr) {
+      data_bytes += kDataArrSize;
+    }
+  } while (code_ptr != nullptr || data_ptr != nullptr);
+  // Make sure we added a reasonable amount of code and data.
+  CHECK_GT(code_bytes, 0u);
+  CHECK_LE(code_bytes, kSize);
+  CHECK_GT(data_bytes, 0u);
+  CHECK_LE(data_bytes, kSize);
+  CHECK_GE(code_bytes + data_bytes, kSize * 4 / 5);
+}
+
+}  // namespace jit
+}  // namespace art
diff --git a/runtime/jni_internal_test.cc b/runtime/jni_internal_test.cc
index 1048214..5516eab 100644
--- a/runtime/jni_internal_test.cc
+++ b/runtime/jni_internal_test.cc
@@ -1355,24 +1355,38 @@
   s = env_->NewStringUTF("\xed\xa0\x81\xed\xb0\x80");
   EXPECT_NE(s, nullptr);
   EXPECT_EQ(2, env_->GetStringLength(s));
-  // Note that this uses 2 x 3 byte UTF sequences, one
-  // for each half of the surrogate pair.
-  EXPECT_EQ(6, env_->GetStringUTFLength(s));
+
+  // The surrogate pair gets encoded into a single 4-byte UTF sequence ...
+  EXPECT_EQ(4, env_->GetStringUTFLength(s));
   const char* chars = env_->GetStringUTFChars(s, nullptr);
-  EXPECT_STREQ("\xed\xa0\x81\xed\xb0\x80", chars);
+  EXPECT_STREQ("\xf0\x90\x90\x80", chars);
   env_->ReleaseStringUTFChars(s, chars);
 
+  // ... but is stored as-is in the UTF-16 representation.
+  const jchar* jchars = env_->GetStringChars(s, nullptr);
+  EXPECT_EQ(0xd801, jchars[0]);
+  EXPECT_EQ(0xdc00, jchars[1]);
+  env_->ReleaseStringChars(s, jchars);
+
   // 4 byte UTF sequence appended to an encoded surrogate pair.
   s = env_->NewStringUTF("\xed\xa0\x81\xed\xb0\x80 \xf0\x9f\x8f\xa0");
   EXPECT_NE(s, nullptr);
-  EXPECT_EQ(5, env_->GetStringLength(s));
-  EXPECT_EQ(13, env_->GetStringUTFLength(s));
-  chars = env_->GetStringUTFChars(s, nullptr);
+
   // The 4 byte sequence {0xf0, 0x9f, 0x8f, 0xa0} is converted into a surrogate
-  // pair {0xd83c, 0xdfe0} which is then converted into a two three byte
-  // sequences {0xed 0xa0, 0xbc} and {0xed, 0xbf, 0xa0}, one for each half of
-  // the surrogate pair.
-  EXPECT_STREQ("\xed\xa0\x81\xed\xb0\x80 \xed\xa0\xbc\xed\xbf\xa0", chars);
+  // pair {0xd83c, 0xdfe0}.
+  EXPECT_EQ(5, env_->GetStringLength(s));
+  jchars = env_->GetStringChars(s, nullptr);
+  // The first surrogate pair, encoded as such in the input.
+  EXPECT_EQ(0xd801, jchars[0]);
+  EXPECT_EQ(0xdc00, jchars[1]);
+  // The second surrogate pair, from the 4 byte UTF sequence in the input.
+  EXPECT_EQ(0xd83c, jchars[3]);
+  EXPECT_EQ(0xdfe0, jchars[4]);
+  env_->ReleaseStringChars(s, jchars);
+
+  EXPECT_EQ(9, env_->GetStringUTFLength(s));
+  chars = env_->GetStringUTFChars(s, nullptr);
+  EXPECT_STREQ("\xf0\x90\x90\x80 \xf0\x9f\x8f\xa0", chars);
   env_->ReleaseStringUTFChars(s, chars);
 
   // A string with 1, 2, 3 and 4 byte UTF sequences with spaces
@@ -1380,7 +1394,7 @@
   s = env_->NewStringUTF("\x24 \xc2\xa2 \xe2\x82\xac \xf0\x9f\x8f\xa0");
   EXPECT_NE(s, nullptr);
   EXPECT_EQ(8, env_->GetStringLength(s));
-  EXPECT_EQ(15, env_->GetStringUTFLength(s));
+  EXPECT_EQ(13, env_->GetStringUTFLength(s));
 }
 
 TEST_F(JniInternalTest, NewString) {
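For reference, the arithmetic behind the updated expectations above: the encoded pair in the test decodes to the surrogates {0xd801, 0xdc00}, which combine to the code point U+10400 and re-encode as the single 4-byte sequence \xf0\x90\x90\x80; likewise the last string is 1 + 1 + 2 + 1 + 3 + 1 + 4 = 13 bytes, matching the new GetStringUTFLength expectation. A self-contained sketch of that conversion:

#include <cassert>
#include <cstdint>
#include <cstdio>

// Combine a UTF-16 surrogate pair into a code point.
uint32_t CodePointFromSurrogates(uint16_t high, uint16_t low) {
  return 0x10000u + ((static_cast<uint32_t>(high) - 0xd800u) << 10) +
         (static_cast<uint32_t>(low) - 0xdc00u);
}

int main() {
  uint32_t cp = CodePointFromSurrogates(0xd801, 0xdc00);
  assert(cp == 0x10400u);  // The pair used in the test above.
  // A code point in [U+10000, U+10FFFF] always needs four UTF-8 bytes.
  uint8_t utf8[4] = {
      static_cast<uint8_t>(0xf0 | (cp >> 18)),
      static_cast<uint8_t>(0x80 | ((cp >> 12) & 0x3f)),
      static_cast<uint8_t>(0x80 | ((cp >> 6) & 0x3f)),
      static_cast<uint8_t>(0x80 | (cp & 0x3f)),
  };
  // Matches the bytes the test now expects from GetStringUTFChars.
  assert(utf8[0] == 0xf0 && utf8[1] == 0x90 && utf8[2] == 0x90 && utf8[3] == 0x80);
  std::printf("U+%04X -> %02x %02x %02x %02x\n", cp, utf8[0], utf8[1], utf8[2], utf8[3]);
  return 0;
}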
diff --git a/runtime/memory_region.h b/runtime/memory_region.h
index 939a1a9..f867f6a 100644
--- a/runtime/memory_region.h
+++ b/runtime/memory_region.h
@@ -19,6 +19,7 @@
 
 #include <stdint.h>
 
+#include "base/casts.h"
 #include "base/logging.h"
 #include "base/macros.h"
 #include "base/value_object.h"
@@ -60,23 +61,6 @@
     *ComputeInternalPointer<T>(offset) = value;
   }
 
-  // TODO: Local hack to prevent name clashes between two conflicting
-  // implementations of bit_cast:
-  // - art::bit_cast<Destination, Source> runtime/base/casts.h, and
-  // - art::bit_cast<Source, Destination> from runtime/utils.h.
-  // Remove this when these routines have been merged.
-  template<typename Source, typename Destination>
-  static Destination local_bit_cast(Source in) {
-    static_assert(sizeof(Source) <= sizeof(Destination),
-                  "Size of Source not <= size of Destination");
-    union {
-      Source u;
-      Destination v;
-    } tmp;
-    tmp.u = in;
-    return tmp.v;
-  }
-
   // Load value of type `T` at `offset`.  The memory address corresponding
   // to `offset` does not need to be word-aligned.
   template<typename T> T LoadUnaligned(uintptr_t offset) const {
@@ -88,7 +72,7 @@
       equivalent_unsigned_integer_value +=
           *ComputeInternalPointer<uint8_t>(offset + i) << (i * kBitsPerByte);
     }
-    return local_bit_cast<U, T>(equivalent_unsigned_integer_value);
+    return bit_cast<T, U>(equivalent_unsigned_integer_value);
   }
 
   // Store `value` (of type `T`) at `offset`.  The memory address
@@ -96,7 +80,7 @@
   template<typename T> void StoreUnaligned(uintptr_t offset, T value) const {
     // Equivalent unsigned integer type corresponding to T.
     typedef typename UnsignedIntegerType<sizeof(T)>::type U;
-    U equivalent_unsigned_integer_value = local_bit_cast<T, U>(value);
+    U equivalent_unsigned_integer_value = bit_cast<U, T>(value);
     // Write the value byte by byte in a little-endian fashion.
     for (size_t i = 0; i < sizeof(U); ++i) {
       *ComputeInternalPointer<uint8_t>(offset + i) =
diff --git a/runtime/mirror/art_method-inl.h b/runtime/mirror/art_method-inl.h
index c27c6e9..0ccf5db 100644
--- a/runtime/mirror/art_method-inl.h
+++ b/runtime/mirror/art_method-inl.h
@@ -230,10 +230,6 @@
   return reinterpret_cast<const uint8_t*>(code_pointer) - offset;
 }
 
-inline StackMap ArtMethod::GetStackMap(uint32_t native_pc_offset) {
-  return GetOptimizedCodeInfo().GetStackMapForNativePcOffset(native_pc_offset);
-}
-
 inline CodeInfo ArtMethod::GetOptimizedCodeInfo() {
   DCHECK(IsOptimized(sizeof(void*)));
   const void* code_pointer = GetQuickOatCodePointer(sizeof(void*));
diff --git a/runtime/mirror/art_method.cc b/runtime/mirror/art_method.cc
index bc58709..c1f7594 100644
--- a/runtime/mirror/art_method.cc
+++ b/runtime/mirror/art_method.cc
@@ -202,8 +202,8 @@
   const void* entry_point = GetQuickOatEntryPoint(sizeof(void*));
   uint32_t sought_offset = pc - reinterpret_cast<uintptr_t>(entry_point);
   if (IsOptimized(sizeof(void*))) {
-    uint32_t ret = GetStackMap(sought_offset).GetDexPc();
-    return ret;
+    CodeInfo code_info = GetOptimizedCodeInfo();
+    return code_info.GetStackMapForNativePcOffset(sought_offset).GetDexPc(code_info);
   }
 
   MappingTable table(entry_point != nullptr ?
@@ -401,7 +401,9 @@
 
   Runtime* runtime = Runtime::Current();
   // Call the invoke stub, passing everything as arguments.
-  if (UNLIKELY(!runtime->IsStarted())) {
+  // If the runtime is not yet started, or if it is required by the debugger, then perform the
+  // invocation with the interpreter.
+  if (UNLIKELY(!runtime->IsStarted() || Dbg::IsForcedInterpreterNeededForCalling(self, this))) {
     if (IsStatic()) {
       art::interpreter::EnterInterpreterFromInvoke(self, this, nullptr, args, result);
     } else {
diff --git a/runtime/mirror/art_method.h b/runtime/mirror/art_method.h
index d878f25..82e5d00 100644
--- a/runtime/mirror/art_method.h
+++ b/runtime/mirror/art_method.h
@@ -125,6 +125,14 @@
     return (GetAccessFlags() & kAccNative) != 0;
   }
 
+  bool ShouldNotInline() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return (GetAccessFlags() & kAccDontInline) != 0;
+  }
+
+  void SetShouldNotInline() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    SetAccessFlags(GetAccessFlags() | kAccDontInline);
+  }
+
   bool IsFastNative() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     uint32_t mask = kAccFastNative | kAccNative;
     return (GetAccessFlags() & mask) == mask;
@@ -348,7 +356,6 @@
   const uint8_t* GetVmapTable(const void* code_pointer, size_t pointer_size)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  StackMap GetStackMap(uint32_t native_pc_offset) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   CodeInfo GetOptimizedCodeInfo() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Callers should wrap the uint8_t* in a GcMap instance for convenient access.
diff --git a/runtime/mirror/class-inl.h b/runtime/mirror/class-inl.h
index 97052f1..c368dc6 100644
--- a/runtime/mirror/class-inl.h
+++ b/runtime/mirror/class-inl.h
@@ -45,7 +45,7 @@
     bool is_variable_size = IsVariableSize<kVerifyFlags, kReadBarrierOption>();
     CHECK(!is_variable_size) << " class=" << PrettyTypeOf(this);
   }
-  return GetField32(OFFSET_OF_OBJECT_MEMBER(Class, object_size_));
+  return GetField32(ObjectSizeOffset());
 }
 
 inline Class* Class::GetSuperClass() {
@@ -523,7 +523,7 @@
       << " IsArtField=" << (this == ArtField::GetJavaLangReflectArtField())
       << " IsArtMethod=" << (this == ArtMethod::GetJavaLangReflectArtMethod())
       << " descriptor=" << PrettyDescriptor(this);
-  return GetField32<kVerifyFlags>(OFFSET_OF_OBJECT_MEMBER(Class, access_flags_));
+  return GetField32<kVerifyFlags>(AccessFlagsOffset());
 }
 
 inline String* Class::GetName() {
diff --git a/runtime/mirror/class.cc b/runtime/mirror/class.cc
index 6f4ef60..9fa6073 100644
--- a/runtime/mirror/class.cc
+++ b/runtime/mirror/class.cc
@@ -55,26 +55,27 @@
   java_lang_Class_.VisitRootIfNonNull(callback, arg, RootInfo(kRootStickyClass));
 }
 
-void Class::SetStatus(Status new_status, Thread* self) {
-  Status old_status = GetStatus();
+void Class::SetStatus(Handle<Class> h_this, Status new_status, Thread* self) {
+  Status old_status = h_this->GetStatus();
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   bool class_linker_initialized = class_linker != nullptr && class_linker->IsInitialized();
   if (LIKELY(class_linker_initialized)) {
     if (UNLIKELY(new_status <= old_status && new_status != kStatusError &&
                  new_status != kStatusRetired)) {
-      LOG(FATAL) << "Unexpected change back of class status for " << PrettyClass(this) << " "
-          << old_status << " -> " << new_status;
+      LOG(FATAL) << "Unexpected change back of class status for " << PrettyClass(h_this.Get())
+                 << " " << old_status << " -> " << new_status;
     }
     if (new_status >= kStatusResolved || old_status >= kStatusResolved) {
       // When classes are being resolved the resolution code should hold the lock.
-      CHECK_EQ(GetLockOwnerThreadId(), self->GetThreadId())
+      CHECK_EQ(h_this->GetLockOwnerThreadId(), self->GetThreadId())
             << "Attempt to change status of class while not holding its lock: "
-            << PrettyClass(this) << " " << old_status << " -> " << new_status;
+            << PrettyClass(h_this.Get()) << " " << old_status << " -> " << new_status;
     }
   }
   if (UNLIKELY(new_status == kStatusError)) {
-    CHECK_NE(GetStatus(), kStatusError)
-        << "Attempt to set as erroneous an already erroneous class " << PrettyClass(this);
+    CHECK_NE(h_this->GetStatus(), kStatusError)
+        << "Attempt to set as erroneous an already erroneous class "
+        << PrettyClass(h_this.Get());
 
     // Stash current exception.
     StackHandleScope<1> hs(self);
@@ -100,7 +101,7 @@
       // case.
       Class* exception_class = old_exception->GetClass();
       if (!eiie_class->IsAssignableFrom(exception_class)) {
-        SetVerifyErrorClass(exception_class);
+        h_this->SetVerifyErrorClass(exception_class);
       }
     }
 
@@ -109,9 +110,9 @@
   }
   static_assert(sizeof(Status) == sizeof(uint32_t), "Size of status not equal to uint32");
   if (Runtime::Current()->IsActiveTransaction()) {
-    SetField32Volatile<true>(OFFSET_OF_OBJECT_MEMBER(Class, status_), new_status);
+    h_this->SetField32Volatile<true>(OFFSET_OF_OBJECT_MEMBER(Class, status_), new_status);
   } else {
-    SetField32Volatile<false>(OFFSET_OF_OBJECT_MEMBER(Class, status_), new_status);
+    h_this->SetField32Volatile<false>(OFFSET_OF_OBJECT_MEMBER(Class, status_), new_status);
   }
 
   if (!class_linker_initialized) {
@@ -121,17 +122,17 @@
   } else {
     // Classes that are being resolved or initialized need to notify waiters that the class status
     // changed. See ClassLinker::EnsureResolved and ClassLinker::WaitForInitializeClass.
-    if (IsTemp()) {
+    if (h_this->IsTemp()) {
       // Class is a temporary one, ensure that waiters for resolution get notified of retirement
       // so that they can grab the new version of the class from the class linker's table.
-      CHECK_LT(new_status, kStatusResolved) << PrettyDescriptor(this);
+      CHECK_LT(new_status, kStatusResolved) << PrettyDescriptor(h_this.Get());
       if (new_status == kStatusRetired || new_status == kStatusError) {
-        NotifyAll(self);
+        h_this->NotifyAll(self);
       }
     } else {
       CHECK_NE(new_status, kStatusRetired);
       if (old_status >= kStatusResolved || new_status >= kStatusResolved) {
-        NotifyAll(self);
+        h_this->NotifyAll(self);
       }
     }
   }
@@ -828,11 +829,12 @@
   void operator()(Object* obj, size_t usable_size) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     UNUSED(usable_size);
-    mirror::Class* new_class_obj = obj->AsClass();
-    mirror::Object::CopyObject(self_, new_class_obj, orig_->Get(), copy_bytes_);
-    new_class_obj->SetStatus(Class::kStatusResolving, self_);
-    new_class_obj->PopulateEmbeddedImtAndVTable(imt_handle_scope_);
-    new_class_obj->SetClassSize(new_length_);
+    StackHandleScope<1> hs(self_);
+    Handle<mirror::Class> h_new_class_obj(hs.NewHandle(obj->AsClass()));
+    mirror::Object::CopyObject(self_, h_new_class_obj.Get(), orig_->Get(), copy_bytes_);
+    mirror::Class::SetStatus(h_new_class_obj, Class::kStatusResolving, self_);
+    h_new_class_obj->PopulateEmbeddedImtAndVTable(imt_handle_scope_);
+    h_new_class_obj->SetClassSize(new_length_);
   }
 
  private:
diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h
index bd49754..2dff383 100644
--- a/runtime/mirror/class.h
+++ b/runtime/mirror/class.h
@@ -140,7 +140,9 @@
         GetField32Volatile<kVerifyFlags>(OFFSET_OF_OBJECT_MEMBER(Class, status_)));
   }
 
-  void SetStatus(Status new_status, Thread* self) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  // This is static because 'this' may be moved by GC.
+  static void SetStatus(Handle<Class> h_this, Status new_status, Thread* self)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   static MemberOffset StatusOffset() {
     return OFFSET_OF_OBJECT_MEMBER(Class, status_);
@@ -202,6 +204,9 @@
 
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
   ALWAYS_INLINE uint32_t GetAccessFlags() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  static MemberOffset AccessFlagsOffset() {
+    return OFFSET_OF_OBJECT_MEMBER(Class, access_flags_);
+  }
 
   void SetAccessFlags(uint32_t new_access_flags) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -525,6 +530,9 @@
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
            ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   uint32_t GetObjectSize() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  static MemberOffset ObjectSizeOffset() {
+    return OFFSET_OF_OBJECT_MEMBER(Class, object_size_);
+  }
 
   void SetObjectSize(uint32_t new_object_size) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     DCHECK(!IsVariableSize());
diff --git a/runtime/modifiers.h b/runtime/modifiers.h
index 09dc78a..e7bd207 100644
--- a/runtime/modifiers.h
+++ b/runtime/modifiers.h
@@ -48,6 +48,10 @@
 static constexpr uint32_t kAccFastNative =           0x00080000;  // method (dex only)
 static constexpr uint32_t kAccMiranda =              0x00200000;  // method (dex only)
 
+// Flag is set if the compiler decides it is not worth trying
+// to inline the method. This prevents other callers from trying it again and again.
+static constexpr uint32_t kAccDontInline =           0x00400000;  // method (dex only)
+
 // Special runtime-only flags.
 // Note: if only kAccClassIsReference is set, we have a soft reference.
 
diff --git a/runtime/native/dalvik_system_ZygoteHooks.cc b/runtime/native/dalvik_system_ZygoteHooks.cc
index 8395150..022c56f 100644
--- a/runtime/native/dalvik_system_ZygoteHooks.cc
+++ b/runtime/native/dalvik_system_ZygoteHooks.cc
@@ -21,6 +21,7 @@
 #include "arch/instruction_set.h"
 #include "debugger.h"
 #include "java_vm_ext.h"
+#include "jit/jit.h"
 #include "jni_internal.h"
 #include "JNIHelp.h"
 #include "ScopedUtfChars.h"
@@ -94,18 +95,17 @@
     debug_flags &= ~DEBUG_ENABLE_SAFEMODE;
   }
 
+  bool use_jit = false;
   if ((debug_flags & DEBUG_ENABLE_JIT) != 0) {
     if (safe_mode) {
-      LOG(INFO) << "Not enabling JIT due to VM safe mode";
+      LOG(INFO) << "Not enabling JIT due to safe mode";
     } else {
-      if (runtime->GetJit() == nullptr) {
-        runtime->CreateJit();
-      } else {
-        LOG(INFO) << "Not late-enabling JIT (already on)";
-      }
+      use_jit = true;
+      LOG(INFO) << "Late-enabling JIT";
     }
     debug_flags &= ~DEBUG_ENABLE_JIT;
   }
+  runtime->GetJITOptions()->SetUseJIT(use_jit);
 
   // This is for backwards compatibility with Dalvik.
   debug_flags &= ~DEBUG_ENABLE_ASSERT;
diff --git a/runtime/native/java_lang_Runtime.cc b/runtime/native/java_lang_Runtime.cc
index 84b18ab..bd043a8 100644
--- a/runtime/native/java_lang_Runtime.cc
+++ b/runtime/native/java_lang_Runtime.cc
@@ -30,6 +30,12 @@
 #include "ScopedUtfChars.h"
 #include "verify_object-inl.h"
 
+#include <sstream>
+#ifdef HAVE_ANDROID_OS
+// This function is provided by the Android linker.
+extern "C" void android_update_LD_LIBRARY_PATH(const char* ld_library_path);
+#endif  // HAVE_ANDROID_OS
+
 namespace art {
 
 static void Runtime_gc(JNIEnv*, jclass) {
@@ -46,30 +52,53 @@
   exit(status);
 }
 
-static jstring Runtime_nativeLoad(JNIEnv* env, jclass, jstring javaFilename, jobject javaLoader,
-                                  jstring javaLdLibraryPath) {
-  // TODO: returns NULL on success or an error message describing the failure on failure. This
-  // should be refactored in terms of suppressed exceptions.
-  ScopedUtfChars filename(env, javaFilename);
-  if (filename.c_str() == NULL) {
-    return NULL;
+static void SetLdLibraryPath(JNIEnv* env, jstring javaLdLibraryPathJstr, jstring javaDexPathJstr) {
+#ifdef HAVE_ANDROID_OS
+  std::stringstream ss;
+  if (javaLdLibraryPathJstr != nullptr) {
+    ScopedUtfChars javaLdLibraryPath(env, javaLdLibraryPathJstr);
+    if (javaLdLibraryPath.c_str() != nullptr) {
+      ss << javaLdLibraryPath.c_str();
+    }
   }
 
-  if (javaLdLibraryPath != NULL) {
-    ScopedUtfChars ldLibraryPath(env, javaLdLibraryPath);
-    if (ldLibraryPath.c_str() == NULL) {
-      return NULL;
-    }
-    void* sym = dlsym(RTLD_DEFAULT, "android_update_LD_LIBRARY_PATH");
-    if (sym != NULL) {
-      typedef void (*Fn)(const char*);
-      Fn android_update_LD_LIBRARY_PATH = reinterpret_cast<Fn>(sym);
-      (*android_update_LD_LIBRARY_PATH)(ldLibraryPath.c_str());
-    } else {
-      LOG(WARNING) << "android_update_LD_LIBRARY_PATH not found; .so dependencies will not work!";
+  if (javaDexPathJstr != nullptr) {
+    ScopedUtfChars javaDexPath(env, javaDexPathJstr);
+    if (javaDexPath.c_str() != nullptr) {
+      std::vector<std::string> dexPathVector;
+      Split(javaDexPath.c_str(), ':', &dexPathVector);
+
+      for (auto abi : art::Runtime::Current()->GetCpuAbilist()) {
+        for (auto zip_path : dexPathVector) {
+          // Native libraries live under lib/<abi>/ inside the .apk file.
+          ss << ":" << zip_path << "!" << "lib/" << abi;
+        }
+      }
     }
   }
 
+  std::string ldLibraryPathStr = ss.str();
+  const char* ldLibraryPath = ldLibraryPathStr.c_str();
+  if (*ldLibraryPath == ':') {
+    ++ldLibraryPath;
+  }
+
+  android_update_LD_LIBRARY_PATH(ldLibraryPath);
+#else
+  LOG(WARNING) << "android_update_LD_LIBRARY_PATH not found; .so dependencies will not work!";
+  UNUSED(javaLdLibraryPathJstr, javaDexPathJstr, env);
+#endif
+}
+
+static jstring Runtime_nativeLoad(JNIEnv* env, jclass, jstring javaFilename, jobject javaLoader,
+                                  jstring javaLdLibraryPathJstr, jstring javaDexPathJstr) {
+  ScopedUtfChars filename(env, javaFilename);
+  if (filename.c_str() == nullptr) {
+    return nullptr;
+  }
+
+  SetLdLibraryPath(env, javaLdLibraryPathJstr, javaDexPathJstr);
+
   std::string error_msg;
   {
     JavaVMExt* vm = Runtime::Current()->GetJavaVM();
@@ -101,7 +130,7 @@
   NATIVE_METHOD(Runtime, gc, "()V"),
   NATIVE_METHOD(Runtime, maxMemory, "!()J"),
   NATIVE_METHOD(Runtime, nativeExit, "(I)V"),
-  NATIVE_METHOD(Runtime, nativeLoad, "(Ljava/lang/String;Ljava/lang/ClassLoader;Ljava/lang/String;)Ljava/lang/String;"),
+  NATIVE_METHOD(Runtime, nativeLoad, "(Ljava/lang/String;Ljava/lang/ClassLoader;Ljava/lang/String;Ljava/lang/String;)Ljava/lang/String;"),
   NATIVE_METHOD(Runtime, totalMemory, "!()J"),
 };
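The SetLdLibraryPath change above composes the linker search path by appending one <apk>!lib/<abi> entry per supported ABI and per entry on the colon-separated dex path. Below is a minimal standalone sketch of that composition; the inputs are hypothetical, and in the runtime the ABI list comes from Runtime::GetCpuAbilist(), which is populated from the new --cpu-abilist option.

#include <iostream>
#include <sstream>
#include <string>
#include <vector>

// Sketch of the composition done in SetLdLibraryPath: for each supported ABI
// and each .apk on the dex path, append "<apk>!lib/<abi>" so the linker can
// find native libraries packed inside the APK. All values here are examples.
std::string BuildLdLibraryPath(const std::string& ld_library_path,
                               const std::vector<std::string>& dex_paths,
                               const std::vector<std::string>& abis) {
  std::stringstream ss;
  ss << ld_library_path;
  for (const std::string& abi : abis) {
    for (const std::string& zip_path : dex_paths) {
      ss << ":" << zip_path << "!" << "lib/" << abi;
    }
  }
  std::string result = ss.str();
  // Drop the leading ':' left behind when the initial LD_LIBRARY_PATH was empty.
  if (!result.empty() && result[0] == ':') {
    result.erase(0, 1);
  }
  return result;
}

int main() {
  // Prints "/data/app/foo/base.apk!lib/arm64-v8a:/data/app/foo/base.apk!lib/armeabi-v7a".
  std::cout << BuildLdLibraryPath("", {"/data/app/foo/base.apk"},
                                  {"arm64-v8a", "armeabi-v7a"}) << std::endl;
  return 0;
}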
 
diff --git a/runtime/oat.h b/runtime/oat.h
index d420c3a..120de6d 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -32,7 +32,7 @@
 class PACKED(4) OatHeader {
  public:
   static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' };
-  static constexpr uint8_t kOatVersion[] = { '0', '5', '9', '\0' };
+  static constexpr uint8_t kOatVersion[] = { '0', '6', '1', '\0' };
 
   static constexpr const char* kImageLocationKey = "image-location";
   static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";
diff --git a/runtime/oat_file.cc b/runtime/oat_file.cc
index 356e3d2..69cb22d 100644
--- a/runtime/oat_file.cc
+++ b/runtime/oat_file.cc
@@ -17,10 +17,11 @@
 #include "oat_file.h"
 
 #include <dlfcn.h>
-#include <sstream>
 #include <string.h>
 #include <unistd.h>
 
+#include <sstream>
+
 #include "base/bit_vector.h"
 #include "base/stl_util.h"
 #include "base/unix_file/fd_file.h"
@@ -38,12 +39,33 @@
 
 namespace art {
 
+std::string OatFile::ResolveRelativeEncodedDexLocation(
+      const char* abs_dex_location, const std::string& rel_dex_location) {
+  if (abs_dex_location != nullptr && rel_dex_location[0] != '/') {
+    // Strip :classes<N>.dex used for secondary multidex files.
+    std::string base = DexFile::GetBaseLocation(rel_dex_location);
+    std::string multidex_suffix = DexFile::GetMultiDexSuffix(rel_dex_location);
+
+    // Check if the base is a suffix of the provided abs_dex_location.
+    std::string target_suffix = "/" + base;
+    std::string abs_location(abs_dex_location);
+    if (abs_location.size() > target_suffix.size()) {
+      size_t pos = abs_location.size() - target_suffix.size();
+      if (abs_location.compare(pos, std::string::npos, target_suffix) == 0) {
+        return abs_location + multidex_suffix;
+      }
+    }
+  }
+  return rel_dex_location;
+}
+
 void OatFile::CheckLocation(const std::string& location) {
   CHECK(!location.empty());
 }
 
 OatFile* OatFile::OpenWithElfFile(ElfFile* elf_file,
                                   const std::string& location,
+                                  const char* abs_dex_location,
                                   std::string* error_msg) {
   std::unique_ptr<OatFile> oat_file(new OatFile(location, false));
   oat_file->elf_file_.reset(elf_file);
@@ -53,7 +75,7 @@
   oat_file->begin_ = elf_file->Begin() + offset;
   oat_file->end_ = elf_file->Begin() + size + offset;
   // Ignore the optional .bss section when opening non-executable.
-  return oat_file->Setup(error_msg) ? oat_file.release() : nullptr;
+  return oat_file->Setup(abs_dex_location, error_msg) ? oat_file.release() : nullptr;
 }
 
 OatFile* OatFile::Open(const std::string& filename,
@@ -61,6 +83,7 @@
                        uint8_t* requested_base,
                        uint8_t* oat_file_begin,
                        bool executable,
+                       const char* abs_dex_location,
                        std::string* error_msg) {
   CHECK(!filename.empty()) << location;
   CheckLocation(location);
@@ -80,7 +103,7 @@
     return nullptr;
   }
   ret.reset(OpenElfFile(file.get(), location, requested_base, oat_file_begin, false, executable,
-                        error_msg));
+                        abs_dex_location, error_msg));
 
   // It would be nice to unlink here. But we might have opened the file created by the
   // ScopedLock, which we better not delete to avoid races. TODO: Investigate how to fix the API
@@ -88,14 +111,18 @@
   return ret.release();
 }
 
-OatFile* OatFile::OpenWritable(File* file, const std::string& location, std::string* error_msg) {
+OatFile* OatFile::OpenWritable(File* file, const std::string& location,
+                               const char* abs_dex_location,
+                               std::string* error_msg) {
   CheckLocation(location);
-  return OpenElfFile(file, location, nullptr, nullptr, true, false, error_msg);
+  return OpenElfFile(file, location, nullptr, nullptr, true, false, abs_dex_location, error_msg);
 }
 
-OatFile* OatFile::OpenReadable(File* file, const std::string& location, std::string* error_msg) {
+OatFile* OatFile::OpenReadable(File* file, const std::string& location,
+                               const char* abs_dex_location,
+                               std::string* error_msg) {
   CheckLocation(location);
-  return OpenElfFile(file, location, nullptr, nullptr, false, false, error_msg);
+  return OpenElfFile(file, location, nullptr, nullptr, false, false, abs_dex_location, error_msg);
 }
 
 OatFile* OatFile::OpenElfFile(File* file,
@@ -104,10 +131,11 @@
                               uint8_t* oat_file_begin,
                               bool writable,
                               bool executable,
+                              const char* abs_dex_location,
                               std::string* error_msg) {
   std::unique_ptr<OatFile> oat_file(new OatFile(location, executable));
   bool success = oat_file->ElfFileOpen(file, requested_base, oat_file_begin, writable, executable,
-                                       error_msg);
+                                       abs_dex_location, error_msg);
   if (!success) {
     CHECK(!error_msg->empty());
     return nullptr;
@@ -131,6 +159,7 @@
 
 bool OatFile::ElfFileOpen(File* file, uint8_t* requested_base, uint8_t* oat_file_begin,
                           bool writable, bool executable,
+                          const char* abs_dex_location,
                           std::string* error_msg) {
   // TODO: rename requested_base to oat_data_begin
   elf_file_.reset(ElfFile::Open(file, writable, /*program_header_only*/true, error_msg,
@@ -180,10 +209,10 @@
     bss_end_ += sizeof(uint32_t);
   }
 
-  return Setup(error_msg);
+  return Setup(abs_dex_location, error_msg);
 }
 
-bool OatFile::Setup(std::string* error_msg) {
+bool OatFile::Setup(const char* abs_dex_location, std::string* error_msg) {
   if (!GetOatHeader().IsValid()) {
     std::string cause = GetOatHeader().GetValidationErrorMessage();
     *error_msg = StringPrintf("Invalid oat header for '%s': %s", GetLocation().c_str(),
@@ -230,7 +259,9 @@
       return false;
     }
 
-    std::string dex_file_location(dex_file_location_data, dex_file_location_size);
+    std::string dex_file_location = ResolveRelativeEncodedDexLocation(
+        abs_dex_location,
+        std::string(dex_file_location_data, dex_file_location_size));
 
     uint32_t dex_file_checksum = *reinterpret_cast<const uint32_t*>(oat);
     oat += sizeof(dex_file_checksum);
diff --git a/runtime/oat_file.h b/runtime/oat_file.h
index 564185c..51952f3 100644
--- a/runtime/oat_file.h
+++ b/runtime/oat_file.h
@@ -43,14 +43,18 @@
   // Opens an oat file contained within the given elf file. This is always opened as
   // non-executable at the moment.
   static OatFile* OpenWithElfFile(ElfFile* elf_file, const std::string& location,
+                                  const char* abs_dex_location,
                                   std::string* error_msg);
   // Open an oat file. Returns NULL on failure.  Requested base can
   // optionally be used to request where the file should be loaded.
+  // See ResolveRelativeEncodedDexLocation for a description of how the
+  // abs_dex_location argument is used.
   static OatFile* Open(const std::string& filename,
                        const std::string& location,
                        uint8_t* requested_base,
                        uint8_t* oat_file_begin,
                        bool executable,
+                       const char* abs_dex_location,
                        std::string* error_msg);
 
   // Open an oat file from an already opened File.
@@ -58,9 +62,13 @@
   // where relocations may be required. Currently used from
   // ImageWriter which wants to open a writable version from an existing
   // file descriptor for patching.
-  static OatFile* OpenWritable(File* file, const std::string& location, std::string* error_msg);
+  static OatFile* OpenWritable(File* file, const std::string& location,
+                               const char* abs_dex_location,
+                               std::string* error_msg);
   // Opens an oat file from an already opened File. Maps it PROT_READ, MAP_PRIVATE.
-  static OatFile* OpenReadable(File* file, const std::string& location, std::string* error_msg);
+  static OatFile* OpenReadable(File* file, const std::string& location,
+                               const char* abs_dex_location,
+                               std::string* error_msg);
 
   ~OatFile();
 
@@ -279,6 +287,18 @@
   const uint8_t* BssBegin() const;
   const uint8_t* BssEnd() const;
 
+  // Returns the absolute dex location for the encoded relative dex location.
+  //
+  // If not nullptr, abs_dex_location is used to resolve the absolute dex
+  // location of relative dex locations encoded in the oat file.
+  // For example, given absolute location "/data/app/foo/base.apk", encoded
+  // dex locations "base.apk", "base.apk:classes2.dex", etc. would be resolved
+  // to "/data/app/foo/base.apk", "/data/app/foo/base.apk:classes2.dex", etc.
+  // Relative encoded dex locations that don't match the given abs_dex_location
+  // are left unchanged.
+  static std::string ResolveRelativeEncodedDexLocation(
+      const char* abs_dex_location, const std::string& rel_dex_location);
+
  private:
   static void CheckLocation(const std::string& location);
 
@@ -288,14 +308,17 @@
                               uint8_t* oat_file_begin,  // Override base if not null
                               bool writable,
                               bool executable,
+                              const char* abs_dex_location,
                               std::string* error_msg);
 
   explicit OatFile(const std::string& filename, bool executable);
   bool ElfFileOpen(File* file, uint8_t* requested_base,
                    uint8_t* oat_file_begin,  // Override where the file is loaded to if not null
                    bool writable, bool executable,
+                   const char* abs_dex_location,
                    std::string* error_msg);
-  bool Setup(std::string* error_msg);
+
+  bool Setup(const char* abs_dex_location, std::string* error_msg);
 
   // The oat file name.
   //
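Every OatFile::Open* entry point now threads abs_dex_location through to Setup(), so a caller that knows where the dex file actually lives passes that path along and relative encoded dex locations resolve against it. A hedged usage sketch follows; the file names are hypothetical, error handling is abbreviated, and it assumes the runtime's oat_file.h and logging headers are available.

// Illustrative only: paths are made up, and real callers such as
// OatFileAssistant also supply requested_base/oat_file_begin as needed.
std::string error_msg;
std::unique_ptr<OatFile> oat_file(OatFile::Open(
    "/data/dalvik-cache/arm64/data@app@foo@base.apk@classes.dex",  // oat filename
    "/data/dalvik-cache/arm64/data@app@foo@base.apk@classes.dex",  // oat location
    /* requested_base */ nullptr,
    /* oat_file_begin */ nullptr,
    /* executable */ false,
    /* abs_dex_location */ "/data/app/foo/base.apk",
    &error_msg));
if (oat_file == nullptr) {
  LOG(WARNING) << "Failed to open oat file: " << error_msg;
}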
diff --git a/runtime/oat_file_assistant.cc b/runtime/oat_file_assistant.cc
index f87fa4f..9a17b01 100644
--- a/runtime/oat_file_assistant.cc
+++ b/runtime/oat_file_assistant.cc
@@ -850,7 +850,7 @@
       std::string error_msg;
       cached_odex_file_.reset(OatFile::Open(odex_file_name.c_str(),
             odex_file_name.c_str(), nullptr, nullptr, load_executable_,
-            &error_msg));
+            dex_location_, &error_msg));
       if (cached_odex_file_.get() == nullptr) {
         VLOG(oat) << "OatFileAssistant test for existing pre-compiled oat file "
           << odex_file_name << ": " << error_msg;
@@ -875,7 +875,8 @@
       const std::string& oat_file_name = *OatFileName();
       std::string error_msg;
       cached_oat_file_.reset(OatFile::Open(oat_file_name.c_str(),
-            oat_file_name.c_str(), nullptr, nullptr, load_executable_, &error_msg));
+            oat_file_name.c_str(), nullptr, nullptr, load_executable_,
+            dex_location_, &error_msg));
       if (cached_oat_file_.get() == nullptr) {
         VLOG(oat) << "OatFileAssistant test for existing oat file "
           << oat_file_name << ": " << error_msg;
diff --git a/runtime/oat_file_assistant_test.cc b/runtime/oat_file_assistant_test.cc
index be8652c..41dc2d7 100644
--- a/runtime/oat_file_assistant_test.cc
+++ b/runtime/oat_file_assistant_test.cc
@@ -326,12 +326,43 @@
   GenerateOatForTest(dex_location.c_str());
 
   // Verify we can load both dex files.
-  OatFileAssistant executable_oat_file_assistant(dex_location.c_str(), kRuntimeISA, true);
-  std::unique_ptr<OatFile> oat_file = executable_oat_file_assistant.GetBestOatFile();
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true);
+  std::unique_ptr<OatFile> oat_file = oat_file_assistant.GetBestOatFile();
   ASSERT_TRUE(oat_file.get() != nullptr);
   EXPECT_TRUE(oat_file->IsExecutable());
   std::vector<std::unique_ptr<const DexFile>> dex_files;
-  dex_files = executable_oat_file_assistant.LoadDexFiles(*oat_file, dex_location.c_str());
+  dex_files = oat_file_assistant.LoadDexFiles(*oat_file, dex_location.c_str());
+  EXPECT_EQ(2u, dex_files.size());
+}
+
+// Case: We have a MultiDEX file and an up-to-date OAT file for it with relative
+// encoded dex locations.
+// Expect: The oat file status is kUpToDate.
+TEST_F(OatFileAssistantTest, RelativeEncodedDexLocation) {
+  std::string dex_location = GetScratchDir() + "/RelativeEncodedDexLocation.jar";
+  std::string oat_location = GetISADir() + "/RelativeEncodedDexLocation.oat";
+
+  // Create the dex file
+  Copy(GetMultiDexSrc1(), dex_location);
+
+  // Create the oat file with relative encoded dex location.
+  std::vector<std::string> args;
+  args.push_back("--dex-file=" + dex_location);
+  args.push_back("--dex-location=" + std::string("RelativeEncodedDexLocation.jar"));
+  args.push_back("--oat-file=" + oat_location);
+
+  std::string error_msg;
+  ASSERT_TRUE(OatFileAssistant::Dex2Oat(args, &error_msg)) << error_msg;
+
+  // Verify we can load both dex files.
+  OatFileAssistant oat_file_assistant(dex_location.c_str(),
+                                      oat_location.c_str(),
+                                      kRuntimeISA, true);
+  std::unique_ptr<OatFile> oat_file = oat_file_assistant.GetBestOatFile();
+  ASSERT_TRUE(oat_file.get() != nullptr);
+  EXPECT_TRUE(oat_file->IsExecutable());
+  std::vector<std::unique_ptr<const DexFile>> dex_files;
+  dex_files = oat_file_assistant.LoadDexFiles(*oat_file, dex_location.c_str());
   EXPECT_EQ(2u, dex_files.size());
 }
 
diff --git a/runtime/oat_file_test.cc b/runtime/oat_file_test.cc
new file mode 100644
index 0000000..f2213e9
--- /dev/null
+++ b/runtime/oat_file_test.cc
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "oat_file.h"
+
+#include <string>
+
+#include <gtest/gtest.h>
+
+namespace art {
+
+TEST(OatFileTest, ResolveRelativeEncodedDexLocation) {
+  EXPECT_EQ(std::string("/data/app/foo/base.apk"),
+      OatFile::ResolveRelativeEncodedDexLocation(
+        nullptr, "/data/app/foo/base.apk"));
+
+  EXPECT_EQ(std::string("/system/framework/base.apk"),
+      OatFile::ResolveRelativeEncodedDexLocation(
+        "/data/app/foo/base.apk", "/system/framework/base.apk"));
+
+  EXPECT_EQ(std::string("/data/app/foo/base.apk"),
+      OatFile::ResolveRelativeEncodedDexLocation(
+        "/data/app/foo/base.apk", "base.apk"));
+
+  EXPECT_EQ(std::string("/data/app/foo/base.apk"),
+      OatFile::ResolveRelativeEncodedDexLocation(
+        "/data/app/foo/base.apk", "foo/base.apk"));
+
+  EXPECT_EQ(std::string("/data/app/foo/base.apk:classes2.dex"),
+      OatFile::ResolveRelativeEncodedDexLocation(
+        "/data/app/foo/base.apk", "base.apk:classes2.dex"));
+
+  EXPECT_EQ(std::string("/data/app/foo/base.apk:classes11.dex"),
+      OatFile::ResolveRelativeEncodedDexLocation(
+        "/data/app/foo/base.apk", "base.apk:classes11.dex"));
+
+  EXPECT_EQ(std::string("base.apk"),
+      OatFile::ResolveRelativeEncodedDexLocation(
+        "/data/app/foo/sludge.apk", "base.apk"));
+
+  EXPECT_EQ(std::string("o/base.apk"),
+      OatFile::ResolveRelativeEncodedDexLocation(
+        "/data/app/foo/base.apk", "o/base.apk"));
+}
+
+}  // namespace art
diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc
index a53aeaa..337c5df 100644
--- a/runtime/parsed_options.cc
+++ b/runtime/parsed_options.cc
@@ -255,6 +255,9 @@
           .IntoKey(M::ZygoteMaxFailedBoots)
       .Define("-Xno-dex-file-fallback")
           .IntoKey(M::NoDexFileFallback)
+      .Define("--cpu-abilist=_")
+          .WithType<std::string>()
+          .IntoKey(M::CpuAbiList)
       .Ignore({
           "-ea", "-da", "-enableassertions", "-disableassertions", "--runtime-arg", "-esa",
           "-dsa", "-enablesystemassertions", "-disablesystemassertions", "-Xrs", "-Xint:_",
diff --git a/runtime/primitive.h b/runtime/primitive.h
index 2d6b6b3..32bfdaf 100644
--- a/runtime/primitive.h
+++ b/runtime/primitive.h
@@ -153,7 +153,10 @@
   }
 
   static bool IsIntegralType(Type type) {
+    // The Java language does not allow treating boolean as an integral type,
+    // but our bit representation makes it safe to do so.
     switch (type) {
+      case kPrimBoolean:
       case kPrimByte:
       case kPrimChar:
       case kPrimShort:
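With kPrimBoolean added to the switch above, Primitive::IsIntegralType() now also reports true for booleans. A small sketch of the observable effect, assuming the runtime's primitive.h is on the include path:

#include "primitive.h"

#include <cassert>

// After this change, booleans count as integral because ART stores them in the
// same narrow 32-bit vreg representation as the other small integral types.
void CheckBooleanIsIntegral() {
  assert(art::Primitive::IsIntegralType(art::Primitive::kPrimBoolean));
  assert(art::Primitive::IsIntegralType(art::Primitive::kPrimInt));
  assert(!art::Primitive::IsIntegralType(art::Primitive::kPrimFloat));
}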
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 189559e..9ca00b1 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -483,11 +483,10 @@
     }
   }
 
-  // If we are the zygote then we need to wait until after forking to create the code cache due to
-  // SELinux restrictions on r/w/x memory regions.
-  if (!IsZygote() && jit_.get() != nullptr) {
-    jit_->CreateInstrumentationCache(jit_options_->GetCompileThreshold());
-    jit_->CreateThreadPool();
+  // If we are the zygote then we need to wait until after forking to create the code cache
+  // due to SELinux restrictions on r/w/x memory regions.
+  if (!IsZygote() && jit_options_->UseJIT()) {
+    CreateJit();
   }
 
   if (!IsImageDex2OatEnabled() || !GetHeap()->HasImageSpace()) {
@@ -611,11 +610,9 @@
 
   // Create the thread pools.
   heap_->CreateThreadPool();
-  if (jit_options_.get() != nullptr && jit_.get() == nullptr) {
+  if (jit_.get() == nullptr && jit_options_->UseJIT()) {
+    // Create the JIT if the flag is set and we haven't already created it (happens for run-tests).
     CreateJit();
-    jit_->CreateInstrumentationCache(jit_options_->GetCompileThreshold());
-    jit_->CreateThreadPool();
   }
 
   StartSignalCatcher();
@@ -691,7 +688,7 @@
     return false;
   }
   std::unique_ptr<OatFile> oat_file(OatFile::OpenWithElfFile(elf_file.release(), oat_location,
-                                                             &error_msg));
+                                                             nullptr, &error_msg));
   if (oat_file.get() == nullptr) {
     LOG(INFO) << "Unable to use '" << oat_filename << "' because " << error_msg;
     return false;
@@ -795,6 +792,8 @@
   verify_ = runtime_options.GetOrDefault(Opt::Verify);
   allow_dex_file_fallback_ = !runtime_options.Exists(Opt::NoDexFileFallback);
 
+  Split(runtime_options.GetOrDefault(Opt::CpuAbiList), ',', &cpu_abilist_);
+
   if (runtime_options.GetOrDefault(Opt::Interpret)) {
     GetInstrumentation()->ForceInterpretOnly();
   }
@@ -843,20 +842,19 @@
     Dbg::ConfigureJdwp(runtime_options.GetOrDefault(Opt::JdwpOptions));
   }
 
-  if (!IsAotCompiler()) {
+  jit_options_.reset(jit::JitOptions::CreateFromRuntimeArguments(runtime_options));
+  bool use_jit = jit_options_->UseJIT();
+  if (IsAotCompiler()) {
     // If we are already the compiler at this point, we must be dex2oat. Don't create the jit in
     // this case.
     // If runtime_options doesn't have UseJIT set to true then CreateFromRuntimeArguments returns
     // nullptr and we don't create the jit.
-    jit_options_.reset(jit::JitOptions::CreateFromRuntimeArguments(runtime_options));
-  }
-  if (!IsZygote() && jit_options_.get() != nullptr) {
-    CreateJit();
+    use_jit = false;
   }
 
   // Use MemMap arena pool for jit, malloc otherwise. Malloc arenas are faster to allocate but
   // can't be trimmed as easily.
-  const bool use_malloc = jit_options_.get() == nullptr;
+  const bool use_malloc = !use_jit;
   arena_pool_.reset(new ArenaPool(use_malloc));
 
   BlockSignals();
@@ -1661,11 +1659,13 @@
 }
 
 void Runtime::CreateJit() {
-  CHECK(jit_options_.get() != nullptr);
+  CHECK(!IsAotCompiler());
   std::string error_msg;
   jit_.reset(jit::Jit::Create(jit_options_.get(), &error_msg));
   if (jit_.get() != nullptr) {
     compiler_callbacks_ = jit_->GetCompilerCallbacks();
+    jit_->CreateInstrumentationCache(jit_options_->GetCompileThreshold());
+    jit_->CreateThreadPool();
   } else {
     LOG(WARNING) << "Failed to create JIT " << error_msg;
   }
diff --git a/runtime/runtime.h b/runtime/runtime.h
index 3cf22bf..9a04835 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -524,6 +524,10 @@
     return allow_dex_file_fallback_;
   }
 
+  const std::vector<std::string>& GetCpuAbilist() const {
+    return cpu_abilist_;
+  }
+
   bool RunningOnValgrind() const {
     return running_on_valgrind_;
   }
@@ -540,6 +544,7 @@
     return zygote_max_failed_boots_;
   }
 
+  // Create the JIT, instrumentation, and code cache.
   void CreateJit();
 
   ArenaPool* GetArenaPool() {
@@ -549,6 +554,10 @@
     return arena_pool_.get();
   }
 
+  jit::JitOptions* GetJITOptions() {
+    return jit_options_.get();
+  }
+
  private:
   static void InitPlatformSignalHandlers();
 
@@ -701,6 +710,9 @@
   // available/usable.
   bool allow_dex_file_fallback_;
 
+  // List of supported CPU ABIs.
+  std::vector<std::string> cpu_abilist_;
+
   // Specifies target SDK version to allow workarounds for certain API levels.
   int32_t target_sdk_version_;
 
diff --git a/runtime/runtime_options.def b/runtime/runtime_options.def
index 8775f8d..1f273cf 100644
--- a/runtime/runtime_options.def
+++ b/runtime/runtime_options.def
@@ -62,7 +62,7 @@
 RUNTIME_OPTIONS_KEY (Unit,                DumpJITInfoOnShutdown)
 RUNTIME_OPTIONS_KEY (Unit,                IgnoreMaxFootprint)
 RUNTIME_OPTIONS_KEY (Unit,                LowMemoryMode)
-RUNTIME_OPTIONS_KEY (bool,                UseTLAB,                        false)
+RUNTIME_OPTIONS_KEY (bool,                UseTLAB,                        kUseTlab)
 RUNTIME_OPTIONS_KEY (bool,                EnableHSpaceCompactForOOM,      true)
 RUNTIME_OPTIONS_KEY (bool,                UseJIT,      false)
 RUNTIME_OPTIONS_KEY (unsigned int,        JITCompileThreshold, jit::Jit::kDefaultCompileThreshold)
@@ -104,6 +104,7 @@
                                           ImageCompilerOptions)  // -Ximage-compiler-option ...
 RUNTIME_OPTIONS_KEY (bool,                Verify,                         true)
 RUNTIME_OPTIONS_KEY (std::string,         NativeBridge)
+RUNTIME_OPTIONS_KEY (std::string,         CpuAbiList)
 
 // Not parse-able from command line, but can be provided explicitly.
 RUNTIME_OPTIONS_KEY (const std::vector<const DexFile*>*, \
diff --git a/runtime/stack_map.cc b/runtime/stack_map.cc
new file mode 100644
index 0000000..020a6e6
--- /dev/null
+++ b/runtime/stack_map.cc
@@ -0,0 +1,198 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "stack_map.h"
+
+namespace art {
+
+constexpr uint32_t StackMap::kNoDexRegisterMapSmallEncoding;
+constexpr uint32_t StackMap::kNoInlineInfoSmallEncoding;
+constexpr uint32_t StackMap::kNoDexRegisterMap;
+constexpr uint32_t StackMap::kNoInlineInfo;
+
+uint32_t StackMap::GetDexPc(const CodeInfo& info) const {
+  return info.HasSmallDexPc()
+      ? region_.LoadUnaligned<kSmallEncoding>(info.ComputeStackMapDexPcOffset())
+      : region_.LoadUnaligned<kLargeEncoding>(info.ComputeStackMapDexPcOffset());
+}
+
+void StackMap::SetDexPc(const CodeInfo& info, uint32_t dex_pc) {
+  DCHECK(!info.HasSmallDexPc() || IsUint<kBitsForSmallEncoding>(dex_pc)) << dex_pc;
+  info.HasSmallDexPc()
+      ? region_.StoreUnaligned<kSmallEncoding>(info.ComputeStackMapDexPcOffset(), dex_pc)
+      : region_.StoreUnaligned<kLargeEncoding>(info.ComputeStackMapDexPcOffset(), dex_pc);
+}
+
+uint32_t StackMap::GetNativePcOffset(const CodeInfo& info) const {
+  return info.HasSmallNativePc()
+      ? region_.LoadUnaligned<kSmallEncoding>(info.ComputeStackMapNativePcOffset())
+      : region_.LoadUnaligned<kLargeEncoding>(info.ComputeStackMapNativePcOffset());
+}
+
+void StackMap::SetNativePcOffset(const CodeInfo& info, uint32_t native_pc_offset) {
+  DCHECK(!info.HasSmallNativePc()
+         || IsUint<kBitsForSmallEncoding>(native_pc_offset)) << native_pc_offset;
+  uint32_t entry = info.ComputeStackMapNativePcOffset();
+  info.HasSmallNativePc()
+      ? region_.StoreUnaligned<kSmallEncoding>(entry, native_pc_offset)
+      : region_.StoreUnaligned<kLargeEncoding>(entry, native_pc_offset);
+}
+
+uint32_t StackMap::GetDexRegisterMapOffset(const CodeInfo& info) const {
+  if (info.HasSmallDexRegisterMap()) {
+    uint8_t value = region_.LoadUnaligned<kSmallEncoding>(
+        info.ComputeStackMapDexRegisterMapOffset());
+    if (value == kNoDexRegisterMapSmallEncoding) {
+      return kNoDexRegisterMap;
+    } else {
+      return value;
+    }
+  } else {
+    return region_.LoadUnaligned<kLargeEncoding>(info.ComputeStackMapDexRegisterMapOffset());
+  }
+}
+
+void StackMap::SetDexRegisterMapOffset(const CodeInfo& info, uint32_t offset) {
+  DCHECK(!info.HasSmallDexRegisterMap()
+         || (IsUint<kBitsForSmallEncoding>(offset)
+             || (offset == kNoDexRegisterMap))) << offset;
+  size_t dex_register_map_entry = info.ComputeStackMapDexRegisterMapOffset();
+  info.HasSmallDexRegisterMap()
+      ? region_.StoreUnaligned<kSmallEncoding>(dex_register_map_entry, offset)
+      : region_.StoreUnaligned<kLargeEncoding>(dex_register_map_entry, offset);
+}
+
+uint32_t StackMap::GetInlineDescriptorOffset(const CodeInfo& info) const {
+  if (!info.HasInlineInfo()) return kNoInlineInfo;
+  if (info.HasSmallInlineInfo()) {
+    uint8_t value = region_.LoadUnaligned<kSmallEncoding>(
+        info.ComputeStackMapInlineInfoOffset());
+    if (value == kNoInlineInfoSmallEncoding) {
+      return kNoInlineInfo;
+    } else {
+      return value;
+    }
+  } else {
+    return region_.LoadUnaligned<kLargeEncoding>(info.ComputeStackMapInlineInfoOffset());
+  }
+}
+
+void StackMap::SetInlineDescriptorOffset(const CodeInfo& info, uint32_t offset) {
+  DCHECK(info.HasInlineInfo());
+  DCHECK(!info.HasSmallInlineInfo()
+         || (IsUint<kBitsForSmallEncoding>(offset)
+             || (offset == kNoInlineInfo))) << offset;
+  size_t inline_entry = info.ComputeStackMapInlineInfoOffset();
+  info.HasSmallInlineInfo()
+      ? region_.StoreUnaligned<kSmallEncoding>(inline_entry, offset)
+      : region_.StoreUnaligned<kLargeEncoding>(inline_entry, offset);
+}
+
+uint32_t StackMap::GetRegisterMask(const CodeInfo& info) const {
+  return region_.LoadUnaligned<kLargeEncoding>(info.ComputeStackMapRegisterMaskOffset());
+}
+
+void StackMap::SetRegisterMask(const CodeInfo& info, uint32_t mask) {
+  region_.StoreUnaligned<kLargeEncoding>(info.ComputeStackMapRegisterMaskOffset(), mask);
+}
+
+size_t StackMap::ComputeStackMapSize(size_t stack_mask_size,
+                                     bool has_inline_info,
+                                     bool is_small_inline_info,
+                                     bool is_small_dex_map,
+                                     bool is_small_dex_pc,
+                                     bool is_small_native_pc) {
+  return StackMap::kFixedSize
+      + stack_mask_size
+      + (has_inline_info ? NumberOfBytesForEntry(is_small_inline_info) : 0)
+      + NumberOfBytesForEntry(is_small_dex_map)
+      + NumberOfBytesForEntry(is_small_dex_pc)
+      + NumberOfBytesForEntry(is_small_native_pc);
+}
+
+size_t StackMap::ComputeStackMapSize(size_t stack_mask_size,
+                                     size_t inline_info_size,
+                                     size_t dex_register_map_size,
+                                     size_t dex_pc_max,
+                                     size_t native_pc_max) {
+  return ComputeStackMapSize(
+      stack_mask_size,
+      inline_info_size != 0,
+      // + 1 to also encode kNoInlineInfo.
+      IsUint<kBitsForSmallEncoding>(inline_info_size + dex_register_map_size + 1),
+      // + 1 to also encode kNoDexRegisterMap.
+      IsUint<kBitsForSmallEncoding>(dex_register_map_size + 1),
+      IsUint<kBitsForSmallEncoding>(dex_pc_max),
+      IsUint<kBitsForSmallEncoding>(native_pc_max));
+}
+
+MemoryRegion StackMap::GetStackMask(const CodeInfo& info) const {
+  return region_.Subregion(info.ComputeStackMapStackMaskOffset(), info.GetStackMaskSize());
+}
+
+void CodeInfo::DumpStackMapHeader(std::ostream& os, size_t stack_map_num) const {
+  StackMap stack_map = GetStackMapAt(stack_map_num);
+  os << "    StackMap " << stack_map_num
+     << std::hex
+     << " (dex_pc=0x" << stack_map.GetDexPc(*this)
+     << ", native_pc_offset=0x" << stack_map.GetNativePcOffset(*this)
+     << ", dex_register_map_offset=0x" << stack_map.GetDexRegisterMapOffset(*this)
+     << ", inline_info_offset=0x" << stack_map.GetInlineDescriptorOffset(*this)
+     << ", register_mask=0x" << stack_map.GetRegisterMask(*this)
+     << std::dec
+     << ", stack_mask=0b";
+  MemoryRegion stack_mask = stack_map.GetStackMask(*this);
+  for (size_t i = 0, e = stack_mask.size_in_bits(); i < e; ++i) {
+    os << stack_mask.LoadBit(e - i - 1);
+  }
+  os << ")\n";
+}
+
+void CodeInfo::Dump(std::ostream& os, uint16_t number_of_dex_registers) const {
+  uint32_t code_info_size = GetOverallSize();
+  size_t number_of_stack_maps = GetNumberOfStackMaps();
+  os << "  Optimized CodeInfo (size=" << code_info_size
+     << ", number_of_dex_registers=" << number_of_dex_registers
+     << ", number_of_stack_maps=" << number_of_stack_maps
+     << ", has_inline_info=" << HasInlineInfo()
+     << ", has_small_inline_info=" << HasSmallInlineInfo()
+     << ", has_small_dex_register_map=" << HasSmallDexRegisterMap()
+     << ", has_small_dex_pc=" << HasSmallDexPc()
+     << ", has_small_native_pc=" << HasSmallNativePc()
+     << ")\n";
+
+  // Display stack maps along with Dex register maps.
+  for (size_t i = 0; i < number_of_stack_maps; ++i) {
+    StackMap stack_map = GetStackMapAt(i);
+    DumpStackMapHeader(os, i);
+    if (stack_map.HasDexRegisterMap(*this)) {
+      DexRegisterMap dex_register_map = GetDexRegisterMapOf(stack_map, number_of_dex_registers);
+      // TODO: Display the bit mask of live Dex registers.
+      for (size_t j = 0; j < number_of_dex_registers; ++j) {
+        if (dex_register_map.IsDexRegisterLive(j)) {
+          DexRegisterLocation location =
+              dex_register_map.GetLocationKindAndValue(j, number_of_dex_registers);
+           os << "      " << "v" << j << ": "
+              << DexRegisterLocation::PrettyDescriptor(location.GetInternalKind())
+              << " (" << location.GetValue() << ")" << '\n';
+        }
+      }
+    }
+  }
+  // TODO: Dump the stack map's inline information.
+}
+
+}  // namespace art
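The new file above picks a 1-byte (small) or 4-byte (large) encoding per stack map field depending on whether the largest value to be stored fits in 8 bits, with one unit of headroom so a real offset can never collide with the 0xFF sentinels kNoDexRegisterMapSmallEncoding and kNoInlineInfoSmallEncoding. The following standalone sketch mirrors ComputeStackMapSize for illustration only; the inputs are made up.

#include <cstddef>
#include <cstdint>
#include <iostream>
#include <limits>

// Standalone mirror of StackMap::ComputeStackMapSize. The fixed part is the
// 4-byte register mask; every other field is 1 or 4 bytes wide.
size_t ComputeStackMapSizeSketch(size_t stack_mask_size,
                                 size_t inline_info_size,
                                 size_t dex_register_map_size,
                                 size_t dex_pc_max,
                                 size_t native_pc_max) {
  auto fits_in_byte = [](size_t value) {
    return value <= std::numeric_limits<uint8_t>::max();
  };
  size_t size = sizeof(uint32_t) + stack_mask_size;  // Register mask + stack mask.
  if (inline_info_size != 0) {
    // + 1 so an offset of 0xFF overflows to the large encoding instead of
    // colliding with the kNoInlineInfo sentinel.
    size += fits_in_byte(inline_info_size + dex_register_map_size + 1) ? 1 : 4;
  }
  size += fits_in_byte(dex_register_map_size + 1) ? 1 : 4;  // Dex register map offset.
  size += fits_in_byte(dex_pc_max) ? 1 : 4;                 // Dex pc.
  size += fits_in_byte(native_pc_max) ? 1 : 4;              // Native pc offset.
  return size;
}

int main() {
  // Small method: everything fits in a byte -> 4 + 2 + 1 + 1 + 1 + 1 = 10 bytes.
  std::cout << ComputeStackMapSizeSketch(2, 16, 32, 0x20, 0x80) << "\n";
  // A large dex register map forces the 4-byte encoding for the inline info and
  // dex register map offsets -> 4 + 2 + 4 + 4 + 1 + 1 = 16 bytes.
  std::cout << ComputeStackMapSizeSketch(2, 16, 600, 0x20, 0x80) << "\n";
  return 0;
}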
diff --git a/runtime/stack_map.h b/runtime/stack_map.h
index 961772c..6ec7cc8 100644
--- a/runtime/stack_map.h
+++ b/runtime/stack_map.h
@@ -34,6 +34,8 @@
 // Size of Dex virtual registers.
 static size_t constexpr kVRegSize = 4;
 
+class CodeInfo;
+
 /**
  * Classes in the following file are wrapper on stack map information backed
  * by a MemoryRegion. As such they read and write to the region, they don't have
@@ -210,6 +212,14 @@
   // Get the actual kind of the location.
   Kind GetInternalKind() const { return kind_; }
 
+  bool operator==(DexRegisterLocation other) const {
+    return kind_ == other.kind_ && value_ == other.value_;
+  }
+
+  bool operator!=(DexRegisterLocation other) const {
+    return !(*this == other);
+  }
+
  private:
   Kind kind_;
   int32_t value_;
@@ -515,63 +525,41 @@
  public:
   explicit StackMap(MemoryRegion region) : region_(region) {}
 
-  uint32_t GetDexPc() const {
-    return region_.LoadUnaligned<uint32_t>(kDexPcOffset);
-  }
+  uint32_t GetDexPc(const CodeInfo& info) const;
 
-  void SetDexPc(uint32_t dex_pc) {
-    region_.StoreUnaligned<uint32_t>(kDexPcOffset, dex_pc);
-  }
+  void SetDexPc(const CodeInfo& info, uint32_t dex_pc);
 
-  uint32_t GetNativePcOffset() const {
-    return region_.LoadUnaligned<uint32_t>(kNativePcOffsetOffset);
-  }
+  uint32_t GetNativePcOffset(const CodeInfo& info) const;
 
-  void SetNativePcOffset(uint32_t native_pc_offset) {
-    region_.StoreUnaligned<uint32_t>(kNativePcOffsetOffset, native_pc_offset);
-  }
+  void SetNativePcOffset(const CodeInfo& info, uint32_t native_pc_offset);
 
-  uint32_t GetDexRegisterMapOffset() const {
-    return region_.LoadUnaligned<uint32_t>(kDexRegisterMapOffsetOffset);
-  }
+  uint32_t GetDexRegisterMapOffset(const CodeInfo& info) const;
 
-  void SetDexRegisterMapOffset(uint32_t offset) {
-    region_.StoreUnaligned<uint32_t>(kDexRegisterMapOffsetOffset, offset);
-  }
+  void SetDexRegisterMapOffset(const CodeInfo& info, uint32_t offset);
 
-  uint32_t GetInlineDescriptorOffset() const {
-    return region_.LoadUnaligned<uint32_t>(kInlineDescriptorOffsetOffset);
-  }
+  uint32_t GetInlineDescriptorOffset(const CodeInfo& info) const;
 
-  void SetInlineDescriptorOffset(uint32_t offset) {
-    region_.StoreUnaligned<uint32_t>(kInlineDescriptorOffsetOffset, offset);
-  }
+  void SetInlineDescriptorOffset(const CodeInfo& info, uint32_t offset);
 
-  uint32_t GetRegisterMask() const {
-    return region_.LoadUnaligned<uint32_t>(kRegisterMaskOffset);
-  }
+  uint32_t GetRegisterMask(const CodeInfo& info) const;
 
-  void SetRegisterMask(uint32_t mask) {
-    region_.StoreUnaligned<uint32_t>(kRegisterMaskOffset, mask);
-  }
+  void SetRegisterMask(const CodeInfo& info, uint32_t mask);
 
-  MemoryRegion GetStackMask() const {
-    return region_.Subregion(kStackMaskOffset, StackMaskSize());
-  }
+  MemoryRegion GetStackMask(const CodeInfo& info) const;
 
-  void SetStackMask(const BitVector& sp_map) {
-    MemoryRegion region = GetStackMask();
+  void SetStackMask(const CodeInfo& info, const BitVector& sp_map) {
+    MemoryRegion region = GetStackMask(info);
     for (size_t i = 0; i < region.size_in_bits(); i++) {
       region.StoreBit(i, sp_map.IsBitSet(i));
     }
   }
 
-  bool HasDexRegisterMap() const {
-    return GetDexRegisterMapOffset() != kNoDexRegisterMap;
+  bool HasDexRegisterMap(const CodeInfo& info) const {
+    return GetDexRegisterMapOffset(info) != kNoDexRegisterMap;
   }
 
-  bool HasInlineInfo() const {
-    return GetInlineDescriptorOffset() != kNoInlineInfo;
+  bool HasInlineInfo(const CodeInfo& info) const {
+    return GetInlineDescriptorOffset(info) != kNoInlineInfo;
   }
 
   bool Equals(const StackMap& other) const {
@@ -579,32 +567,51 @@
        && region_.size() == other.region_.size();
   }
 
-  static size_t ComputeStackMapSize(size_t stack_mask_size) {
-    return StackMap::kFixedSize + stack_mask_size;
-  }
+  static size_t ComputeStackMapSize(size_t stack_mask_size,
+                                    bool has_inline_info,
+                                    bool is_small_inline_info,
+                                    bool is_small_dex_map,
+                                    bool is_small_dex_pc,
+                                    bool is_small_native_pc);
+
+  static size_t ComputeStackMapSize(size_t stack_mask_size,
+                                    size_t inline_info_size,
+                                    size_t dex_register_map_size,
+                                    size_t dex_pc_max,
+                                    size_t native_pc_max);
+
+  // TODO: Revisit this abstraction if we allow a 3-byte encoding.
+  typedef uint8_t kSmallEncoding;
+  typedef uint32_t kLargeEncoding;
+  static constexpr size_t kBytesForSmallEncoding = sizeof(kSmallEncoding);
+  static constexpr size_t kBitsForSmallEncoding = kBitsPerByte * kBytesForSmallEncoding;
+  static constexpr size_t kBytesForLargeEncoding = sizeof(kLargeEncoding);
+  static constexpr size_t kBitsForLargeEncoding = kBitsPerByte * kBytesForLargeEncoding;
 
   // Special (invalid) offset for the DexRegisterMapOffset field meaning
   // that there is no Dex register map for this stack map.
   static constexpr uint32_t kNoDexRegisterMap = -1;
+  static constexpr uint32_t kNoDexRegisterMapSmallEncoding =
+      std::numeric_limits<kSmallEncoding>::max();
 
   // Special (invalid) offset for the InlineDescriptorOffset field meaning
   // that there is no inline info for this stack map.
   static constexpr uint32_t kNoInlineInfo = -1;
+  static constexpr uint32_t kNoInlineInfoSmallEncoding =
+    std::numeric_limits<kSmallEncoding>::max();
+
+  // Returns the number of bytes needed for an entry in the StackMap.
+  static size_t NumberOfBytesForEntry(bool small_encoding) {
+    return small_encoding ? kBytesForSmallEncoding : kBytesForLargeEncoding;
+  }
 
  private:
   // TODO: Instead of plain types such as "uint32_t", introduce
   // typedefs (and document the memory layout of StackMap).
-  static constexpr int kDexPcOffset = 0;
-  static constexpr int kNativePcOffsetOffset = kDexPcOffset + sizeof(uint32_t);
-  static constexpr int kDexRegisterMapOffsetOffset = kNativePcOffsetOffset + sizeof(uint32_t);
-  static constexpr int kInlineDescriptorOffsetOffset =
-      kDexRegisterMapOffsetOffset + sizeof(uint32_t);
-  static constexpr int kRegisterMaskOffset = kInlineDescriptorOffsetOffset + sizeof(uint32_t);
+  static constexpr int kRegisterMaskOffset = 0;
   static constexpr int kFixedSize = kRegisterMaskOffset + sizeof(uint32_t);
   static constexpr int kStackMaskOffset = kFixedSize;
 
-  size_t StackMaskSize() const { return region_.size() - kFixedSize; }
-
   MemoryRegion region_;
 
   friend class CodeInfo;
@@ -626,6 +633,80 @@
     region_ = MemoryRegion(const_cast<void*>(data), size);
   }
 
+  void SetEncoding(size_t inline_info_size,
+                   size_t dex_register_map_size,
+                   size_t dex_pc_max,
+                   size_t native_pc_max) {
+    if (inline_info_size != 0) {
+      region_.StoreBit(kHasInlineInfoBitOffset, 1);
+      region_.StoreBit(kHasSmallInlineInfoBitOffset, IsUint<StackMap::kBitsForSmallEncoding>(
+          // + 1 to also encode kNoInlineInfo: if an inline info offset
+          // is at 0xFF, we want to overflow to a larger encoding, because it will
+          // conflict with kNoInlineInfo.
+          // The offset is relative to the dex register map. TODO: Change this.
+          inline_info_size + dex_register_map_size + 1));
+    } else {
+      region_.StoreBit(kHasInlineInfoBitOffset, 0);
+      region_.StoreBit(kHasSmallInlineInfoBitOffset, 0);
+    }
+    region_.StoreBit(kHasSmallDexRegisterMapBitOffset,
+                     // + 1 to also encode kNoDexRegisterMap: if a dex register map offset
+                     // is at 0xFF, we want to overflow to a larger encoding, because it will
+                     // conflict with kNoDexRegisterMap.
+                     IsUint<StackMap::kBitsForSmallEncoding>(dex_register_map_size + 1));
+    region_.StoreBit(kHasSmallDexPcBitOffset, IsUint<StackMap::kBitsForSmallEncoding>(dex_pc_max));
+    region_.StoreBit(kHasSmallNativePcBitOffset,
+                     IsUint<StackMap::kBitsForSmallEncoding>(native_pc_max));
+  }
+
+  bool HasInlineInfo() const {
+    return region_.LoadBit(kHasInlineInfoBitOffset);
+  }
+
+  bool HasSmallInlineInfo() const {
+    return region_.LoadBit(kHasSmallInlineInfoBitOffset);
+  }
+
+  bool HasSmallDexRegisterMap() const {
+    return region_.LoadBit(kHasSmallDexRegisterMapBitOffset);
+  }
+
+  bool HasSmallNativePc() const {
+    return region_.LoadBit(kHasSmallNativePcBitOffset);
+  }
+
+  bool HasSmallDexPc() const {
+    return region_.LoadBit(kHasSmallDexPcBitOffset);
+  }
+
+  size_t ComputeStackMapRegisterMaskOffset() const {
+    return StackMap::kRegisterMaskOffset;
+  }
+
+  size_t ComputeStackMapStackMaskOffset() const {
+    return StackMap::kStackMaskOffset;
+  }
+
+  size_t ComputeStackMapDexPcOffset() const {
+    return ComputeStackMapStackMaskOffset() + GetStackMaskSize();
+  }
+
+  size_t ComputeStackMapNativePcOffset() const {
+    return ComputeStackMapDexPcOffset()
+        + (HasSmallDexPc() ? sizeof(uint8_t) : sizeof(uint32_t));
+  }
+
+  size_t ComputeStackMapDexRegisterMapOffset() const {
+    return ComputeStackMapNativePcOffset()
+        + (HasSmallNativePc() ? sizeof(uint8_t) : sizeof(uint32_t));
+  }
+
+  size_t ComputeStackMapInlineInfoOffset() const {
+    CHECK(HasInlineInfo());
+    return ComputeStackMapDexRegisterMapOffset()
+        + (HasSmallDexRegisterMap() ? sizeof(uint8_t) : sizeof(uint32_t));
+  }
+
   StackMap GetStackMapAt(size_t i) const {
     size_t size = StackMapSize();
     return StackMap(GetStackMaps().Subregion(i * size, size));
@@ -658,7 +739,12 @@
   // Get the size of one stack map of this CodeInfo object, in bytes.
   // All stack maps of a CodeInfo have the same size.
   size_t StackMapSize() const {
-    return StackMap::ComputeStackMapSize(GetStackMaskSize());
+    return StackMap::ComputeStackMapSize(GetStackMaskSize(),
+                                         HasInlineInfo(),
+                                         HasSmallInlineInfo(),
+                                         HasSmallDexRegisterMap(),
+                                         HasSmallDexPc(),
+                                         HasSmallNativePc());
   }
 
   // Get the size all the stack maps of this CodeInfo object, in bytes.
@@ -666,21 +752,25 @@
     return StackMapSize() * GetNumberOfStackMaps();
   }
 
+  size_t GetDexRegisterMapsOffset() const {
+    return CodeInfo::kFixedSize + StackMapsSize();
+  }
+
   uint32_t GetStackMapsOffset() const {
     return kFixedSize;
   }
 
   DexRegisterMap GetDexRegisterMapOf(StackMap stack_map, uint32_t number_of_dex_registers) const {
-    DCHECK(stack_map.HasDexRegisterMap());
-    uint32_t offset = stack_map.GetDexRegisterMapOffset();
+    DCHECK(stack_map.HasDexRegisterMap(*this));
+    uint32_t offset = stack_map.GetDexRegisterMapOffset(*this) + GetDexRegisterMapsOffset();
     size_t size = ComputeDexRegisterMapSize(offset, number_of_dex_registers);
     return DexRegisterMap(region_.Subregion(offset, size));
   }
 
   InlineInfo GetInlineInfoOf(StackMap stack_map) const {
-    DCHECK(stack_map.HasInlineInfo());
-    uint32_t offset = stack_map.GetInlineDescriptorOffset();
-    uint8_t depth = region_.Load<uint8_t>(offset);
+    DCHECK(stack_map.HasInlineInfo(*this));
+    uint32_t offset = stack_map.GetInlineDescriptorOffset(*this) + GetDexRegisterMapsOffset();
+    uint8_t depth = region_.LoadUnaligned<uint8_t>(offset);
     return InlineInfo(region_.Subregion(offset,
         InlineInfo::kFixedSize + depth * InlineInfo::SingleEntrySize()));
   }
@@ -688,7 +778,7 @@
   StackMap GetStackMapForDexPc(uint32_t dex_pc) const {
     for (size_t i = 0, e = GetNumberOfStackMaps(); i < e; ++i) {
       StackMap stack_map = GetStackMapAt(i);
-      if (stack_map.GetDexPc() == dex_pc) {
+      if (stack_map.GetDexPc(*this) == dex_pc) {
         return stack_map;
       }
     }
@@ -700,7 +790,7 @@
     // TODO: stack maps are sorted by native pc, we can do a binary search.
     for (size_t i = 0, e = GetNumberOfStackMaps(); i < e; ++i) {
       StackMap stack_map = GetStackMapAt(i);
-      if (stack_map.GetNativePcOffset() == native_pc_offset) {
+      if (stack_map.GetNativePcOffset(*this) == native_pc_offset) {
         return stack_map;
       }
     }
@@ -708,14 +798,24 @@
     UNREACHABLE();
   }
 
+  void Dump(std::ostream& os, uint16_t number_of_dex_registers) const;
+  void DumpStackMapHeader(std::ostream& os, size_t stack_map_num) const;
+
  private:
   // TODO: Instead of plain types such as "uint32_t", introduce
   // typedefs (and document the memory layout of CodeInfo).
   static constexpr int kOverallSizeOffset = 0;
-  static constexpr int kNumberOfStackMapsOffset = kOverallSizeOffset + sizeof(uint32_t);
+  static constexpr int kEncodingInfoOffset = kOverallSizeOffset + sizeof(uint32_t);
+  static constexpr int kNumberOfStackMapsOffset = kEncodingInfoOffset + sizeof(uint8_t);
   static constexpr int kStackMaskSizeOffset = kNumberOfStackMapsOffset + sizeof(uint32_t);
   static constexpr int kFixedSize = kStackMaskSizeOffset + sizeof(uint32_t);
 
+  static constexpr int kHasInlineInfoBitOffset = (kEncodingInfoOffset * kBitsPerByte);
+  static constexpr int kHasSmallInlineInfoBitOffset = kHasInlineInfoBitOffset + 1;
+  static constexpr int kHasSmallDexRegisterMapBitOffset = kHasSmallInlineInfoBitOffset + 1;
+  static constexpr int kHasSmallDexPcBitOffset = kHasSmallDexRegisterMapBitOffset + 1;
+  static constexpr int kHasSmallNativePcBitOffset = kHasSmallDexPcBitOffset + 1;
+
   MemoryRegion GetStackMaps() const {
     return region_.size() == 0
         ? MemoryRegion()
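Putting the encoding bits and the ComputeStackMap*Offset() helpers together, each stack map starts with a fixed 4-byte register mask and then lays out its variable-width fields back to back. The sketch below walks the offset chain for one hypothetical encoding (2-byte stack mask, small dex pc and native pc, large dex register map offset, no inline info); the numbers are illustrative, not taken from a real oat file.

#include <cassert>
#include <cstddef>
#include <cstdint>

// Mirrors CodeInfo::ComputeStackMap*Offset() for one hypothetical encoding.
int main() {
  const size_t kRegisterMaskOffset = 0;                                    // Always uint32_t.
  const size_t kStackMaskOffset = kRegisterMaskOffset + sizeof(uint32_t);
  const size_t kStackMaskSize = 2;                                         // Example value.
  const size_t kDexPcOffset = kStackMaskOffset + kStackMaskSize;
  const size_t kNativePcOffset = kDexPcOffset + sizeof(uint8_t);           // Small dex pc.
  const size_t kDexRegisterMapOffset = kNativePcOffset + sizeof(uint8_t);  // Small native pc.
  const size_t kStackMapSize = kDexRegisterMapOffset + sizeof(uint32_t);   // Large dex reg map.
  assert(kDexPcOffset == 6);
  assert(kNativePcOffset == 7);
  assert(kDexRegisterMapOffset == 8);
  // Matches StackMap::ComputeStackMapSize(2, false, false, false, true, true).
  assert(kStackMapSize == 12);
  return 0;
}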
diff --git a/runtime/thread.cc b/runtime/thread.cc
index affb6cd..e1a07e9 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -2155,8 +2155,9 @@
         Runtime* runtime = Runtime::Current();
         const void* entry_point = runtime->GetInstrumentation()->GetQuickCodeFor(m, sizeof(void*));
         uintptr_t native_pc_offset = m->NativeQuickPcOffset(GetCurrentQuickFramePc(), entry_point);
-        StackMap map = m->GetStackMap(native_pc_offset);
-        MemoryRegion mask = map.GetStackMask();
+        CodeInfo code_info = m->GetOptimizedCodeInfo();
+        StackMap map = code_info.GetStackMapForNativePcOffset(native_pc_offset);
+        MemoryRegion mask = map.GetStackMask(code_info);
         // Visit stack entries that hold pointers.
         for (size_t i = 0; i < mask.size_in_bits(); ++i) {
           if (mask.LoadBit(i)) {
@@ -2173,7 +2174,7 @@
           }
         }
         // Visit callee-save registers that hold pointers.
-        uint32_t register_mask = map.GetRegisterMask();
+        uint32_t register_mask = map.GetRegisterMask(code_info);
         for (size_t i = 0; i < BitSizeOf<uint32_t>(); ++i) {
           if (register_mask & (1 << i)) {
             mirror::Object** ref_addr = reinterpret_cast<mirror::Object**>(GetGPRAddress(i));
diff --git a/runtime/thread.h b/runtime/thread.h
index da7af83..9d4d89d 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -573,6 +573,21 @@
         OFFSETOF_MEMBER(tls_ptr_sized_values, suspend_trigger));
   }
 
+  template<size_t pointer_size>
+  static ThreadOffset<pointer_size> ThreadLocalPosOffset() {
+    return ThreadOffsetFromTlsPtr<pointer_size>(OFFSETOF_MEMBER(tls_ptr_sized_values, thread_local_pos));
+  }
+
+  template<size_t pointer_size>
+  static ThreadOffset<pointer_size> ThreadLocalEndOffset() {
+    return ThreadOffsetFromTlsPtr<pointer_size>(OFFSETOF_MEMBER(tls_ptr_sized_values, thread_local_end));
+  }
+
+  template<size_t pointer_size>
+  static ThreadOffset<pointer_size> ThreadLocalObjectsOffset() {
+    return ThreadOffsetFromTlsPtr<pointer_size>(OFFSETOF_MEMBER(tls_ptr_sized_values, thread_local_objects));
+  }
+
   // Size of stack less any space reserved for stack overflow
   size_t GetStackSize() const {
     return tlsPtr_.stack_size - (tlsPtr_.stack_end - tlsPtr_.stack_begin);
diff --git a/runtime/utf.cc b/runtime/utf.cc
index 39c8d15..3d13c3e 100644
--- a/runtime/utf.cc
+++ b/runtime/utf.cc
@@ -67,15 +67,39 @@
 
 void ConvertUtf16ToModifiedUtf8(char* utf8_out, const uint16_t* utf16_in, size_t char_count) {
   while (char_count--) {
-    uint16_t ch = *utf16_in++;
+    const uint16_t ch = *utf16_in++;
     if (ch > 0 && ch <= 0x7f) {
       *utf8_out++ = ch;
     } else {
+      // char_count == 0 here implies we've encountered an unpaired
+      // surrogate and we have no choice but to encode it as a 3-byte UTF
+      // sequence. Note that unpaired surrogates can occur as part of
+      // "normal" operation.
+      if ((ch >= 0xd800 && ch <= 0xdbff) && (char_count > 0)) {
+        const uint16_t ch2 = *utf16_in;
+
+        // Check if the other half of the pair is within the expected
+        // range. If it isn't, we will have to emit both "halves" as
+        // separate 3 byte sequences.
+        if (ch2 >= 0xdc00 && ch2 <= 0xdfff) {
+          utf16_in++;
+          char_count--;
+          const uint32_t code_point = (ch << 10) + ch2 - 0x035fdc00;
+          *utf8_out++ = (code_point >> 18) | 0xf0;
+          *utf8_out++ = ((code_point >> 12) & 0x3f) | 0x80;
+          *utf8_out++ = ((code_point >> 6) & 0x3f) | 0x80;
+          *utf8_out++ = (code_point & 0x3f) | 0x80;
+          continue;
+        }
+      }
+
       if (ch > 0x07ff) {
+        // Three byte encoding.
         *utf8_out++ = (ch >> 12) | 0xe0;
         *utf8_out++ = ((ch >> 6) & 0x3f) | 0x80;
         *utf8_out++ = (ch & 0x3f) | 0x80;
       } else /*(ch > 0x7f || ch == 0)*/ {
+        // Two byte encoding.
         *utf8_out++ = (ch >> 6) | 0xc0;
         *utf8_out++ = (ch & 0x3f) | 0x80;
       }
@@ -147,15 +171,32 @@
 size_t CountUtf8Bytes(const uint16_t* chars, size_t char_count) {
   size_t result = 0;
   while (char_count--) {
-    uint16_t ch = *chars++;
+    const uint16_t ch = *chars++;
     if (ch > 0 && ch <= 0x7f) {
       ++result;
-    } else {
-      if (ch > 0x7ff) {
-        result += 3;
+    } else if (ch >= 0xd800 && ch <= 0xdbff) {
+      if (char_count > 0) {
+        const uint16_t ch2 = *chars;
+        // If we find a properly paired surrogate, we emit it as a 4 byte
+        // UTF sequence. If we find an unpaired leading or trailing surrogate,
+        // we emit it as a 3 byte sequence, as we would have done earlier.
+        if (ch2 >= 0xdc00 && ch2 <= 0xdfff) {
+          chars++;
+          char_count--;
+
+          result += 4;
+        } else {
+          result += 3;
+        }
       } else {
-        result += 2;
+        // This implies we found an unpaired trailing surrogate at the end
+        // of a string.
+        result += 3;
       }
+    } else if (ch > 0x7ff) {
+      result += 3;
+    } else {
+      result += 2;
     }
   }
   return result;
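The surrogate handling added above folds the textbook pair decoding, (lead - 0xd800) << 10 plus (trail - 0xdc00) plus 0x10000, into a single subtraction of 0x035fdc00. The self-contained check below verifies that identity; the sample pair 0xd801/0xdc00 is U+10400, which the tests in the next file expect to encode as f0 90 90 80.

#include <cassert>
#include <cstdint>

// Both helpers compute the same code point; the single-constant form is the
// one used in ConvertUtf16ToModifiedUtf8 above.
static uint32_t DecodePairTextbook(uint16_t lead, uint16_t trail) {
  return 0x10000u + ((lead - 0xd800u) << 10) + (trail - 0xdc00u);
}

static uint32_t DecodePairFolded(uint16_t lead, uint16_t trail) {
  return (static_cast<uint32_t>(lead) << 10) + trail - 0x035fdc00u;
}

int main() {
  // 0xd801 0xdc00 is U+10400.
  assert(DecodePairTextbook(0xd801, 0xdc00) == 0x10400u);
  assert(DecodePairFolded(0xd801, 0xdc00) == 0x10400u);
  // The identity holds across the whole surrogate range.
  for (uint32_t lead = 0xd800; lead <= 0xdbff; ++lead) {
    for (uint32_t trail = 0xdc00; trail <= 0xdfff; trail += 0x3ff) {
      assert(DecodePairTextbook(lead, trail) == DecodePairFolded(lead, trail));
    }
  }
  return 0;
}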
diff --git a/runtime/utf_test.cc b/runtime/utf_test.cc
index 8048bbd..94a6ea5 100644
--- a/runtime/utf_test.cc
+++ b/runtime/utf_test.cc
@@ -19,6 +19,8 @@
 #include "common_runtime_test.h"
 #include "utf-inl.h"
 
+#include <vector>
+
 namespace art {
 
 class UtfTest : public CommonRuntimeTest {};
@@ -110,4 +112,52 @@
   EXPECT_EQ(2u, CountModifiedUtf8Chars(reinterpret_cast<const char *>(kSurrogateEncoding)));
 }
 
+static void AssertConversion(const std::vector<uint16_t> input,
+                             const std::vector<uint8_t> expected) {
+  ASSERT_EQ(expected.size(), CountUtf8Bytes(&input[0], input.size()));
+
+  std::vector<uint8_t> output(expected.size());
+  ConvertUtf16ToModifiedUtf8(reinterpret_cast<char*>(&output[0]), &input[0], input.size());
+  EXPECT_EQ(expected, output);
+}
+
+TEST_F(UtfTest, CountAndConvertUtf8Bytes) {
+  // Surrogate pairs will be converted into 4 byte sequences.
+  AssertConversion({ 0xd801, 0xdc00 }, { 0xf0, 0x90, 0x90, 0x80 });
+
+  // Three byte encodings that are below & above the leading surrogate
+  // range respectively.
+  AssertConversion({ 0xdef0 }, { 0xed, 0xbb, 0xb0 });
+  AssertConversion({ 0xdcff }, { 0xed, 0xb3, 0xbf });
+  // Two byte encoding.
+  AssertConversion({ 0x0101 }, { 0xc4, 0x81 });
+
+  // Two byte special case: 0 must use an overlong encoding.
+  AssertConversion({ 0x0101, 0x0000 }, { 0xc4, 0x81, 0xc0, 0x80 });
+
+  // One byte encoding.
+  AssertConversion({ 'h', 'e', 'l', 'l', 'o' }, { 0x68, 0x65, 0x6c, 0x6c, 0x6f });
+
+  AssertConversion({
+      0xd802, 0xdc02,  // Surrogate pair
+      0xdef0, 0xdcff,  // Three byte encodings
+      0x0101, 0x0000,  // Two byte encodings
+      'p'   , 'p'      // One byte encoding
+    }, {
+      0xf0, 0x90, 0xa0, 0x82,
+      0xed, 0xbb, 0xb0, 0xed, 0xb3, 0xbf,
+      0xc4, 0x81, 0xc0, 0x80,
+      0x70, 0x70
+    });
+}
+
+TEST_F(UtfTest, CountAndConvertUtf8Bytes_UnpairedSurrogate) {
+  // Unpaired trailing surrogate at the end of input.
+  AssertConversion({ 'h', 'e', 0xd801 }, { 'h', 'e', 0xed, 0xa0, 0x81 });
+  // Unpaired (or incorrectly paired) surrogates in the middle of the input.
+  AssertConversion({ 'h', 0xd801, 'e' }, { 'h', 0xed, 0xa0, 0x81, 'e' });
+  AssertConversion({ 'h', 0xd801, 0xd801, 'e' }, { 'h', 0xed, 0xa0, 0x81, 0xed, 0xa0, 0x81, 'e' });
+  AssertConversion({ 'h', 0xdc00, 0xdc00, 'e' }, { 'h', 0xed, 0xb0, 0x80, 0xed, 0xb0, 0x80, 'e' });
+}
+
 }  // namespace art
diff --git a/runtime/utils.h b/runtime/utils.h
index 9a9f51a..e20412e 100644
--- a/runtime/utils.h
+++ b/runtime/utils.h
@@ -311,19 +311,6 @@
   return (ch < ' ' || ch > '~');
 }
 
-// Interpret the bit pattern of input (type U) as type V. Requires the size
-// of V >= size of U (compile-time checked).
-template<typename U, typename V>
-static inline V bit_cast(U in) {
-  static_assert(sizeof(U) <= sizeof(V), "Size of U not <= size of V");
-  union {
-    U u;
-    V v;
-  } tmp;
-  tmp.u = in;
-  return tmp.v;
-}
-
 std::string PrintableChar(uint16_t ch);
 
 // Returns an ASCII string corresponding to the given UTF-8 string.
diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc
index 9ceb6f4..47e9bf5 100644
--- a/runtime/verifier/method_verifier.cc
+++ b/runtime/verifier/method_verifier.cc
@@ -1086,7 +1086,7 @@
   const uint16_t* insns = code_item_->insns_ + cur_offset;
   /* make sure the start of the switch is in range */
   int32_t switch_offset = insns[1] | ((int32_t) insns[2]) << 16;
-  if ((int32_t) cur_offset + switch_offset < 0 || cur_offset + switch_offset + 2 >= insn_count) {
+  if ((int32_t) cur_offset + switch_offset < 0 || cur_offset + switch_offset + 2 > insn_count) {
     Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "invalid switch start: at " << cur_offset
                                       << ", switch offset " << switch_offset
                                       << ", count " << insn_count;
@@ -2742,9 +2742,17 @@
       break;
 
     // Special instructions.
-    case Instruction::RETURN_VOID_BARRIER:
-      if (!IsConstructor() || IsStatic()) {
-          Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "return-void-barrier not expected";
+    case Instruction::RETURN_VOID_NO_BARRIER:
+      if (IsConstructor() && !IsStatic()) {
+        auto& declaring_class = GetDeclaringClass();
+        auto* klass = declaring_class.GetClass();
+        for (uint32_t i = 0, num_fields = klass->NumInstanceFields(); i < num_fields; ++i) {
+          if (klass->GetInstanceField(i)->IsFinal()) {
+            Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "return-void-no-barrier not expected for "
+                << PrettyField(klass->GetInstanceField(i));
+            break;
+          }
+        }
       }
       break;
     // Note: the following instructions encode offsets derived from class linking.
@@ -3017,7 +3025,7 @@
       // For returns we only care about the operand to the return, all other registers are dead.
       const Instruction* ret_inst = Instruction::At(code_item_->insns_ + next_insn_idx);
       Instruction::Code opcode = ret_inst->Opcode();
-      if ((opcode == Instruction::RETURN_VOID) || (opcode == Instruction::RETURN_VOID_BARRIER)) {
+      if (opcode == Instruction::RETURN_VOID || opcode == Instruction::RETURN_VOID_NO_BARRIER) {
         SafelyMarkAllRegistersAsConflicts(this, work_line_.get());
       } else {
         if (opcode == Instruction::RETURN_WIDE) {
@@ -4163,7 +4171,7 @@
       // Initialize them as conflicts so they don't add to GC and deoptimization information.
       const Instruction* ret_inst = Instruction::At(code_item_->insns_ + next_insn);
       Instruction::Code opcode = ret_inst->Opcode();
-      if ((opcode == Instruction::RETURN_VOID) || (opcode == Instruction::RETURN_VOID_BARRIER)) {
+      if (opcode == Instruction::RETURN_VOID || opcode == Instruction::RETURN_VOID_NO_BARRIER) {
         SafelyMarkAllRegistersAsConflicts(this, target_line);
       } else {
         target_line->CopyFromLine(merge_line);
diff --git a/runtime/well_known_classes.cc b/runtime/well_known_classes.cc
index 78185bf..00b4cef 100644
--- a/runtime/well_known_classes.cc
+++ b/runtime/well_known_classes.cc
@@ -269,7 +269,7 @@
 
 void WellKnownClasses::LateInit(JNIEnv* env) {
   ScopedLocalRef<jclass> java_lang_Runtime(env, env->FindClass("java/lang/Runtime"));
-  java_lang_Runtime_nativeLoad = CacheMethod(env, java_lang_Runtime.get(), true, "nativeLoad", "(Ljava/lang/String;Ljava/lang/ClassLoader;Ljava/lang/String;)Ljava/lang/String;");
+  java_lang_Runtime_nativeLoad = CacheMethod(env, java_lang_Runtime.get(), true, "nativeLoad", "(Ljava/lang/String;Ljava/lang/ClassLoader;Ljava/lang/String;Ljava/lang/String;)Ljava/lang/String;");
 }
 
 mirror::Class* WellKnownClasses::ToClass(jclass global_jclass) {
diff --git a/test/004-ReferenceMap/stack_walk_refmap_jni.cc b/test/004-ReferenceMap/stack_walk_refmap_jni.cc
index 40be56c..76ef4a9 100644
--- a/test/004-ReferenceMap/stack_walk_refmap_jni.cc
+++ b/test/004-ReferenceMap/stack_walk_refmap_jni.cc
@@ -57,14 +57,15 @@
       // We eliminate the non-live registers at a return, so only v3 is live.
       // Note that it is OK for a compiler to not have a dex map at this dex PC because
       // a return is not necessarily a safepoint.
-      CHECK_REGS_CONTAIN_REFS(0x13U, false);  // v3: y
-      CHECK_REGS_CONTAIN_REFS(0x18U, true, 8, 2, 1, 0);  // v8: this, v2: y, v1: x, v0: ex
-      CHECK_REGS_CONTAIN_REFS(0x1aU, true, 8, 5, 2, 1, 0);  // v8: this, v5: x[1], v2: y, v1: x, v0: ex
-      CHECK_REGS_CONTAIN_REFS(0x1dU, true, 8, 5, 2, 1, 0);  // v8: this, v5: x[1], v2: y, v1: x, v0: ex
+      CHECK_REGS_CONTAIN_REFS(0x13U, false, 3);  // v3: y
+      // Note that v0: ex can be eliminated because it's a dead merge of two different exceptions.
+      CHECK_REGS_CONTAIN_REFS(0x18U, true, 8, 2, 1);  // v8: this, v2: y, v1: x (dead v0: ex)
+      CHECK_REGS_CONTAIN_REFS(0x1aU, true, 8, 5, 2, 1);  // v8: this, v5: x[1], v2: y, v1: x (dead v0: ex)
+      CHECK_REGS_CONTAIN_REFS(0x1dU, true, 8, 5, 2, 1);  // v8: this, v5: x[1], v2: y, v1: x (dead v0: ex)
       // v5 is removed from the root set because there is a "merge" operation.
       // See 0015: if-nez v2, 001f.
-      CHECK_REGS_CONTAIN_REFS(0x1fU, true, 8, 2, 1, 0);  // v8: this, v2: y, v1: x, v0: ex
-      CHECK_REGS_CONTAIN_REFS(0x21U, true, 8, 2, 1, 0);  // v8: this, v2: y, v1: x, v0: ex
+      CHECK_REGS_CONTAIN_REFS(0x1fU, true, 8, 2, 1);  // v8: this, v2: y, v1: x (dead v0: ex)
+      CHECK_REGS_CONTAIN_REFS(0x21U, true, 8, 2, 1);  // v8: this, v2: y, v1: x (dead v0: ex)
       CHECK_REGS_CONTAIN_REFS(0x27U, true, 8, 4, 2, 1);  // v8: this, v4: ex, v2: y, v1: x
       CHECK_REGS_CONTAIN_REFS(0x29U, true, 8, 4, 2, 1);  // v8: this, v4: ex, v2: y, v1: x
       CHECK_REGS_CONTAIN_REFS(0x2cU, true, 8, 4, 2, 1);  // v8: this, v4: ex, v2: y, v1: x
diff --git a/test/134-reg-promotion/smali/Test.smali b/test/134-reg-promotion/smali/Test.smali
index 6a35c45..68d29dd 100644
--- a/test/134-reg-promotion/smali/Test.smali
+++ b/test/134-reg-promotion/smali/Test.smali
@@ -36,3 +36,28 @@
    :end
    return-void
 .end method
+
+.method public static run2()V
+   .registers 4
+   new-instance v2, Ljava/lang/String;
+   invoke-direct {v2}, Ljava/lang/String;-><init>()V
+   const/4 v0, 0
+   move v1, v0
+   :start
+   invoke-static {}, LMain;->blowup()V
+   if-ne v1, v0, :end
+   const/4 v2, 1
+   invoke-static {v2}, Ljava/lang/Integer;->toString(I)Ljava/lang/String;
+   move-result-object v3
+   if-nez v3, :skip
+   const/4 v0, 0
+   :skip
+   # The Phi merging 0 with 0 hides the constant from the Quick compiler.
+   move v2, v0
+   # The call makes v2 float type.
+   invoke-static {v2}, Ljava/lang/Float;->isNaN(F)Z
+   const/4 v1, 1
+   goto :start
+   :end
+   return-void
+.end method
diff --git a/test/134-reg-promotion/src/Main.java b/test/134-reg-promotion/src/Main.java
index d45ec66..008ac58 100644
--- a/test/134-reg-promotion/src/Main.java
+++ b/test/134-reg-promotion/src/Main.java
@@ -38,5 +38,11 @@
             m.invoke(null, (Object[]) null);
             holder = null;
         }
+        m = c.getMethod("run2", (Class[]) null);
+        for (int i = 0; i < 10; i++) {
+            holder = new char[128 * 1024][];
+            m.invoke(null, (Object[]) null);
+            holder = null;
+        }
     }
 }
diff --git a/test/454-get-vreg/get_vreg_jni.cc b/test/454-get-vreg/get_vreg_jni.cc
index 937d2fe..6b4bc11 100644
--- a/test/454-get-vreg/get_vreg_jni.cc
+++ b/test/454-get-vreg/get_vreg_jni.cc
@@ -55,7 +55,7 @@
       CHECK_EQ(value, 1u);
 
       CHECK(GetVReg(m, 5, kFloatVReg, &value));
-      uint32_t cast = bit_cast<float, uint32_t>(1.0f);
+      uint32_t cast = bit_cast<uint32_t, float>(1.0f);
       CHECK_EQ(value, cast);
 
       CHECK(GetVReg(m, 6, kIntVReg, &value));
@@ -95,7 +95,7 @@
       CHECK_EQ(value, 0u);
 
       CHECK(GetVRegPair(m, 13, kDoubleLoVReg, kDoubleHiVReg, &value));
-      uint64_t cast = bit_cast<double, uint64_t>(2.0);
+      uint64_t cast = bit_cast<uint64_t, double>(2.0);
       CHECK_EQ(value, cast);
     }
 
diff --git a/test/455-set-vreg/set_vreg_jni.cc b/test/455-set-vreg/set_vreg_jni.cc
index 24d7832..0a83ac0 100644
--- a/test/455-set-vreg/set_vreg_jni.cc
+++ b/test/455-set-vreg/set_vreg_jni.cc
@@ -60,21 +60,21 @@
       CHECK(GetVReg(m, 1, kReferenceVReg, &value));
       CHECK_EQ(reinterpret_cast<mirror::Object*>(value), this_value_);
 
-      CHECK(SetVReg(m, 2, bit_cast<float, uint32_t>(5.0f), kFloatVReg));
-      CHECK(SetVReg(m, 3, bit_cast<float, uint32_t>(4.0f), kFloatVReg));
-      CHECK(SetVReg(m, 4, bit_cast<float, uint32_t>(3.0f), kFloatVReg));
-      CHECK(SetVReg(m, 5, bit_cast<float, uint32_t>(2.0f), kFloatVReg));
-      CHECK(SetVReg(m, 6, bit_cast<float, uint32_t>(1.0f), kFloatVReg));
+      CHECK(SetVReg(m, 2, bit_cast<uint32_t, float>(5.0f), kFloatVReg));
+      CHECK(SetVReg(m, 3, bit_cast<uint32_t, float>(4.0f), kFloatVReg));
+      CHECK(SetVReg(m, 4, bit_cast<uint32_t, float>(3.0f), kFloatVReg));
+      CHECK(SetVReg(m, 5, bit_cast<uint32_t, float>(2.0f), kFloatVReg));
+      CHECK(SetVReg(m, 6, bit_cast<uint32_t, float>(1.0f), kFloatVReg));
     } else if (m_name.compare("testDoubleVReg") == 0) {
       uint32_t value = 0;
       CHECK(GetVReg(m, 3, kReferenceVReg, &value));
       CHECK_EQ(reinterpret_cast<mirror::Object*>(value), this_value_);
 
-      CHECK(SetVRegPair(m, 4, bit_cast<double, uint64_t>(5.0), kDoubleLoVReg, kDoubleHiVReg));
-      CHECK(SetVRegPair(m, 6, bit_cast<double, uint64_t>(4.0), kDoubleLoVReg, kDoubleHiVReg));
-      CHECK(SetVRegPair(m, 8, bit_cast<double, uint64_t>(3.0), kDoubleLoVReg, kDoubleHiVReg));
-      CHECK(SetVRegPair(m, 10, bit_cast<double, uint64_t>(2.0), kDoubleLoVReg, kDoubleHiVReg));
-      CHECK(SetVRegPair(m, 12, bit_cast<double, uint64_t>(1.0), kDoubleLoVReg, kDoubleHiVReg));
+      CHECK(SetVRegPair(m, 4, bit_cast<uint64_t, double>(5.0), kDoubleLoVReg, kDoubleHiVReg));
+      CHECK(SetVRegPair(m, 6, bit_cast<uint64_t, double>(4.0), kDoubleLoVReg, kDoubleHiVReg));
+      CHECK(SetVRegPair(m, 8, bit_cast<uint64_t, double>(3.0), kDoubleLoVReg, kDoubleHiVReg));
+      CHECK(SetVRegPair(m, 10, bit_cast<uint64_t, double>(2.0), kDoubleLoVReg, kDoubleHiVReg));
+      CHECK(SetVRegPair(m, 12, bit_cast<uint64_t, double>(1.0), kDoubleLoVReg, kDoubleHiVReg));
     }
 
     return true;
diff --git a/test/457-regs/regs_jni.cc b/test/457-regs/regs_jni.cc
index ce701e8..1b32348 100644
--- a/test/457-regs/regs_jni.cc
+++ b/test/457-regs/regs_jni.cc
@@ -69,7 +69,7 @@
       CHECK_EQ(value, 1u);
 
       CHECK(GetVReg(m, 4, kFloatVReg, &value));
-      uint32_t cast = bit_cast<float, uint32_t>(4.0f);
+      uint32_t cast = bit_cast<uint32_t, float>(4.0f);
       CHECK_EQ(value, cast);
       did_check_ = true;
     } else if (m_name.compare("phiEquivalent") == 0) {
@@ -138,7 +138,7 @@
   std::unique_ptr<Context> context(Context::Create());
   CHECK(soa.Decode<mirror::Object*>(main) == nullptr);
   CHECK_EQ(int_value, 0);
-  int32_t cast = bit_cast<float, int32_t>(float_value);
+  int32_t cast = bit_cast<int32_t, float>(float_value);
   CHECK_EQ(cast, 0);
   TestVisitor visitor(soa.Self(), context.get());
   visitor.WalkStack();
diff --git a/test/463-checker-boolean-simplifier/expected.txt b/test/463-checker-boolean-simplifier/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/463-checker-boolean-simplifier/expected.txt
diff --git a/test/463-checker-boolean-simplifier/info.txt b/test/463-checker-boolean-simplifier/info.txt
new file mode 100644
index 0000000..9c0493a
--- /dev/null
+++ b/test/463-checker-boolean-simplifier/info.txt
@@ -0,0 +1 @@
+Tests simplification of boolean NOT in optimizing compiler.
diff --git a/test/463-checker-boolean-simplifier/src/Main.java b/test/463-checker-boolean-simplifier/src/Main.java
new file mode 100644
index 0000000..efe0d3f
--- /dev/null
+++ b/test/463-checker-boolean-simplifier/src/Main.java
@@ -0,0 +1,174 @@
+/*
+* Copyright (C) 2015 The Android Open Source Project
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*      http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+public class Main {
+
+  // Note #1: `javac` flips the conditions of If statements.
+  // Note #2: In the optimizing compiler, the first input of Phi is always
+  //          the fall-through path, i.e. the false branch.
+
+  public static void assertBoolEquals(boolean expected, boolean result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  /*
+   * Elementary test negating a boolean. Verifies that the condition is replaced,
+   * blocks merged and empty branches removed.
+   */
+
+  // CHECK-START: boolean Main.BooleanNot(boolean) boolean_simplifier (before)
+  // CHECK-DAG:     [[Param:z\d+]]    ParameterValue
+  // CHECK-DAG:     [[Const0:i\d+]]   IntConstant 0
+  // CHECK-DAG:     [[Const1:i\d+]]   IntConstant 1
+  // CHECK-DAG:     [[NotEq:z\d+]]    NotEqual [ [[Param]] [[Const0]] ]
+  // CHECK-DAG:                       If [ [[NotEq]] ]
+  // CHECK-DAG:     [[Phi:i\d+]]      Phi [ [[Const1]] [[Const0]] ]
+  // CHECK-DAG:                       Return [ [[Phi]] ]
+
+  // CHECK-START: boolean Main.BooleanNot(boolean) boolean_simplifier (before)
+  // CHECK:                           Goto
+  // CHECK:                           Goto
+  // CHECK:                           Goto
+  // CHECK-NOT:                       Goto
+
+  // CHECK-START: boolean Main.BooleanNot(boolean) boolean_simplifier (after)
+  // CHECK-DAG:     [[Param:z\d+]]    ParameterValue
+  // CHECK-DAG:     [[Const0:i\d+]]   IntConstant 0
+  // CHECK-DAG:     [[Eq:z\d+]]       Equal [ [[Param]] [[Const0]] ]
+  // CHECK-DAG:                       Return [ [[Eq]] ]
+
+  // CHECK-START: boolean Main.BooleanNot(boolean) boolean_simplifier (after)
+  // CHECK-NOT:                       NotEqual
+  // CHECK-NOT:                       If
+  // CHECK-NOT:                       Phi
+
+  // CHECK-START: boolean Main.BooleanNot(boolean) boolean_simplifier (after)
+  // CHECK:                           Goto
+  // CHECK-NOT:                       Goto
+
+  public static boolean BooleanNot(boolean x) {
+    return !x;
+  }
+
+  /*
+   * Program which only delegates the condition, i.e. returns 1 when True
+   * and 0 when False.
+   */
+
+  // CHECK-START: boolean Main.GreaterThan(int, int) boolean_simplifier (before)
+  // CHECK-DAG:     [[ParamX:i\d+]]   ParameterValue
+  // CHECK-DAG:     [[ParamY:i\d+]]   ParameterValue
+  // CHECK-DAG:     [[Const0:i\d+]]   IntConstant 0
+  // CHECK-DAG:     [[Const1:i\d+]]   IntConstant 1
+  // CHECK-DAG:     [[Cond:z\d+]]     GreaterThan [ [[ParamX]] [[ParamY]] ]
+  // CHECK-DAG:                       If [ [[Cond]] ]
+  // CHECK-DAG:     [[Phi:i\d+]]      Phi [ [[Const0]] [[Const1]] ]
+  // CHECK-DAG:                       Return [ [[Phi]] ]
+
+  // CHECK-START: boolean Main.GreaterThan(int, int) boolean_simplifier (after)
+  // CHECK-DAG:     [[ParamX:i\d+]]   ParameterValue
+  // CHECK-DAG:     [[ParamY:i\d+]]   ParameterValue
+  // CHECK-DAG:     [[Const0:i\d+]]   IntConstant 0
+  // CHECK-DAG:     [[Const1:i\d+]]   IntConstant 1
+  // CHECK-DAG:     [[Cond:z\d+]]     GreaterThan [ [[ParamX]] [[ParamY]] ]
+  // CHECK-DAG:                       Return [ [[Cond]] ]
+
+  public static boolean GreaterThan(int x, int y) {
+    return (x <= y) ? false : true;
+  }
+
+  /*
+   * Program which negates a condition, i.e. returns 0 when True
+   * and 1 when False.
+   */
+
+  // CHECK-START: boolean Main.LessThan(int, int) boolean_simplifier (before)
+  // CHECK-DAG:     [[ParamX:i\d+]]   ParameterValue
+  // CHECK-DAG:     [[ParamY:i\d+]]   ParameterValue
+  // CHECK-DAG:     [[Const0:i\d+]]   IntConstant 0
+  // CHECK-DAG:     [[Const1:i\d+]]   IntConstant 1
+  // CHECK-DAG:     [[Cond:z\d+]]     GreaterThanOrEqual [ [[ParamX]] [[ParamY]] ]
+  // CHECK-DAG:                       If [ [[Cond]] ]
+  // CHECK-DAG:     [[Phi:i\d+]]      Phi [ [[Const1]] [[Const0]] ]
+  // CHECK-DAG:                       Return [ [[Phi]] ]
+
+  // CHECK-START: boolean Main.LessThan(int, int) boolean_simplifier (after)
+  // CHECK-DAG:     [[ParamX:i\d+]]   ParameterValue
+  // CHECK-DAG:     [[ParamY:i\d+]]   ParameterValue
+  // CHECK-DAG:     [[Const0:i\d+]]   IntConstant 0
+  // CHECK-DAG:     [[Const1:i\d+]]   IntConstant 1
+  // CHECK-DAG:     [[Cond:z\d+]]     LessThan [ [[ParamX]] [[ParamY]] ]
+  // CHECK-DAG:                       Return [ [[Cond]] ]
+
+  public static boolean LessThan(int x, int y) {
+    return (x < y) ? true : false;
+  }
+
+  /*
+   * Program which further uses negated conditions.
+   * Note that Phis are discovered retrospectively.
+   */
+
+  // CHECK-START: boolean Main.ValuesOrdered(int, int, int) boolean_simplifier (before)
+  // CHECK-DAG:     [[ParamX:i\d+]]   ParameterValue
+  // CHECK-DAG:     [[ParamY:i\d+]]   ParameterValue
+  // CHECK-DAG:     [[ParamZ:i\d+]]   ParameterValue
+  // CHECK-DAG:     [[Const0:i\d+]]   IntConstant 0
+  // CHECK-DAG:     [[Const1:i\d+]]   IntConstant 1
+  // CHECK-DAG:     [[CondXY:z\d+]]   GreaterThan [ [[ParamX]] [[ParamY]] ]
+  // CHECK-DAG:                       If [ [[CondXY]] ]
+  // CHECK-DAG:     [[CondYZ:z\d+]]   GreaterThan [ [[ParamY]] [[ParamZ]] ]
+  // CHECK-DAG:                       If [ [[CondYZ]] ]
+  // CHECK-DAG:     [[CondXYZ:z\d+]]  NotEqual [ [[PhiXY:i\d+]] [[PhiYZ:i\d+]] ]
+  // CHECK-DAG:                       If [ [[CondXYZ]] ]
+  // CHECK-DAG:                       Return [ [[PhiXYZ:i\d+]] ]
+  // CHECK-DAG:     [[PhiXY]]         Phi [ [[Const1]] [[Const0]] ]
+  // CHECK-DAG:     [[PhiYZ]]         Phi [ [[Const1]] [[Const0]] ]
+  // CHECK-DAG:     [[PhiXYZ]]        Phi [ [[Const1]] [[Const0]] ]
+
+  // CHECK-START: boolean Main.ValuesOrdered(int, int, int) boolean_simplifier (after)
+  // CHECK-DAG:     [[ParamX:i\d+]]   ParameterValue
+  // CHECK-DAG:     [[ParamY:i\d+]]   ParameterValue
+  // CHECK-DAG:     [[ParamZ:i\d+]]   ParameterValue
+  // CHECK-DAG:     [[CmpXY:z\d+]]    LessThanOrEqual [ [[ParamX]] [[ParamY]] ]
+  // CHECK-DAG:     [[CmpYZ:z\d+]]    LessThanOrEqual [ [[ParamY]] [[ParamZ]] ]
+  // CHECK-DAG:     [[CmpXYZ:z\d+]]   Equal [ [[CmpXY]] [[CmpYZ]] ]
+  // CHECK-DAG:                       Return [ [[CmpXYZ]] ]
+
+  public static boolean ValuesOrdered(int x, int y, int z) {
+    return (x <= y) == (y <= z);
+  }
+
+  public static void main(String[] args) {
+    assertBoolEquals(false, BooleanNot(true));
+    assertBoolEquals(true, BooleanNot(false));
+    assertBoolEquals(true, GreaterThan(10, 5));
+    assertBoolEquals(false, GreaterThan(10, 10));
+    assertBoolEquals(false, GreaterThan(5, 10));
+    assertBoolEquals(true, LessThan(5, 10));
+    assertBoolEquals(false, LessThan(10, 10));
+    assertBoolEquals(false, LessThan(10, 5));
+    assertBoolEquals(true, ValuesOrdered(1, 3, 5));
+    assertBoolEquals(true, ValuesOrdered(5, 3, 1));
+    assertBoolEquals(false, ValuesOrdered(1, 3, 2));
+    assertBoolEquals(false, ValuesOrdered(2, 3, 1));
+    assertBoolEquals(true, ValuesOrdered(3, 3, 3));
+    assertBoolEquals(true, ValuesOrdered(3, 3, 5));
+    assertBoolEquals(false, ValuesOrdered(5, 5, 3));
+  }
+}
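
The CHECK lines in the test above describe the rewrite at the HIR level (If plus Phi of the constants collapsing into a direct use of the condition). As a rough source-level analogue of what boolean_simplifier does to GreaterThan, a sketch in C++ (illustration only, not ART code):

    // Before: javac's branchy select; an If on the condition feeds a Phi of 0 and 1.
    bool GreaterThanBranchy(int x, int y) {
      bool result;
      if (x <= y) {
        result = false;
      } else {
        result = true;
      }
      return result;  // Return [ Phi(Const0, Const1) ]
    }

    // After: the condition itself is returned; the If, the Phi and the empty
    // branch blocks are removed and the blocks are merged.
    bool GreaterThanSimplified(int x, int y) {
      return x > y;  // Return [ GreaterThan(x, y) ]
    }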
diff --git a/test/800-smali/expected.txt b/test/800-smali/expected.txt
index 019dc14..5922257 100644
--- a/test/800-smali/expected.txt
+++ b/test/800-smali/expected.txt
@@ -14,4 +14,5 @@
 b/18800943 (2)
 MoveExc
 MoveExceptionOnEntry
+EmptySparseSwitch
 Done!
diff --git a/test/800-smali/smali/EmptySparseSwitch.smali b/test/800-smali/smali/EmptySparseSwitch.smali
new file mode 100644
index 0000000..29592c1
--- /dev/null
+++ b/test/800-smali/smali/EmptySparseSwitch.smali
@@ -0,0 +1,17 @@
+.class public LEmptySparseSwitch;
+
+.super Ljava/lang/Object;
+
+.method public static run()V
+    .registers 2
+
+    const v0, 0
+
+    sparse-switch v0, :SparseSwitch
+
+    return-void
+
+    :SparseSwitch
+    .sparse-switch
+    .end sparse-switch
+.end method
diff --git a/test/800-smali/src/Main.java b/test/800-smali/src/Main.java
index b23896d..3e0b1f9 100644
--- a/test/800-smali/src/Main.java
+++ b/test/800-smali/src/Main.java
@@ -77,6 +77,8 @@
                 null));
         testCases.add(new TestCase("MoveExceptionOnEntry", "MoveExceptionOnEntry",
             "moveExceptionOnEntry", new Object[]{0}, new VerifyError(), null));
+        testCases.add(new TestCase("EmptySparseSwitch", "EmptySparseSwitch", "run", null, null,
+                null));
     }
 
     public void runTests() {
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index 9755efb..28fbc3e 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -95,9 +95,9 @@
   RELOCATE_TYPES += no-relocate
 endif
 ifeq ($(ART_TEST_RUN_TEST_RELOCATE_NO_PATCHOAT),true)
-  RELOCATE_TYPES := relocate-no-patchoat
+  RELOCATE_TYPES := relocate-npatchoat
 endif
-TRACE_TYPES := no-trace
+TRACE_TYPES := ntrace
 ifeq ($(ART_TEST_TRACE),true)
   TRACE_TYPES += trace
 endif
@@ -119,7 +119,7 @@
 ifeq ($(ART_TEST_PIC_IMAGE),true)
   IMAGE_TYPES += picimage
 endif
-PICTEST_TYPES := nopictest
+PICTEST_TYPES := npictest
 ifeq ($(ART_TEST_PIC_TEST),true)
   PICTEST_TYPES += pictest
 endif
@@ -130,7 +130,7 @@
 ifeq ($(ART_TEST_RUN_TEST_NDEBUG),true)
   RUN_TYPES += ndebug
 endif
-DEBUGGABLE_TYPES := nondebuggable
+DEBUGGABLE_TYPES := ndebuggable
 ifeq ($(ART_TEST_RUN_TEST_DEBUGGABLE),true)
 DEBUGGABLE_TYPES += debuggable
 endif
@@ -272,9 +272,9 @@
       $(PICTEST_TYPES), $(DEBUGGABLE_TYPES), $(TEST_ART_BROKEN_FALLBACK_RUN_TESTS),$(ALL_ADDRESS_SIZES))
 endif
 
-ifneq (,$(filter relocate-no-patchoat,$(RELOCATE_TYPES)))
+ifneq (,$(filter relocate-npatchoat,$(RELOCATE_TYPES)))
   ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
-      $(COMPILER_TYPES), relocate-no-patchoat,$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
+      $(COMPILER_TYPES), relocate-npatchoat,$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
       $(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES), $(TEST_ART_BROKEN_FALLBACK_RUN_TESTS),$(ALL_ADDRESS_SIZES))
 endif
 
@@ -375,7 +375,7 @@
 ifneq (,$(filter optimizing,$(COMPILER_TYPES)))
   ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
       optimizing,$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
-      $(IMAGE_TYPES),$(PICTEST_TYPES),nondebuggable,$(TEST_ART_BROKEN_OPTIMIZING_NONDEBUGGABLE_RUN_TESTS),$(ALL_ADDRESS_SIZES))
+      $(IMAGE_TYPES),$(PICTEST_TYPES),ndebuggable,$(TEST_ART_BROKEN_OPTIMIZING_NONDEBUGGABLE_RUN_TESTS),$(ALL_ADDRESS_SIZES))
 endif
 
 TEST_ART_BROKEN_OPTIMIZING_NONDEBUGGABLE_RUN_TESTS :=
@@ -461,10 +461,10 @@
 
 # Create a rule to build and run a tests following the form:
 # test-art-{1: host or target}-run-test-{2: debug ndebug}-{3: prebuild no-prebuild no-dex2oat}-
-#    {4: interpreter default optimizing jit}-{5: relocate no-relocate relocate-no-patchoat}-
-#    {6: trace or no-trace}-{7: gcstress gcverify cms}-{8: forcecopy checkjni jni}-
-#    {9: no-image image picimage}-{10: pictest nopictest}-
-#    {11: nondebuggable debuggable}-{12: test name}{13: 32 or 64}
+#    {4: interpreter default optimizing jit}-{5: relocate nrelocate relocate-npatchoat}-
+#    {6: trace or ntrace}-{7: gcstress gcverify cms}-{8: forcecopy checkjni jni}-
+#    {9: no-image image picimage}-{10: pictest npictest}-
+#    {11: ndebuggable debuggable}-{12: test name}{13: 32 or 64}
 define define-test-art-run-test
   run_test_options :=
   prereq_rule :=
@@ -543,7 +543,7 @@
       test_groups += ART_RUN_TEST_$$(uc_host_or_target)_NO_RELOCATE_RULES
       run_test_options += --no-relocate
     else
-      ifeq ($(5),relocate-no-patchoat)
+      ifeq ($(5),relocate-npatchoat)
         test_groups += ART_RUN_TEST_$$(uc_host_or_target)_RELOCATE_NO_PATCHOAT_RULES
         run_test_options += --relocate --no-patchoat
       else
@@ -555,7 +555,7 @@
     test_groups += ART_RUN_TEST_$$(uc_host_or_target)_TRACE_RULES
     run_test_options += --trace
   else
-    ifeq ($(6),no-trace)
+    ifeq ($(6),ntrace)
       test_groups += ART_RUN_TEST_$$(uc_host_or_target)_NO_TRACE_RULES
     else
       $$(error found $(6) expected $(TRACE_TYPES))
@@ -635,7 +635,7 @@
   ifeq ($(10),pictest)
     run_test_options += --pic-test
   else
-    ifeq ($(10),nopictest)
+    ifeq ($(10),npictest)
       # Nothing to be done.
     else
       $$(error found $(10) expected $(PICTEST_TYPES))
@@ -645,7 +645,7 @@
     test_groups += ART_RUN_TEST_$$(uc_host_or_target)_DEBUGGABLE_RULES
     run_test_options += --debuggable
   else
-    ifeq ($(11),nondebuggable)
+    ifeq ($(11),ndebuggable)
     test_groups += ART_RUN_TEST_$$(uc_host_or_target)_NONDEBUGGABLE_RULES
       # Nothing to be done.
     else
diff --git a/test/etc/run-test-jar b/test/etc/run-test-jar
index 840ff80..414e4df 100755
--- a/test/etc/run-test-jar
+++ b/test/etc/run-test-jar
@@ -267,7 +267,7 @@
 if [ "$JIT" = "y" ]; then
     INT_OPTS="-Xusejit:true"
     if [ "$VERIFY" = "y" ] ; then
-      COMPILE_FLAGS="${COMPILE_FLAGS} --compiler-filter=interpret-only"
+      COMPILE_FLAGS="${COMPILE_FLAGS} --compiler-filter=verify-at-runtime"
     else
       COMPILE_FLAGS="${COMPILE_FLAGS} --compiler-filter=verify-none"
       DEX_VERIFY="${DEX_VERIFY} -Xverify:none"
diff --git a/test/run-test b/test/run-test
index df0fce4..2873a35 100755
--- a/test/run-test
+++ b/test/run-test
@@ -441,8 +441,8 @@
         echo "    --build-only          Build test files only (off by default)."
         echo "    --interpreter         Enable interpreter only mode (off by default)."
         echo "    --jit                 Enable jit (off by default)."
-        echo "    --optimizing          Enable optimizing compiler (off by default)."
-        echo "    --quick               Use Quick compiler (default)."
+        echo "    --optimizing          Enable optimizing compiler (default)."
+        echo "    --quick               Use Quick compiler (off by default)."
         echo "    --no-verify           Turn off verification (on by default)."
         echo "    --no-optimize         Turn off optimization (on by default)."
         echo "    --no-precise          Turn off precise GC (on by default)."
diff --git a/tools/dexfuzz/src/dexfuzz/rawdex/Instruction.java b/tools/dexfuzz/src/dexfuzz/rawdex/Instruction.java
index 2dda78f..adafa62 100644
--- a/tools/dexfuzz/src/dexfuzz/rawdex/Instruction.java
+++ b/tools/dexfuzz/src/dexfuzz/rawdex/Instruction.java
@@ -434,7 +434,7 @@
     addOpcodeInfo(Opcode.INVOKE_DIRECT, "invoke-direct", 0x70, new Format35c());
     addOpcodeInfo(Opcode.INVOKE_STATIC, "invoke-static", 0x71, new Format35c());
     addOpcodeInfo(Opcode.INVOKE_INTERFACE, "invoke-interface", 0x72, new Format35c());
-    addOpcodeInfo(Opcode.RETURN_VOID_BARRIER, "return-void-barrier", 0x73, new Format10x());
+    addOpcodeInfo(Opcode.RETURN_VOID_NO_BARRIER, "return-void-no-barrier", 0x73, new Format10x());
     addOpcodeInfo(Opcode.INVOKE_VIRTUAL_RANGE, "invoke-virtual/range", 0x74, new Format3rc());
     addOpcodeInfo(Opcode.INVOKE_SUPER_RANGE, "invoke-super/range", 0x75, new Format3rc());
     addOpcodeInfo(Opcode.INVOKE_DIRECT_RANGE, "invoke-direct/range", 0x76, new Format3rc());
diff --git a/tools/dexfuzz/src/dexfuzz/rawdex/Opcode.java b/tools/dexfuzz/src/dexfuzz/rawdex/Opcode.java
index 312e855..f7c7788 100644
--- a/tools/dexfuzz/src/dexfuzz/rawdex/Opcode.java
+++ b/tools/dexfuzz/src/dexfuzz/rawdex/Opcode.java
@@ -132,7 +132,7 @@
   INVOKE_DIRECT,
   INVOKE_STATIC,
   INVOKE_INTERFACE,
-  RETURN_VOID_BARRIER,
+  RETURN_VOID_NO_BARRIER,
   INVOKE_VIRTUAL_RANGE,
   INVOKE_SUPER_RANGE,
   INVOKE_DIRECT_RANGE,
@@ -277,4 +277,4 @@
   public static boolean isBetween(Opcode opcode, Opcode opcode1, Opcode opcode2) {
     return (opcode.ordinal() >= opcode1.ordinal() && opcode.ordinal() <= opcode2.ordinal());
   }
-}
\ No newline at end of file
+}