Merge "Move code related to debug info generation to its own directory."
diff --git a/compiler/compiler.h b/compiler/compiler.h
index 3a9ce1b..97c60de 100644
--- a/compiler/compiler.h
+++ b/compiler/compiler.h
@@ -64,7 +64,8 @@
 
   virtual bool JitCompile(Thread* self ATTRIBUTE_UNUSED,
                           jit::JitCodeCache* code_cache ATTRIBUTE_UNUSED,
-                          ArtMethod* method ATTRIBUTE_UNUSED)
+                          ArtMethod* method ATTRIBUTE_UNUSED,
+                          bool osr ATTRIBUTE_UNUSED)
       SHARED_REQUIRES(Locks::mutator_lock_) {
     return false;
   }
diff --git a/compiler/jit/jit_compiler.cc b/compiler/jit/jit_compiler.cc
index 9305877..339ed73 100644
--- a/compiler/jit/jit_compiler.cc
+++ b/compiler/jit/jit_compiler.cc
@@ -60,11 +60,12 @@
   delete reinterpret_cast<JitCompiler*>(handle);
 }
 
-extern "C" bool jit_compile_method(void* handle, ArtMethod* method, Thread* self)
+extern "C" bool jit_compile_method(
+    void* handle, ArtMethod* method, Thread* self, bool osr)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   auto* jit_compiler = reinterpret_cast<JitCompiler*>(handle);
   DCHECK(jit_compiler != nullptr);
-  return jit_compiler->CompileMethod(self, method);
+  return jit_compiler->CompileMethod(self, method, osr);
 }
 
 extern "C" void jit_types_loaded(void* handle, mirror::Class** types, size_t count)
@@ -201,7 +202,7 @@
   }
 }
 
-bool JitCompiler::CompileMethod(Thread* self, ArtMethod* method) {
+bool JitCompiler::CompileMethod(Thread* self, ArtMethod* method, bool osr) {
   TimingLogger logger("JIT compiler timing logger", true, VLOG_IS_ON(jit));
   const uint64_t start_time = NanoTime();
   StackHandleScope<2> hs(self);
@@ -223,8 +224,8 @@
     // of that proxy method, as the compiler does not expect a proxy method.
     ArtMethod* method_to_compile = method->GetInterfaceMethodIfProxy(sizeof(void*));
     JitCodeCache* const code_cache = runtime->GetJit()->GetCodeCache();
-    success = compiler_driver_->GetCompiler()->JitCompile(self, code_cache, method_to_compile);
-    if (success && perf_file_ != nullptr) {
+    success = compiler_driver_->GetCompiler()->JitCompile(self, code_cache, method_to_compile, osr);
+    if (success && (perf_file_ != nullptr)) {
       const void* ptr = method_to_compile->GetEntryPointFromQuickCompiledCode();
       std::ostringstream stream;
       stream << std::hex
diff --git a/compiler/jit/jit_compiler.h b/compiler/jit/jit_compiler.h
index 037a18a..5294d0e 100644
--- a/compiler/jit/jit_compiler.h
+++ b/compiler/jit/jit_compiler.h
@@ -37,7 +37,7 @@
  public:
   static JitCompiler* Create();
   virtual ~JitCompiler();
-  bool CompileMethod(Thread* self, ArtMethod* method)
+  bool CompileMethod(Thread* self, ArtMethod* method, bool osr)
       SHARED_REQUIRES(Locks::mutator_lock_);
   CompilerCallbacks* GetCompilerCallbacks() const;
   size_t GetTotalCompileTime() const {
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index c7430e7..8d77daf 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -72,74 +72,6 @@
   size_t index_;
 };
 
-class SwitchTable : public ValueObject {
- public:
-  SwitchTable(const Instruction& instruction, uint32_t dex_pc, bool sparse)
-      : instruction_(instruction), dex_pc_(dex_pc), sparse_(sparse) {
-    int32_t table_offset = instruction.VRegB_31t();
-    const uint16_t* table = reinterpret_cast<const uint16_t*>(&instruction) + table_offset;
-    if (sparse) {
-      CHECK_EQ(table[0], static_cast<uint16_t>(Instruction::kSparseSwitchSignature));
-    } else {
-      CHECK_EQ(table[0], static_cast<uint16_t>(Instruction::kPackedSwitchSignature));
-    }
-    num_entries_ = table[1];
-    values_ = reinterpret_cast<const int32_t*>(&table[2]);
-  }
-
-  uint16_t GetNumEntries() const {
-    return num_entries_;
-  }
-
-  void CheckIndex(size_t index) const {
-    if (sparse_) {
-      // In a sparse table, we have num_entries_ keys and num_entries_ values, in that order.
-      DCHECK_LT(index, 2 * static_cast<size_t>(num_entries_));
-    } else {
-      // In a packed table, we have the starting key and num_entries_ values.
-      DCHECK_LT(index, 1 + static_cast<size_t>(num_entries_));
-    }
-  }
-
-  int32_t GetEntryAt(size_t index) const {
-    CheckIndex(index);
-    return values_[index];
-  }
-
-  uint32_t GetDexPcForIndex(size_t index) const {
-    CheckIndex(index);
-    return dex_pc_ +
-        (reinterpret_cast<const int16_t*>(values_ + index) -
-         reinterpret_cast<const int16_t*>(&instruction_));
-  }
-
-  // Index of the first value in the table.
-  size_t GetFirstValueIndex() const {
-    if (sparse_) {
-      // In a sparse table, we have num_entries_ keys and num_entries_ values, in that order.
-      return num_entries_;
-    } else {
-      // In a packed table, we have the starting key and num_entries_ values.
-      return 1;
-    }
-  }
-
- private:
-  const Instruction& instruction_;
-  const uint32_t dex_pc_;
-
-  // Whether this is a sparse-switch table (or a packed-switch one).
-  const bool sparse_;
-
-  // This can't be const as it needs to be computed off of the given instruction, and complicated
-  // expressions in the initializer list seemed very ugly.
-  uint16_t num_entries_;
-
-  const int32_t* values_;
-
-  DISALLOW_COPY_AND_ASSIGN(SwitchTable);
-};
-
 void HGraphBuilder::InitializeLocals(uint16_t count) {
   graph_->SetNumberOfVRegs(count);
   locals_.resize(count);
diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h
index 1d604e7..93e17d6 100644
--- a/compiler/optimizing/builder.h
+++ b/compiler/optimizing/builder.h
@@ -30,7 +30,6 @@
 namespace art {
 
 class Instruction;
-class SwitchTable;
 
 class HGraphBuilder : public ValueObject {
  public:
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index a3bbfdb..e1b83f0 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -629,8 +629,76 @@
   return stack_map_stream_.PrepareForFillIn();
 }
 
-void CodeGenerator::BuildStackMaps(MemoryRegion region) {
+static void CheckCovers(uint32_t dex_pc,
+                        const HGraph& graph,
+                        const CodeInfo& code_info,
+                        const ArenaVector<HSuspendCheck*>& loop_headers,
+                        ArenaVector<size_t>* covered) {
+  StackMapEncoding encoding = code_info.ExtractEncoding();
+  for (size_t i = 0; i < loop_headers.size(); ++i) {
+    if (loop_headers[i]->GetDexPc() == dex_pc) {
+      if (graph.IsCompilingOsr()) {
+        DCHECK(code_info.GetOsrStackMapForDexPc(dex_pc, encoding).IsValid());
+      }
+      ++(*covered)[i];
+    }
+  }
+}
+
+// Debug helper to ensure loop entries in compiled code are matched by
+// dex branch instructions.
+static void CheckLoopEntriesCanBeUsedForOsr(const HGraph& graph,
+                                            const CodeInfo& code_info,
+                                            const DexFile::CodeItem& code_item) {
+  if (graph.HasTryCatch()) {
+    // One can write loops through try/catch, which we do not support for OSR anyway.
+    return;
+  }
+  ArenaVector<HSuspendCheck*> loop_headers(graph.GetArena()->Adapter(kArenaAllocMisc));
+  for (HReversePostOrderIterator it(graph); !it.Done(); it.Advance()) {
+    if (it.Current()->IsLoopHeader()) {
+      HSuspendCheck* suspend_check = it.Current()->GetLoopInformation()->GetSuspendCheck();
+      if (!suspend_check->GetEnvironment()->IsFromInlinedInvoke()) {
+        loop_headers.push_back(suspend_check);
+      }
+    }
+  }
+  ArenaVector<size_t> covered(loop_headers.size(), 0, graph.GetArena()->Adapter(kArenaAllocMisc));
+  const uint16_t* code_ptr = code_item.insns_;
+  const uint16_t* code_end = code_item.insns_ + code_item.insns_size_in_code_units_;
+
+  size_t dex_pc = 0;
+  while (code_ptr < code_end) {
+    const Instruction& instruction = *Instruction::At(code_ptr);
+    if (instruction.IsBranch()) {
+      uint32_t target = dex_pc + instruction.GetTargetOffset();
+      CheckCovers(target, graph, code_info, loop_headers, &covered);
+    } else if (instruction.IsSwitch()) {
+      SwitchTable table(instruction, dex_pc, instruction.Opcode() == Instruction::SPARSE_SWITCH);
+      uint16_t num_entries = table.GetNumEntries();
+      size_t offset = table.GetFirstValueIndex();
+
+      // Use a larger loop counter type to avoid overflow issues.
+      for (size_t i = 0; i < num_entries; ++i) {
+        // The target of the case.
+        uint32_t target = dex_pc + table.GetEntryAt(i + offset);
+        CheckCovers(target, graph, code_info, loop_headers, &covered);
+      }
+    }
+    dex_pc += instruction.SizeInCodeUnits();
+    code_ptr += instruction.SizeInCodeUnits();
+  }
+
+  for (size_t i = 0; i < covered.size(); ++i) {
+    DCHECK_NE(covered[i], 0u) << "Loop in compiled code has no dex branch equivalent";
+  }
+}
+
+void CodeGenerator::BuildStackMaps(MemoryRegion region, const DexFile::CodeItem& code_item) {
   stack_map_stream_.FillIn(region);
+  if (kIsDebugBuild) {
+    CheckLoopEntriesCanBeUsedForOsr(*graph_, CodeInfo(region), code_item);
+  }
 }
 
 void CodeGenerator::RecordPcInfo(HInstruction* instruction,
@@ -705,6 +773,46 @@
 
   EmitEnvironment(instruction->GetEnvironment(), slow_path);
   stack_map_stream_.EndStackMapEntry();
+
+  HLoopInformation* info = instruction->GetBlock()->GetLoopInformation();
+  if (instruction->IsSuspendCheck() &&
+      (info != nullptr) &&
+      graph_->IsCompilingOsr() &&
+      (inlining_depth == 0)) {
+    DCHECK_EQ(info->GetSuspendCheck(), instruction);
+    // We duplicate the stack map as a marker that this stack map can be an OSR entry.
+    // Duplicating it avoids having the runtime recognize and skip an OSR stack map.
+    DCHECK(info->IsIrreducible());
+    stack_map_stream_.BeginStackMapEntry(
+        dex_pc, native_pc, register_mask, locations->GetStackMask(), outer_environment_size, 0);
+    EmitEnvironment(instruction->GetEnvironment(), slow_path);
+    stack_map_stream_.EndStackMapEntry();
+    if (kIsDebugBuild) {
+      HEnvironment* environment = instruction->GetEnvironment();
+      for (size_t i = 0, environment_size = environment->Size(); i < environment_size; ++i) {
+        HInstruction* in_environment = environment->GetInstructionAt(i);
+        if (in_environment != nullptr) {
+          DCHECK(in_environment->IsPhi() || in_environment->IsConstant());
+          Location location = environment->GetLocationAt(i);
+          DCHECK(location.IsStackSlot() ||
+                 location.IsDoubleStackSlot() ||
+                 location.IsConstant() ||
+                 location.IsInvalid());
+          if (location.IsStackSlot() || location.IsDoubleStackSlot()) {
+            DCHECK_LT(location.GetStackIndex(), static_cast<int32_t>(GetFrameSize()));
+          }
+        }
+      }
+    }
+  } else if (kIsDebugBuild) {
+    // Ensure stack maps are unique, by checking that the native pc in the stack map
+    // last emitted is different than the native pc of the stack map just emitted.
+    size_t number_of_stack_maps = stack_map_stream_.GetNumberOfStackMaps();
+    if (number_of_stack_maps > 1) {
+      DCHECK_NE(stack_map_stream_.GetStackMap(number_of_stack_maps - 1).native_pc_offset,
+                stack_map_stream_.GetStackMap(number_of_stack_maps - 2).native_pc_offset);
+    }
+  }
 }
 
 bool CodeGenerator::HasStackMapAtCurrentPc() {
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 4f8f146..0a688cf 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -288,7 +288,7 @@
     slow_paths_.push_back(slow_path);
   }
 
-  void BuildStackMaps(MemoryRegion region);
+  void BuildStackMaps(MemoryRegion region, const DexFile::CodeItem& code_item);
   size_t ComputeStackMapsSize();
 
   bool IsLeafMethod() const {
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index c2d9edd..e434932 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -3750,6 +3750,7 @@
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(compare, LocationSummary::kNoCall);
   switch (compare->InputAt(0)->GetType()) {
+    case Primitive::kPrimInt:
     case Primitive::kPrimLong: {
       locations->SetInAt(0, Location::RequiresRegister());
       locations->SetInAt(1, Location::RequiresRegister());
@@ -3779,6 +3780,13 @@
   Primitive::Type type = compare->InputAt(0)->GetType();
   Condition less_cond;
   switch (type) {
+    case Primitive::kPrimInt: {
+      __ LoadImmediate(out, 0);
+      __ cmp(left.AsRegister<Register>(),
+             ShifterOperand(right.AsRegister<Register>()));  // Signed compare.
+      less_cond = LT;
+      break;
+    }
     case Primitive::kPrimLong: {
       __ cmp(left.AsRegisterPairHigh<Register>(),
              ShifterOperand(right.AsRegisterPairHigh<Register>()));  // Signed compare.
@@ -3808,6 +3816,7 @@
       LOG(FATAL) << "Unexpected compare type " << type;
       UNREACHABLE();
   }
+
   __ b(&done, EQ);
   __ b(&less, less_cond);
 
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 4179fab..e20e044 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -2408,6 +2408,7 @@
       new (GetGraph()->GetArena()) LocationSummary(compare, LocationSummary::kNoCall);
   Primitive::Type in_type = compare->InputAt(0)->GetType();
   switch (in_type) {
+    case Primitive::kPrimInt:
     case Primitive::kPrimLong: {
       locations->SetInAt(0, Location::RequiresRegister());
       locations->SetInAt(1, ARM64EncodableConstantOrRegister(compare->InputAt(1), compare));
@@ -2436,14 +2437,14 @@
   //  1 if: left  > right
   // -1 if: left  < right
   switch (in_type) {
+    case Primitive::kPrimInt:
     case Primitive::kPrimLong: {
       Register result = OutputRegister(compare);
       Register left = InputRegisterAt(compare, 0);
       Operand right = InputOperandAt(compare, 1);
-
       __ Cmp(left, right);
-      __ Cset(result, ne);
-      __ Cneg(result, result, lt);
+      __ Cset(result, ne);          // result == +1 if NE or 0 otherwise
+      __ Cneg(result, result, lt);  // result == -1 if LT or unchanged otherwise
       break;
     }
     case Primitive::kPrimFloat:
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index 961fe62..e9c0b6a 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -2123,6 +2123,7 @@
       new (GetGraph()->GetArena()) LocationSummary(compare, LocationSummary::kNoCall);
 
   switch (in_type) {
+    case Primitive::kPrimInt:
     case Primitive::kPrimLong:
       locations->SetInAt(0, Location::RequiresRegister());
       locations->SetInAt(1, Location::RequiresRegister());
@@ -2153,6 +2154,14 @@
   //  1 if: left  > right
   // -1 if: left  < right
   switch (in_type) {
+    case Primitive::kPrimInt: {
+      Register lhs = locations->InAt(0).AsRegister<Register>();
+      Register rhs = locations->InAt(1).AsRegister<Register>();
+      __ Slt(TMP, lhs, rhs);
+      __ Slt(res, rhs, lhs);
+      __ Subu(res, res, TMP);
+      break;
+    }
     case Primitive::kPrimLong: {
       MipsLabel done;
       Register lhs_high = locations->InAt(0).AsRegisterPairHigh<Register>();
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 3e1563c..da98a89 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -1763,6 +1763,7 @@
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(compare);
 
   switch (in_type) {
+    case Primitive::kPrimInt:
     case Primitive::kPrimLong:
       locations->SetInAt(0, Location::RequiresRegister());
       locations->SetInAt(1, Location::RegisterOrConstant(compare->InputAt(1)));
@@ -1791,16 +1792,25 @@
   //  1 if: left  > right
   // -1 if: left  < right
   switch (in_type) {
+    case Primitive::kPrimInt:
     case Primitive::kPrimLong: {
       GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>();
       Location rhs_location = locations->InAt(1);
       bool use_imm = rhs_location.IsConstant();
       GpuRegister rhs = ZERO;
       if (use_imm) {
-        int64_t value = CodeGenerator::GetInt64ValueOf(rhs_location.GetConstant()->AsConstant());
-        if (value != 0) {
-          rhs = AT;
-          __ LoadConst64(rhs, value);
+        if (in_type == Primitive::kPrimInt) {
+          int32_t value = CodeGenerator::GetInt32ValueOf(rhs_location.GetConstant()->AsConstant());
+          if (value != 0) {
+            rhs = AT;
+            __ LoadConst32(rhs, value);
+          }
+        } else {
+          int64_t value = CodeGenerator::GetInt64ValueOf(rhs_location.GetConstant()->AsConstant());
+          if (value != 0) {
+            rhs = AT;
+            __ LoadConst64(rhs, value);
+          }
         }
       } else {
         rhs = rhs_location.AsRegister<GpuRegister>();
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 6304fb5..de62010 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -1350,11 +1350,7 @@
     int32_t val_high = High32Bits(value);
     int32_t val_low = Low32Bits(value);
 
-    if (val_high == 0) {
-      __ testl(left_high, left_high);
-    } else {
-      __ cmpl(left_high, Immediate(val_high));
-    }
+    codegen_->Compare32BitValue(left_high, val_high);
     if (if_cond == kCondNE) {
       __ j(X86Condition(true_high_cond), true_label);
     } else if (if_cond == kCondEQ) {
@@ -1364,11 +1360,7 @@
       __ j(X86Condition(false_high_cond), false_label);
     }
     // Must be equal high, so compare the lows.
-    if (val_low == 0) {
-      __ testl(left_low, left_low);
-    } else {
-      __ cmpl(left_low, Immediate(val_low));
-    }
+    codegen_->Compare32BitValue(left_low, val_low);
   } else {
     Register right_high = right.AsRegisterPairHigh<Register>();
     Register right_low = right.AsRegisterPairLow<Register>();
@@ -1547,11 +1539,7 @@
       __ cmpl(lhs.AsRegister<Register>(), rhs.AsRegister<Register>());
     } else if (rhs.IsConstant()) {
       int32_t constant = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
-      if (constant == 0) {
-        __ testl(lhs.AsRegister<Register>(), lhs.AsRegister<Register>());
-      } else {
-        __ cmpl(lhs.AsRegister<Register>(), Immediate(constant));
-      }
+      codegen_->Compare32BitValue(lhs.AsRegister<Register>(), constant);
     } else {
       __ cmpl(lhs.AsRegister<Register>(), Address(ESP, rhs.GetStackIndex()));
     }
@@ -1744,11 +1732,7 @@
         __ cmpl(lhs.AsRegister<Register>(), rhs.AsRegister<Register>());
       } else if (rhs.IsConstant()) {
         int32_t constant = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
-        if (constant == 0) {
-          __ testl(lhs.AsRegister<Register>(), lhs.AsRegister<Register>());
-        } else {
-          __ cmpl(lhs.AsRegister<Register>(), Immediate(constant));
-        }
+        codegen_->Compare32BitValue(lhs.AsRegister<Register>(), constant);
       } else {
         __ cmpl(lhs.AsRegister<Register>(), Address(ESP, rhs.GetStackIndex()));
       }
@@ -4143,6 +4127,7 @@
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(compare, LocationSummary::kNoCall);
   switch (compare->InputAt(0)->GetType()) {
+    case Primitive::kPrimInt:
     case Primitive::kPrimLong: {
       locations->SetInAt(0, Location::RequiresRegister());
       locations->SetInAt(1, Location::Any());
@@ -4174,7 +4159,21 @@
   Location right = locations->InAt(1);
 
   NearLabel less, greater, done;
+  Condition less_cond = kLess;
+
   switch (compare->InputAt(0)->GetType()) {
+    case Primitive::kPrimInt: {
+      Register left_reg = left.AsRegister<Register>();
+      if (right.IsConstant()) {
+        int32_t value = right.GetConstant()->AsIntConstant()->GetValue();
+        codegen_->Compare32BitValue(left_reg, value);
+      } else if (right.IsStackSlot()) {
+        __ cmpl(left_reg, Address(ESP, right.GetStackIndex()));
+      } else {
+        __ cmpl(left_reg, right.AsRegister<Register>());
+      }
+      break;
+    }
     case Primitive::kPrimLong: {
       Register left_low = left.AsRegisterPairLow<Register>();
       Register left_high = left.AsRegisterPairHigh<Register>();
@@ -4196,11 +4195,7 @@
         __ cmpl(left_high, Address(ESP, right.GetHighStackIndex(kX86WordSize)));
       } else {
         DCHECK(right_is_const) << right;
-        if (val_high == 0) {
-          __ testl(left_high, left_high);
-        } else {
-          __ cmpl(left_high, Immediate(val_high));
-        }
+        codegen_->Compare32BitValue(left_high, val_high);
       }
       __ j(kLess, &less);  // Signed compare.
       __ j(kGreater, &greater);  // Signed compare.
@@ -4210,30 +4205,30 @@
         __ cmpl(left_low, Address(ESP, right.GetStackIndex()));
       } else {
         DCHECK(right_is_const) << right;
-        if (val_low == 0) {
-          __ testl(left_low, left_low);
-        } else {
-          __ cmpl(left_low, Immediate(val_low));
-        }
+        codegen_->Compare32BitValue(left_low, val_low);
       }
+      less_cond = kBelow;  // for CF (unsigned).
       break;
     }
     case Primitive::kPrimFloat: {
       GenerateFPCompare(left, right, compare, false);
       __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
+      less_cond = kBelow;  // for CF (floats).
       break;
     }
     case Primitive::kPrimDouble: {
       GenerateFPCompare(left, right, compare, true);
       __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
+      less_cond = kBelow;  // for CF (floats).
       break;
     }
     default:
       LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType();
   }
+
   __ movl(out, Immediate(0));
   __ j(kEqual, &done);
-  __ j(kBelow, &less);  // kBelow is for CF (unsigned & floats).
+  __ j(less_cond, &less);
 
   __ Bind(&greater);
   __ movl(out, Immediate(1));
@@ -7193,6 +7188,22 @@
   return Address(reg, kDummy32BitOffset, fixup);
 }
 
+void CodeGeneratorX86::Load32BitValue(Register dest, int32_t value) {
+  if (value == 0) {
+    __ xorl(dest, dest);
+  } else {
+    __ movl(dest, Immediate(value));
+  }
+}
+
+void CodeGeneratorX86::Compare32BitValue(Register dest, int32_t value) {
+  if (value == 0) {
+    __ testl(dest, dest);
+  } else {
+    __ cmpl(dest, Immediate(value));
+  }
+}
+
 Address CodeGeneratorX86::LiteralCaseTable(HX86PackedSwitch* switch_instr,
                                            Register reg,
                                            Register value) {
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index d51b96f..45e8ffa 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -452,6 +452,12 @@
   Address LiteralInt32Address(int32_t v, Register reg);
   Address LiteralInt64Address(int64_t v, Register reg);
 
+  // Load a 32-bit value into a register in the most efficient manner.
+  void Load32BitValue(Register dest, int32_t value);
+
+  // Compare a register with a 32-bit value in the most efficient manner.
+  void Compare32BitValue(Register dest, int32_t value);
+
   Address LiteralCaseTable(HX86PackedSwitch* switch_instr, Register reg, Register value);
 
   void Finalize(CodeAllocator* allocator) OVERRIDE;
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 6795488..99396cd 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -1126,30 +1126,43 @@
     return;
   }
   if (destination.IsRegister()) {
+    CpuRegister dest = destination.AsRegister<CpuRegister>();
     if (source.IsRegister()) {
-      __ movq(destination.AsRegister<CpuRegister>(), source.AsRegister<CpuRegister>());
+      __ movq(dest, source.AsRegister<CpuRegister>());
     } else if (source.IsFpuRegister()) {
-      __ movd(destination.AsRegister<CpuRegister>(), source.AsFpuRegister<XmmRegister>());
+      __ movd(dest, source.AsFpuRegister<XmmRegister>());
     } else if (source.IsStackSlot()) {
-      __ movl(destination.AsRegister<CpuRegister>(),
-              Address(CpuRegister(RSP), source.GetStackIndex()));
+      __ movl(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
+    } else if (source.IsConstant()) {
+      HConstant* constant = source.GetConstant();
+      if (constant->IsLongConstant()) {
+        Load64BitValue(dest, constant->AsLongConstant()->GetValue());
+      } else {
+        Load32BitValue(dest, GetInt32ValueOf(constant));
+      }
     } else {
       DCHECK(source.IsDoubleStackSlot());
-      __ movq(destination.AsRegister<CpuRegister>(),
-              Address(CpuRegister(RSP), source.GetStackIndex()));
+      __ movq(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
     }
   } else if (destination.IsFpuRegister()) {
+    XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
     if (source.IsRegister()) {
-      __ movd(destination.AsFpuRegister<XmmRegister>(), source.AsRegister<CpuRegister>());
+      __ movd(dest, source.AsRegister<CpuRegister>());
     } else if (source.IsFpuRegister()) {
-      __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
+      __ movaps(dest, source.AsFpuRegister<XmmRegister>());
+    } else if (source.IsConstant()) {
+      HConstant* constant = source.GetConstant();
+      int64_t value = CodeGenerator::GetInt64ValueOf(constant);
+      if (constant->IsFloatConstant()) {
+        Load32BitValue(dest, static_cast<int32_t>(value));
+      } else {
+        Load64BitValue(dest, value);
+      }
     } else if (source.IsStackSlot()) {
-      __ movss(destination.AsFpuRegister<XmmRegister>(),
-              Address(CpuRegister(RSP), source.GetStackIndex()));
+      __ movss(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
     } else {
       DCHECK(source.IsDoubleStackSlot());
-      __ movsd(destination.AsFpuRegister<XmmRegister>(),
-               Address(CpuRegister(RSP), source.GetStackIndex()));
+      __ movsd(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
     }
   } else if (destination.IsStackSlot()) {
     if (source.IsRegister()) {
@@ -1345,42 +1358,44 @@
   __ j(X86_64FPCondition(cond->GetCondition()), true_label);
 }
 
-template<class LabelType>
-void InstructionCodeGeneratorX86_64::GenerateCompareTestAndBranch(HCondition* condition,
-                                                                  LabelType* true_target_in,
-                                                                  LabelType* false_target_in) {
-  // Generated branching requires both targets to be explicit. If either of the
-  // targets is nullptr (fallthrough) use and bind `fallthrough_target` instead.
-  LabelType fallthrough_target;
-  LabelType* true_target = true_target_in == nullptr ? &fallthrough_target : true_target_in;
-  LabelType* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in;
-
+void InstructionCodeGeneratorX86_64::GenerateCompareTest(HCondition* condition) {
   LocationSummary* locations = condition->GetLocations();
+
   Location left = locations->InAt(0);
   Location right = locations->InAt(1);
-
   Primitive::Type type = condition->InputAt(0)->GetType();
   switch (type) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimNot: {
+      CpuRegister left_reg = left.AsRegister<CpuRegister>();
+      if (right.IsConstant()) {
+        int32_t value = CodeGenerator::GetInt32ValueOf(right.GetConstant());
+        if (value == 0) {
+          __ testl(left_reg, left_reg);
+        } else {
+          __ cmpl(left_reg, Immediate(value));
+        }
+      } else if (right.IsStackSlot()) {
+        __ cmpl(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
+      } else {
+        __ cmpl(left_reg, right.AsRegister<CpuRegister>());
+      }
+      break;
+    }
     case Primitive::kPrimLong: {
       CpuRegister left_reg = left.AsRegister<CpuRegister>();
       if (right.IsConstant()) {
         int64_t value = right.GetConstant()->AsLongConstant()->GetValue();
-        if (IsInt<32>(value)) {
-          if (value == 0) {
-            __ testq(left_reg, left_reg);
-          } else {
-            __ cmpq(left_reg, Immediate(static_cast<int32_t>(value)));
-          }
-        } else {
-          // Value won't fit in a 32-bit integer.
-          __ cmpq(left_reg, codegen_->LiteralInt64Address(value));
-        }
+        codegen_->Compare64BitValue(left_reg, value);
       } else if (right.IsDoubleStackSlot()) {
         __ cmpq(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
       } else {
         __ cmpq(left_reg, right.AsRegister<CpuRegister>());
       }
-      __ j(X86_64IntegerCondition(condition->GetCondition()), true_target);
       break;
     }
     case Primitive::kPrimFloat: {
@@ -1395,7 +1410,6 @@
         __ ucomiss(left.AsFpuRegister<XmmRegister>(),
                    Address(CpuRegister(RSP), right.GetStackIndex()));
       }
-      GenerateFPJumps(condition, true_target, false_target);
       break;
     }
     case Primitive::kPrimDouble: {
@@ -1410,6 +1424,38 @@
         __ ucomisd(left.AsFpuRegister<XmmRegister>(),
                    Address(CpuRegister(RSP), right.GetStackIndex()));
       }
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected condition type " << type;
+  }
+}
+
+template<class LabelType>
+void InstructionCodeGeneratorX86_64::GenerateCompareTestAndBranch(HCondition* condition,
+                                                                  LabelType* true_target_in,
+                                                                  LabelType* false_target_in) {
+  // Generated branching requires both targets to be explicit. If either of the
+  // targets is nullptr (fallthrough) use and bind `fallthrough_target` instead.
+  LabelType fallthrough_target;
+  LabelType* true_target = true_target_in == nullptr ? &fallthrough_target : true_target_in;
+  LabelType* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in;
+
+  // Generate the comparison to set the CC.
+  GenerateCompareTest(condition);
+
+  // Now generate the correct jump(s).
+  Primitive::Type type = condition->InputAt(0)->GetType();
+  switch (type) {
+    case Primitive::kPrimLong: {
+      __ j(X86_64IntegerCondition(condition->GetCondition()), true_target);
+      break;
+    }
+    case Primitive::kPrimFloat: {
+      GenerateFPJumps(condition, true_target, false_target);
+      break;
+    }
+    case Primitive::kPrimDouble: {
       GenerateFPJumps(condition, true_target, false_target);
       break;
     }
@@ -1508,11 +1554,7 @@
       __ cmpl(lhs.AsRegister<CpuRegister>(), rhs.AsRegister<CpuRegister>());
     } else if (rhs.IsConstant()) {
       int32_t constant = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
-      if (constant == 0) {
-        __ testl(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>());
-      } else {
-        __ cmpl(lhs.AsRegister<CpuRegister>(), Immediate(constant));
-      }
+      codegen_->Compare32BitValue(lhs.AsRegister<CpuRegister>(), constant);
     } else {
       __ cmpl(lhs.AsRegister<CpuRegister>(),
               Address(CpuRegister(RSP), rhs.GetStackIndex()));
@@ -1564,14 +1606,37 @@
                                /* false_target */ nullptr);
 }
 
+static bool SelectCanUseCMOV(HSelect* select) {
+  // There are no conditional move instructions for XMMs.
+  if (Primitive::IsFloatingPointType(select->GetType())) {
+    return false;
+  }
+
+  // A FP condition doesn't generate the single CC that we need.
+  HInstruction* condition = select->GetCondition();
+  if (condition->IsCondition() &&
+      Primitive::IsFloatingPointType(condition->InputAt(0)->GetType())) {
+    return false;
+  }
+
+  // We can generate a CMOV for this Select.
+  return true;
+}
+
 void LocationsBuilderX86_64::VisitSelect(HSelect* select) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(select);
   if (Primitive::IsFloatingPointType(select->GetType())) {
     locations->SetInAt(0, Location::RequiresFpuRegister());
-    locations->SetInAt(1, Location::RequiresFpuRegister());
+    // Since we can't use CMOV, there is no need to force 'true' into a register.
+    locations->SetInAt(1, Location::Any());
   } else {
     locations->SetInAt(0, Location::RequiresRegister());
-    locations->SetInAt(1, Location::RequiresRegister());
+    if (SelectCanUseCMOV(select)) {
+      locations->SetInAt(1, Location::RequiresRegister());
+    } else {
+      // Since we can't use CMOV, there is no need to force 'true' into a register.
+      locations->SetInAt(1, Location::Any());
+    }
   }
   if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
     locations->SetInAt(2, Location::RequiresRegister());
@@ -1581,13 +1646,52 @@
 
 void InstructionCodeGeneratorX86_64::VisitSelect(HSelect* select) {
   LocationSummary* locations = select->GetLocations();
-  NearLabel false_target;
-  GenerateTestAndBranch<NearLabel>(select,
-                                   /* condition_input_index */ 2,
-                                   /* true_target */ nullptr,
-                                   &false_target);
-  codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType());
-  __ Bind(&false_target);
+  if (SelectCanUseCMOV(select)) {
+    // If both the condition and the source types are integer, we can generate
+    // a CMOV to implement Select.
+    CpuRegister value_false = locations->InAt(0).AsRegister<CpuRegister>();
+    CpuRegister value_true = locations->InAt(1).AsRegister<CpuRegister>();
+    DCHECK(locations->InAt(0).Equals(locations->Out()));
+
+    HInstruction* select_condition = select->GetCondition();
+    Condition cond = kNotEqual;
+
+    // Figure out how to test the 'condition'.
+    if (select_condition->IsCondition()) {
+      HCondition* condition = select_condition->AsCondition();
+      if (!condition->IsEmittedAtUseSite()) {
+        // This was a previously materialized condition.
+        // Can we use the existing condition code?
+        if (AreEflagsSetFrom(condition, select)) {
+          // Materialization was the previous instruction.  Condition codes are right.
+          cond = X86_64IntegerCondition(condition->GetCondition());
+        } else {
+          // No, we have to recreate the condition code.
+          CpuRegister cond_reg = locations->InAt(2).AsRegister<CpuRegister>();
+          __ testl(cond_reg, cond_reg);
+        }
+      } else {
+        GenerateCompareTest(condition);
+        cond = X86_64IntegerCondition(condition->GetCondition());
+      }
+    } else {
+      // Must be a boolean condition, which needs to be compared to 0.
+      CpuRegister cond_reg = locations->InAt(2).AsRegister<CpuRegister>();
+      __ testl(cond_reg, cond_reg);
+    }
+
+    // If the condition is true, overwrite the output, which already contains false.
+    // Generate the correct sized CMOV.
+    __ cmov(cond, value_false, value_true, select->GetType() == Primitive::kPrimLong);
+  } else {
+    NearLabel false_target;
+    GenerateTestAndBranch<NearLabel>(select,
+                                     /* condition_input_index */ 2,
+                                     /* true_target */ nullptr,
+                                     &false_target);
+    codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType());
+    __ Bind(&false_target);
+  }
 }
 
 void LocationsBuilderX86_64::VisitNativeDebugInfo(HNativeDebugInfo* info) {
@@ -1691,11 +1795,7 @@
         __ cmpl(lhs.AsRegister<CpuRegister>(), rhs.AsRegister<CpuRegister>());
       } else if (rhs.IsConstant()) {
         int32_t constant = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
-        if (constant == 0) {
-          __ testl(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>());
-        } else {
-          __ cmpl(lhs.AsRegister<CpuRegister>(), Immediate(constant));
-        }
+        codegen_->Compare32BitValue(lhs.AsRegister<CpuRegister>(), constant);
       } else {
         __ cmpl(lhs.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), rhs.GetStackIndex()));
       }
@@ -1709,16 +1809,7 @@
         __ cmpq(lhs.AsRegister<CpuRegister>(), rhs.AsRegister<CpuRegister>());
       } else if (rhs.IsConstant()) {
         int64_t value = rhs.GetConstant()->AsLongConstant()->GetValue();
-        if (IsInt<32>(value)) {
-          if (value == 0) {
-            __ testq(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>());
-          } else {
-            __ cmpq(lhs.AsRegister<CpuRegister>(), Immediate(static_cast<int32_t>(value)));
-          }
-        } else {
-          // Value won't fit in an int.
-          __ cmpq(lhs.AsRegister<CpuRegister>(), codegen_->LiteralInt64Address(value));
-        }
+        codegen_->Compare64BitValue(lhs.AsRegister<CpuRegister>(), value);
       } else {
         __ cmpq(lhs.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), rhs.GetStackIndex()));
       }
@@ -1850,6 +1941,7 @@
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(compare, LocationSummary::kNoCall);
   switch (compare->InputAt(0)->GetType()) {
+    case Primitive::kPrimInt:
     case Primitive::kPrimLong: {
       locations->SetInAt(0, Location::RequiresRegister());
       locations->SetInAt(1, Location::Any());
@@ -1876,21 +1968,26 @@
 
   NearLabel less, greater, done;
   Primitive::Type type = compare->InputAt(0)->GetType();
+  Condition less_cond = kLess;
+
   switch (type) {
+    case Primitive::kPrimInt: {
+      CpuRegister left_reg = left.AsRegister<CpuRegister>();
+      if (right.IsConstant()) {
+        int32_t value = right.GetConstant()->AsIntConstant()->GetValue();
+        codegen_->Compare32BitValue(left_reg, value);
+      } else if (right.IsStackSlot()) {
+        __ cmpl(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
+      } else {
+        __ cmpl(left_reg, right.AsRegister<CpuRegister>());
+      }
+      break;
+    }
     case Primitive::kPrimLong: {
       CpuRegister left_reg = left.AsRegister<CpuRegister>();
       if (right.IsConstant()) {
         int64_t value = right.GetConstant()->AsLongConstant()->GetValue();
-        if (IsInt<32>(value)) {
-          if (value == 0) {
-            __ testq(left_reg, left_reg);
-          } else {
-            __ cmpq(left_reg, Immediate(static_cast<int32_t>(value)));
-          }
-        } else {
-          // Value won't fit in an int.
-          __ cmpq(left_reg, codegen_->LiteralInt64Address(value));
-        }
+        codegen_->Compare64BitValue(left_reg, value);
       } else if (right.IsDoubleStackSlot()) {
         __ cmpq(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
       } else {
@@ -1909,6 +2006,7 @@
         __ ucomiss(left_reg, right.AsFpuRegister<XmmRegister>());
       }
       __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
+      less_cond = kBelow;  //  ucomis{s,d} sets CF
       break;
     }
     case Primitive::kPrimDouble: {
@@ -1922,14 +2020,16 @@
         __ ucomisd(left_reg, right.AsFpuRegister<XmmRegister>());
       }
       __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
+      less_cond = kBelow;  //  ucomis{s,d} sets CF
       break;
     }
     default:
       LOG(FATAL) << "Unexpected compare type " << type;
   }
+
   __ movl(out, Immediate(0));
   __ j(kEqual, &done);
-  __ j(type == Primitive::kPrimLong ? kLess : kBelow, &less);  //  ucomis{s,d} sets CF (kBelow)
+  __ j(less_cond, &less);
 
   __ Bind(&greater);
   __ movl(out, Immediate(1));
@@ -2750,11 +2850,7 @@
           } else if (in.IsConstant()) {
             int32_t v = in.GetConstant()->AsIntConstant()->GetValue();
             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
-            if (v == 0) {
-              __ xorps(dest, dest);
-            } else {
-              __ movss(dest, codegen_->LiteralFloatAddress(static_cast<float>(v)));
-            }
+            codegen_->Load32BitValue(dest, static_cast<float>(v));
           } else {
             __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(),
                         Address(CpuRegister(RSP), in.GetStackIndex()), false);
@@ -2768,11 +2864,7 @@
           } else if (in.IsConstant()) {
             int64_t v = in.GetConstant()->AsLongConstant()->GetValue();
             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
-            if (v == 0) {
-              __ xorps(dest, dest);
-            } else {
-              __ movss(dest, codegen_->LiteralFloatAddress(static_cast<float>(v)));
-            }
+            codegen_->Load64BitValue(dest, static_cast<double>(v));
           } else {
             __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(),
                         Address(CpuRegister(RSP), in.GetStackIndex()), true);
@@ -2786,11 +2878,7 @@
           } else if (in.IsConstant()) {
             double v = in.GetConstant()->AsDoubleConstant()->GetValue();
             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
-            if (bit_cast<int64_t, double>(v) == 0) {
-              __ xorps(dest, dest);
-            } else {
-              __ movss(dest, codegen_->LiteralFloatAddress(static_cast<float>(v)));
-            }
+            codegen_->Load32BitValue(dest, static_cast<float>(v));
           } else {
             __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(),
                         Address(CpuRegister(RSP), in.GetStackIndex()));
@@ -2817,11 +2905,7 @@
           } else if (in.IsConstant()) {
             int32_t v = in.GetConstant()->AsIntConstant()->GetValue();
             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
-            if (v == 0) {
-              __ xorpd(dest, dest);
-            } else {
-              __ movsd(dest, codegen_->LiteralDoubleAddress(static_cast<double>(v)));
-            }
+            codegen_->Load64BitValue(dest, static_cast<double>(v));
           } else {
             __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(),
                         Address(CpuRegister(RSP), in.GetStackIndex()), false);
@@ -2835,11 +2919,7 @@
           } else if (in.IsConstant()) {
             int64_t v = in.GetConstant()->AsLongConstant()->GetValue();
             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
-            if (v == 0) {
-              __ xorpd(dest, dest);
-            } else {
-              __ movsd(dest, codegen_->LiteralDoubleAddress(static_cast<double>(v)));
-            }
+            codegen_->Load64BitValue(dest, static_cast<double>(v));
           } else {
             __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(),
                         Address(CpuRegister(RSP), in.GetStackIndex()), true);
@@ -2853,11 +2933,7 @@
           } else if (in.IsConstant()) {
             float v = in.GetConstant()->AsFloatConstant()->GetValue();
             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
-            if (bit_cast<int32_t, float>(v) == 0) {
-              __ xorpd(dest, dest);
-            } else {
-              __ movsd(dest, codegen_->LiteralDoubleAddress(static_cast<double>(v)));
-            }
+            codegen_->Load64BitValue(dest, static_cast<double>(v));
           } else {
             __ cvtss2sd(out.AsFpuRegister<XmmRegister>(),
                         Address(CpuRegister(RSP), in.GetStackIndex()));
@@ -5196,18 +5272,12 @@
       }
     } else if (constant->IsFloatConstant()) {
       float fp_value = constant->AsFloatConstant()->GetValue();
-      int32_t value = bit_cast<int32_t, float>(fp_value);
       if (destination.IsFpuRegister()) {
         XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
-        if (value == 0) {
-          // easy FP 0.0.
-          __ xorps(dest, dest);
-        } else {
-          __ movss(dest, codegen_->LiteralFloatAddress(fp_value));
-        }
+        codegen_->Load32BitValue(dest, fp_value);
       } else {
         DCHECK(destination.IsStackSlot()) << destination;
-        Immediate imm(value);
+        Immediate imm(bit_cast<int32_t, float>(fp_value));
         __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), imm);
       }
     } else {
@@ -5216,11 +5286,7 @@
       int64_t value = bit_cast<int64_t, double>(fp_value);
       if (destination.IsFpuRegister()) {
         XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
-        if (value == 0) {
-          __ xorpd(dest, dest);
-        } else {
-          __ movsd(dest, codegen_->LiteralDoubleAddress(fp_value));
-        }
+        codegen_->Load64BitValue(dest, fp_value);
       } else {
         DCHECK(destination.IsDoubleStackSlot()) << destination;
         codegen_->Store64BitValueToStack(destination, value);
@@ -6467,6 +6533,51 @@
   }
 }
 
+void CodeGeneratorX86_64::Load32BitValue(XmmRegister dest, int32_t value) {
+  if (value == 0) {
+    __ xorps(dest, dest);
+  } else {
+    __ movss(dest, LiteralInt32Address(value));
+  }
+}
+
+void CodeGeneratorX86_64::Load64BitValue(XmmRegister dest, int64_t value) {
+  if (value == 0) {
+    __ xorpd(dest, dest);
+  } else {
+    __ movsd(dest, LiteralInt64Address(value));
+  }
+}
+
+void CodeGeneratorX86_64::Load32BitValue(XmmRegister dest, float value) {
+  Load32BitValue(dest, bit_cast<int32_t, float>(value));
+}
+
+void CodeGeneratorX86_64::Load64BitValue(XmmRegister dest, double value) {
+  Load64BitValue(dest, bit_cast<int64_t, double>(value));
+}
+
+void CodeGeneratorX86_64::Compare32BitValue(CpuRegister dest, int32_t value) {
+  if (value == 0) {
+    __ testl(dest, dest);
+  } else {
+    __ cmpl(dest, Immediate(value));
+  }
+}
+
+void CodeGeneratorX86_64::Compare64BitValue(CpuRegister dest, int64_t value) {
+  if (IsInt<32>(value)) {
+    if (value == 0) {
+      __ testq(dest, dest);
+    } else {
+      __ cmpq(dest, Immediate(static_cast<int32_t>(value)));
+    }
+  } else {
+    // Value won't fit in an int.
+    __ cmpq(dest, LiteralInt64Address(value));
+  }
+}
+
 void CodeGeneratorX86_64::Store64BitValueToStack(Location dest, int64_t value) {
   DCHECK(dest.IsDoubleStackSlot());
   if (IsInt<32>(value)) {
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 318087e..72dddfd 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -264,6 +264,7 @@
   void GenerateExplicitNullCheck(HNullCheck* instruction);
   void PushOntoFPStack(Location source, uint32_t temp_offset,
                        uint32_t stack_adjustment, bool is_float);
+  void GenerateCompareTest(HCondition* condition);
   template<class LabelType>
   void GenerateTestAndBranch(HInstruction* instruction,
                              size_t condition_input_index,
@@ -478,9 +479,17 @@
   Address LiteralInt32Address(int32_t v);
   Address LiteralInt64Address(int64_t v);
 
-  // Load a 32/64 bit value into a register in the most efficient manner.
+  // Load a 32/64-bit value into a register in the most efficient manner.
   void Load32BitValue(CpuRegister dest, int32_t value);
   void Load64BitValue(CpuRegister dest, int64_t value);
+  void Load32BitValue(XmmRegister dest, int32_t value);
+  void Load64BitValue(XmmRegister dest, int64_t value);
+  void Load32BitValue(XmmRegister dest, float value);
+  void Load64BitValue(XmmRegister dest, double value);
+
+  // Compare a register with a 32/64-bit value in the most efficient manner.
+  void Compare32BitValue(CpuRegister dest, int32_t value);
+  void Compare64BitValue(CpuRegister dest, int64_t value);
 
   Address LiteralCaseTable(HPackedSwitch* switch_instr);
 
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index 9b91b53..a8841d3 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -758,6 +758,7 @@
       compiler_driver_->GetInstructionSet(),
       invoke_type,
       graph_->IsDebuggable(),
+      /* osr */ false,
       graph_->GetCurrentInstructionId());
   callee_graph->SetArtMethod(resolved_method);
 
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index c1e3863..0029cc3 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -91,6 +91,7 @@
   void SimplifyRotate(HInvoke* invoke, bool is_left);
   void SimplifySystemArrayCopy(HInvoke* invoke);
   void SimplifyStringEquals(HInvoke* invoke);
+  void SimplifyCompare(HInvoke* invoke, bool has_zero_op);
 
   OptimizingCompilerStats* stats_;
   bool simplification_occurred_ = false;
@@ -176,8 +177,8 @@
 
   // We can apply De Morgan's laws if both inputs are Not's and are only used
   // by `op`.
-  if (left->IsNot() &&
-      right->IsNot() &&
+  if (((left->IsNot() && right->IsNot()) ||
+       (left->IsBooleanNot() && right->IsBooleanNot())) &&
       left->HasOnlyOneNonEnvironmentUse() &&
       right->HasOnlyOneNonEnvironmentUse()) {
     // Replace code looking like
@@ -187,8 +188,8 @@
     // with
     //    OR or, a, b         (respectively AND)
     //    NOT dest, or
-    HInstruction* src_left = left->AsNot()->GetInput();
-    HInstruction* src_right = right->AsNot()->GetInput();
+    HInstruction* src_left = left->InputAt(0);
+    HInstruction* src_right = right->InputAt(0);
     uint32_t dex_pc = op->GetDexPc();
 
     // Remove the negations on the inputs.
@@ -204,7 +205,12 @@
     } else {
       hbin = new (GetGraph()->GetArena()) HAnd(type, src_left, src_right, dex_pc);
     }
-    HNot* hnot = new (GetGraph()->GetArena()) HNot(type, hbin, dex_pc);
+    HInstruction* hnot;
+    if (left->IsBooleanNot()) {
+      hnot = new (GetGraph()->GetArena()) HBooleanNot(hbin, dex_pc);
+    } else {
+      hnot = new (GetGraph()->GetArena()) HNot(type, hbin, dex_pc);
+    }
 
     op->GetBlock()->InsertInstructionBefore(hbin, op);
     op->GetBlock()->ReplaceAndRemoveInstructionWith(op, hnot);
@@ -1308,8 +1314,8 @@
 
   HInstruction* left = instruction->GetLeft();
   HInstruction* right = instruction->GetRight();
-  if (left->IsNot() &&
-      right->IsNot() &&
+  if (((left->IsNot() && right->IsNot()) ||
+       (left->IsBooleanNot() && right->IsBooleanNot())) &&
       left->HasOnlyOneNonEnvironmentUse() &&
       right->HasOnlyOneNonEnvironmentUse()) {
     // Replace code looking like
@@ -1318,8 +1324,8 @@
     //    XOR dst, nota, notb
     // with
     //    XOR dst, a, b
-    instruction->ReplaceInput(left->AsNot()->GetInput(), 0);
-    instruction->ReplaceInput(right->AsNot()->GetInput(), 1);
+    instruction->ReplaceInput(left->InputAt(0), 0);
+    instruction->ReplaceInput(right->InputAt(0), 1);
     left->GetBlock()->RemoveInstruction(left);
     right->GetBlock()->RemoveInstruction(right);
     RecordSimplification();
@@ -1441,6 +1447,24 @@
   }
 }
 
+void InstructionSimplifierVisitor::SimplifyCompare(HInvoke* invoke, bool is_signum) {
+  DCHECK(invoke->IsInvokeStaticOrDirect());
+  uint32_t dex_pc = invoke->GetDexPc();
+  HInstruction* left = invoke->InputAt(0);
+  HInstruction* right;
+  Primitive::Type type = left->GetType();
+  if (!is_signum) {
+    right = invoke->InputAt(1);
+  } else if (type == Primitive::kPrimLong) {
+    right = GetGraph()->GetLongConstant(0);
+  } else {
+    right = GetGraph()->GetIntConstant(0);
+  }
+  HCompare* compare = new (GetGraph()->GetArena())
+      HCompare(type, left, right, ComparisonBias::kNoBias, dex_pc);
+  invoke->GetBlock()->ReplaceAndRemoveInstructionWith(invoke, compare);
+}
+
 void InstructionSimplifierVisitor::VisitInvoke(HInvoke* instruction) {
   if (instruction->GetIntrinsic() == Intrinsics::kStringEquals) {
     SimplifyStringEquals(instruction);
@@ -1452,6 +1476,12 @@
   } else if (instruction->GetIntrinsic() == Intrinsics::kIntegerRotateLeft ||
              instruction->GetIntrinsic() == Intrinsics::kLongRotateLeft) {
     SimplifyRotate(instruction, true);
+  } else if (instruction->GetIntrinsic() == Intrinsics::kIntegerCompare ||
+             instruction->GetIntrinsic() == Intrinsics::kLongCompare) {
+    SimplifyCompare(instruction, /* is_signum */ false);
+  } else if (instruction->GetIntrinsic() == Intrinsics::kIntegerSignum ||
+             instruction->GetIntrinsic() == Intrinsics::kLongSignum) {
+    SimplifyCompare(instruction, /* is_signum */ true);
   }
 }
 
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index e8912b3..96a3c3c 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -1633,21 +1633,21 @@
 UNIMPLEMENTED_INTRINSIC(FloatIsNaN)
 UNIMPLEMENTED_INTRINSIC(DoubleIsNaN)
 
-UNIMPLEMENTED_INTRINSIC(IntegerCompare)
-UNIMPLEMENTED_INTRINSIC(LongCompare)
 UNIMPLEMENTED_INTRINSIC(IntegerHighestOneBit)
 UNIMPLEMENTED_INTRINSIC(LongHighestOneBit)
 UNIMPLEMENTED_INTRINSIC(IntegerLowestOneBit)
 UNIMPLEMENTED_INTRINSIC(LongLowestOneBit)
+
+// Handled as HIR instructions.
+UNIMPLEMENTED_INTRINSIC(IntegerRotateLeft)
+UNIMPLEMENTED_INTRINSIC(LongRotateLeft)
+UNIMPLEMENTED_INTRINSIC(IntegerRotateRight)
+UNIMPLEMENTED_INTRINSIC(LongRotateRight)
+UNIMPLEMENTED_INTRINSIC(IntegerCompare)
+UNIMPLEMENTED_INTRINSIC(LongCompare)
 UNIMPLEMENTED_INTRINSIC(IntegerSignum)
 UNIMPLEMENTED_INTRINSIC(LongSignum)
 
-// Rotate operations are handled as HRor instructions.
-UNIMPLEMENTED_INTRINSIC(IntegerRotateLeft)
-UNIMPLEMENTED_INTRINSIC(IntegerRotateRight)
-UNIMPLEMENTED_INTRINSIC(LongRotateLeft)
-UNIMPLEMENTED_INTRINSIC(LongRotateRight)
-
 #undef UNIMPLEMENTED_INTRINSIC
 
 #undef __
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 5dce83a..4140d94 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -284,36 +284,6 @@
   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
 }
 
-static void GenCompare(LocationSummary* locations, bool is_long, vixl::MacroAssembler* masm) {
-  Location op1 = locations->InAt(0);
-  Location op2 = locations->InAt(1);
-  Location out = locations->Out();
-
-  Register op1_reg = is_long ? XRegisterFrom(op1) : WRegisterFrom(op1);
-  Register op2_reg = is_long ? XRegisterFrom(op2) : WRegisterFrom(op2);
-  Register out_reg = WRegisterFrom(out);
-
-  __ Cmp(op1_reg, op2_reg);
-  __ Cset(out_reg, gt);           // out == +1 if GT or 0 otherwise
-  __ Cinv(out_reg, out_reg, lt);  // out == -1 if LT or unchanged otherwise
-}
-
-void IntrinsicLocationsBuilderARM64::VisitIntegerCompare(HInvoke* invoke) {
-  CreateIntIntToIntLocations(arena_, invoke);
-}
-
-void IntrinsicCodeGeneratorARM64::VisitIntegerCompare(HInvoke* invoke) {
-  GenCompare(invoke->GetLocations(), /* is_long */ false, GetVIXLAssembler());
-}
-
-void IntrinsicLocationsBuilderARM64::VisitLongCompare(HInvoke* invoke) {
-  CreateIntIntToIntLocations(arena_, invoke);
-}
-
-void IntrinsicCodeGeneratorARM64::VisitLongCompare(HInvoke* invoke) {
-  GenCompare(invoke->GetLocations(), /* is_long */ true,  GetVIXLAssembler());
-}
-
 static void GenNumberOfLeadingZeros(LocationSummary* locations,
                                     Primitive::Type type,
                                     vixl::MacroAssembler* masm) {
@@ -1456,34 +1426,6 @@
   __ Bind(slow_path->GetExitLabel());
 }
 
-static void GenSignum(LocationSummary* locations, bool is_long, vixl::MacroAssembler* masm) {
-  Location op1 = locations->InAt(0);
-  Location out = locations->Out();
-
-  Register op1_reg = is_long ? XRegisterFrom(op1) : WRegisterFrom(op1);
-  Register out_reg = WRegisterFrom(out);
-
-  __ Cmp(op1_reg, 0);
-  __ Cset(out_reg, gt);           // out == +1 if GT or 0 otherwise
-  __ Cinv(out_reg, out_reg, lt);  // out == -1 if LT or unchanged otherwise
-}
-
-void IntrinsicLocationsBuilderARM64::VisitIntegerSignum(HInvoke* invoke) {
-  CreateIntToIntLocations(arena_, invoke);
-}
-
-void IntrinsicCodeGeneratorARM64::VisitIntegerSignum(HInvoke* invoke) {
-  GenSignum(invoke->GetLocations(), /* is_long */ false, GetVIXLAssembler());
-}
-
-void IntrinsicLocationsBuilderARM64::VisitLongSignum(HInvoke* invoke) {
-  CreateIntToIntLocations(arena_, invoke);
-}
-
-void IntrinsicCodeGeneratorARM64::VisitLongSignum(HInvoke* invoke) {
-  GenSignum(invoke->GetLocations(), /* is_long */ true,  GetVIXLAssembler());
-}
-
 static void CreateFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke) {
   DCHECK_EQ(invoke->GetNumberOfArguments(), 1U);
   DCHECK(Primitive::IsFloatingPointType(invoke->InputAt(0)->GetType()));
@@ -1684,11 +1626,15 @@
 UNIMPLEMENTED_INTRINSIC(IntegerLowestOneBit)
 UNIMPLEMENTED_INTRINSIC(LongLowestOneBit)
 
-// Rotate operations are handled as HRor instructions.
+// Handled as HIR instructions.
 UNIMPLEMENTED_INTRINSIC(IntegerRotateLeft)
-UNIMPLEMENTED_INTRINSIC(IntegerRotateRight)
 UNIMPLEMENTED_INTRINSIC(LongRotateLeft)
+UNIMPLEMENTED_INTRINSIC(IntegerRotateRight)
 UNIMPLEMENTED_INTRINSIC(LongRotateRight)
+UNIMPLEMENTED_INTRINSIC(IntegerCompare)
+UNIMPLEMENTED_INTRINSIC(LongCompare)
+UNIMPLEMENTED_INTRINSIC(IntegerSignum)
+UNIMPLEMENTED_INTRINSIC(LongSignum)
 
 #undef UNIMPLEMENTED_INTRINSIC
 
diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc
index 0d9cf09..2294713 100644
--- a/compiler/optimizing/intrinsics_mips.cc
+++ b/compiler/optimizing/intrinsics_mips.cc
@@ -1019,12 +1019,14 @@
 UNIMPLEMENTED_INTRINSIC(FloatIsNaN)
 UNIMPLEMENTED_INTRINSIC(DoubleIsNaN)
 
-UNIMPLEMENTED_INTRINSIC(IntegerCompare)
-UNIMPLEMENTED_INTRINSIC(LongCompare)
 UNIMPLEMENTED_INTRINSIC(IntegerHighestOneBit)
 UNIMPLEMENTED_INTRINSIC(LongHighestOneBit)
 UNIMPLEMENTED_INTRINSIC(IntegerLowestOneBit)
 UNIMPLEMENTED_INTRINSIC(LongLowestOneBit)
+
+// Handled as HIR instructions.
+UNIMPLEMENTED_INTRINSIC(IntegerCompare)
+UNIMPLEMENTED_INTRINSIC(LongCompare)
 UNIMPLEMENTED_INTRINSIC(IntegerSignum)
 UNIMPLEMENTED_INTRINSIC(LongSignum)
 
diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc
index f681d1f..ac28503 100644
--- a/compiler/optimizing/intrinsics_mips64.cc
+++ b/compiler/optimizing/intrinsics_mips64.cc
@@ -1767,12 +1767,14 @@
 UNIMPLEMENTED_INTRINSIC(FloatIsNaN)
 UNIMPLEMENTED_INTRINSIC(DoubleIsNaN)
 
-UNIMPLEMENTED_INTRINSIC(IntegerCompare)
-UNIMPLEMENTED_INTRINSIC(LongCompare)
 UNIMPLEMENTED_INTRINSIC(IntegerHighestOneBit)
 UNIMPLEMENTED_INTRINSIC(LongHighestOneBit)
 UNIMPLEMENTED_INTRINSIC(IntegerLowestOneBit)
 UNIMPLEMENTED_INTRINSIC(LongLowestOneBit)
+
+// Handled as HIR instructions.
+UNIMPLEMENTED_INTRINSIC(IntegerCompare)
+UNIMPLEMENTED_INTRINSIC(LongCompare)
 UNIMPLEMENTED_INTRINSIC(IntegerSignum)
 UNIMPLEMENTED_INTRINSIC(LongSignum)
 
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index 529f678..ab4f6f9 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -2291,7 +2291,7 @@
 }
 
 void IntrinsicCodeGeneratorX86::VisitIntegerReverse(HInvoke* invoke) {
-  X86Assembler* assembler = down_cast<X86Assembler*>(codegen_->GetAssembler());
+  X86Assembler* assembler = GetAssembler();
   LocationSummary* locations = invoke->GetLocations();
 
   Register reg = locations->InAt(0).AsRegister<Register>();
@@ -2322,7 +2322,7 @@
 }
 
 void IntrinsicCodeGeneratorX86::VisitLongReverse(HInvoke* invoke) {
-  X86Assembler* assembler = down_cast<X86Assembler*>(codegen_->GetAssembler());
+  X86Assembler* assembler = GetAssembler();
   LocationSummary* locations = invoke->GetLocations();
 
   Register reg_low = locations->InAt(0).AsRegisterPairLow<Register>();
@@ -2366,7 +2366,9 @@
   locations->SetOut(Location::RequiresRegister());
 }
 
-static void GenBitCount(X86Assembler* assembler, HInvoke* invoke, bool is_long) {
+static void GenBitCount(X86Assembler* assembler,
+                        CodeGeneratorX86* codegen,
+                        HInvoke* invoke, bool is_long) {
   LocationSummary* locations = invoke->GetLocations();
   Location src = locations->InAt(0);
   Register out = locations->Out().AsRegister<Register>();
@@ -2377,11 +2379,7 @@
     value = is_long
         ? POPCOUNT(static_cast<uint64_t>(value))
         : POPCOUNT(static_cast<uint32_t>(value));
-    if (value == 0) {
-      __ xorl(out, out);
-    } else {
-      __ movl(out, Immediate(value));
-    }
+    codegen->Load32BitValue(out, value);
     return;
   }
 
@@ -2413,7 +2411,7 @@
 }
 
 void IntrinsicCodeGeneratorX86::VisitIntegerBitCount(HInvoke* invoke) {
-  GenBitCount(GetAssembler(), invoke, /* is_long */ false);
+  GenBitCount(GetAssembler(), codegen_, invoke, /* is_long */ false);
 }
 
 void IntrinsicLocationsBuilderX86::VisitLongBitCount(HInvoke* invoke) {
@@ -2421,7 +2419,7 @@
 }
 
 void IntrinsicCodeGeneratorX86::VisitLongBitCount(HInvoke* invoke) {
-  GenBitCount(GetAssembler(), invoke, /* is_long */ true);
+  GenBitCount(GetAssembler(), codegen_, invoke, /* is_long */ true);
 }
 
 static void CreateLeadingZeroLocations(ArenaAllocator* arena, HInvoke* invoke, bool is_long) {
@@ -2436,7 +2434,9 @@
   locations->SetOut(Location::RequiresRegister());
 }
 
-static void GenLeadingZeros(X86Assembler* assembler, HInvoke* invoke, bool is_long) {
+static void GenLeadingZeros(X86Assembler* assembler,
+                            CodeGeneratorX86* codegen,
+                            HInvoke* invoke, bool is_long) {
   LocationSummary* locations = invoke->GetLocations();
   Location src = locations->InAt(0);
   Register out = locations->Out().AsRegister<Register>();
@@ -2449,11 +2449,7 @@
     } else {
       value = is_long ? CLZ(static_cast<uint64_t>(value)) : CLZ(static_cast<uint32_t>(value));
     }
-    if (value == 0) {
-      __ xorl(out, out);
-    } else {
-      __ movl(out, Immediate(value));
-    }
+    codegen->Load32BitValue(out, value);
     return;
   }
 
@@ -2520,8 +2516,7 @@
 }
 
 void IntrinsicCodeGeneratorX86::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
-  X86Assembler* assembler = down_cast<X86Assembler*>(codegen_->GetAssembler());
-  GenLeadingZeros(assembler, invoke, /* is_long */ false);
+  GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long */ false);
 }
 
 void IntrinsicLocationsBuilderX86::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
@@ -2529,8 +2524,7 @@
 }
 
 void IntrinsicCodeGeneratorX86::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
-  X86Assembler* assembler = down_cast<X86Assembler*>(codegen_->GetAssembler());
-  GenLeadingZeros(assembler, invoke, /* is_long */ true);
+  GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long */ true);
 }
 
 static void CreateTrailingZeroLocations(ArenaAllocator* arena, HInvoke* invoke, bool is_long) {
@@ -2545,7 +2539,9 @@
   locations->SetOut(Location::RequiresRegister());
 }
 
-static void GenTrailingZeros(X86Assembler* assembler, HInvoke* invoke, bool is_long) {
+static void GenTrailingZeros(X86Assembler* assembler,
+                             CodeGeneratorX86* codegen,
+                             HInvoke* invoke, bool is_long) {
   LocationSummary* locations = invoke->GetLocations();
   Location src = locations->InAt(0);
   Register out = locations->Out().AsRegister<Register>();
@@ -2558,11 +2554,7 @@
     } else {
       value = is_long ? CTZ(static_cast<uint64_t>(value)) : CTZ(static_cast<uint32_t>(value));
     }
-    if (value == 0) {
-      __ xorl(out, out);
-    } else {
-      __ movl(out, Immediate(value));
-    }
+    codegen->Load32BitValue(out, value);
     return;
   }
 
@@ -2616,8 +2608,7 @@
 }
 
 void IntrinsicCodeGeneratorX86::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
-  X86Assembler* assembler = down_cast<X86Assembler*>(codegen_->GetAssembler());
-  GenTrailingZeros(assembler, invoke, /* is_long */ false);
+  GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long */ false);
 }
 
 void IntrinsicLocationsBuilderX86::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
@@ -2625,8 +2616,7 @@
 }
 
 void IntrinsicCodeGeneratorX86::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
-  X86Assembler* assembler = down_cast<X86Assembler*>(codegen_->GetAssembler());
-  GenTrailingZeros(assembler, invoke, /* is_long */ true);
+  GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long */ true);
 }
 
 // Unimplemented intrinsics.
@@ -2646,20 +2636,20 @@
 UNIMPLEMENTED_INTRINSIC(FloatIsNaN)
 UNIMPLEMENTED_INTRINSIC(DoubleIsNaN)
 
-UNIMPLEMENTED_INTRINSIC(IntegerCompare)
-UNIMPLEMENTED_INTRINSIC(LongCompare)
 UNIMPLEMENTED_INTRINSIC(IntegerHighestOneBit)
 UNIMPLEMENTED_INTRINSIC(LongHighestOneBit)
 UNIMPLEMENTED_INTRINSIC(IntegerLowestOneBit)
 UNIMPLEMENTED_INTRINSIC(LongLowestOneBit)
-UNIMPLEMENTED_INTRINSIC(IntegerSignum)
-UNIMPLEMENTED_INTRINSIC(LongSignum)
 
-// Rotate operations are handled as HRor instructions.
+// Handled as HIR instructions.
 UNIMPLEMENTED_INTRINSIC(IntegerRotateLeft)
+UNIMPLEMENTED_INTRINSIC(LongRotateLeft)
 UNIMPLEMENTED_INTRINSIC(IntegerRotateRight)
 UNIMPLEMENTED_INTRINSIC(LongRotateRight)
-UNIMPLEMENTED_INTRINSIC(LongRotateLeft)
+UNIMPLEMENTED_INTRINSIC(IntegerCompare)
+UNIMPLEMENTED_INTRINSIC(LongCompare)
+UNIMPLEMENTED_INTRINSIC(IntegerSignum)
+UNIMPLEMENTED_INTRINSIC(LongSignum)
 
 #undef UNIMPLEMENTED_INTRINSIC
 
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 51fa514..c9a4344 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -2431,58 +2431,6 @@
   GenBitCount(GetAssembler(), codegen_, invoke, /* is_long */ true);
 }
 
-static void CreateCompareLocations(ArenaAllocator* arena, HInvoke* invoke) {
-  LocationSummary* locations = new (arena) LocationSummary(invoke,
-                                                           LocationSummary::kNoCall,
-                                                           kIntrinsified);
-  locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RequiresRegister());
-  locations->SetOut(Location::RequiresRegister());
-}
-
-static void GenCompare(X86_64Assembler* assembler, HInvoke* invoke, bool is_long) {
-  LocationSummary* locations = invoke->GetLocations();
-  CpuRegister src1 = locations->InAt(0).AsRegister<CpuRegister>();
-  CpuRegister src2 = locations->InAt(1).AsRegister<CpuRegister>();
-  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
-
-  NearLabel is_lt, done;
-
-  __ xorl(out, out);
-
-  if (is_long) {
-    __ cmpq(src1, src2);
-  } else {
-    __ cmpl(src1, src2);
-  }
-  __ j(kEqual, &done);
-  __ j(kLess, &is_lt);
-
-  __ movl(out, Immediate(1));
-  __ jmp(&done);
-
-  __ Bind(&is_lt);
-  __ movl(out, Immediate(-1));
-
-  __ Bind(&done);
-}
-
-void IntrinsicLocationsBuilderX86_64::VisitIntegerCompare(HInvoke* invoke) {
-  CreateCompareLocations(arena_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86_64::VisitIntegerCompare(HInvoke* invoke) {
-  GenCompare(GetAssembler(), invoke, /* is_long */ false);
-}
-
-void IntrinsicLocationsBuilderX86_64::VisitLongCompare(HInvoke* invoke) {
-  CreateCompareLocations(arena_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86_64::VisitLongCompare(HInvoke* invoke) {
-  GenCompare(GetAssembler(), invoke, /* is_long */ true);
-}
-
 static void CreateOneBitLocations(ArenaAllocator* arena, HInvoke* invoke, bool is_high) {
   LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
@@ -2757,74 +2705,6 @@
   GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long */ true);
 }
 
-static void CreateSignLocations(ArenaAllocator* arena, HInvoke* invoke) {
-  LocationSummary* locations = new (arena) LocationSummary(invoke,
-                                                           LocationSummary::kNoCall,
-                                                           kIntrinsified);
-  locations->SetInAt(0, Location::Any());
-  locations->SetOut(Location::RequiresRegister());
-  locations->AddTemp(Location::RequiresRegister());  // Need a writeable register.
-}
-
-static void GenSign(X86_64Assembler* assembler,
-                    CodeGeneratorX86_64* codegen,
-                    HInvoke* invoke, bool is_long) {
-  LocationSummary* locations = invoke->GetLocations();
-  Location src = locations->InAt(0);
-  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
-
-  if (invoke->InputAt(0)->IsConstant()) {
-    // Evaluate this at compile time.
-    int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
-    codegen->Load32BitValue(out, value == 0 ? 0 : (value > 0 ? 1 : -1));
-    return;
-  }
-
-  // Copy input into temporary.
-  CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>();
-  if (src.IsRegister()) {
-    if (is_long) {
-      __ movq(tmp, src.AsRegister<CpuRegister>());
-    } else {
-      __ movl(tmp, src.AsRegister<CpuRegister>());
-    }
-  } else if (is_long) {
-    DCHECK(src.IsDoubleStackSlot());
-    __ movq(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
-  } else {
-    DCHECK(src.IsStackSlot());
-    __ movl(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
-  }
-
-  // Do the bit twiddling: basically tmp >> 63/31 | -tmp >>> 63/31 for long/int.
-  if (is_long) {
-    __ movq(out, tmp);
-    __ sarq(out, Immediate(63));
-    __ negq(tmp);
-    __ shrq(tmp, Immediate(63));
-    __ orq(out, tmp);
-  } else {
-    __ movl(out, tmp);
-    __ sarl(out, Immediate(31));
-    __ negl(tmp);
-    __ shrl(tmp, Immediate(31));
-    __ orl(out, tmp);
-  }
-}
-
-void IntrinsicLocationsBuilderX86_64::VisitIntegerSignum(HInvoke* invoke) {
-  CreateSignLocations(arena_, invoke);
-}
-void IntrinsicCodeGeneratorX86_64::VisitIntegerSignum(HInvoke* invoke) {
-  GenSign(GetAssembler(), codegen_, invoke, /* is_long */ false);
-}
-void IntrinsicLocationsBuilderX86_64::VisitLongSignum(HInvoke* invoke) {
-  CreateSignLocations(arena_, invoke);
-}
-void IntrinsicCodeGeneratorX86_64::VisitLongSignum(HInvoke* invoke) {
-  GenSign(GetAssembler(), codegen_, invoke, /* is_long */ true);
-}
-
 // Unimplemented intrinsics.
 
 #define UNIMPLEMENTED_INTRINSIC(Name)                                                   \
@@ -2840,11 +2720,15 @@
 UNIMPLEMENTED_INTRINSIC(FloatIsNaN)
 UNIMPLEMENTED_INTRINSIC(DoubleIsNaN)
 
-// Rotate operations are handled as HRor instructions.
+// Handled as HIR instructions.
 UNIMPLEMENTED_INTRINSIC(IntegerRotateLeft)
-UNIMPLEMENTED_INTRINSIC(IntegerRotateRight)
 UNIMPLEMENTED_INTRINSIC(LongRotateLeft)
+UNIMPLEMENTED_INTRINSIC(IntegerRotateRight)
 UNIMPLEMENTED_INTRINSIC(LongRotateRight)
+UNIMPLEMENTED_INTRINSIC(IntegerCompare)
+UNIMPLEMENTED_INTRINSIC(LongCompare)
+UNIMPLEMENTED_INTRINSIC(IntegerSignum)
+UNIMPLEMENTED_INTRINSIC(LongSignum)
 
 #undef UNIMPLEMENTED_INTRINSIC
 
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 3dda850..f269885 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -647,6 +647,10 @@
       header_->GetGraph()->SetHasIrreducibleLoops(true);
       PopulateIrreducibleRecursive(back_edge);
     } else {
+      if (header_->GetGraph()->IsCompilingOsr()) {
+        irreducible_ = true;
+        header_->GetGraph()->SetHasIrreducibleLoops(true);
+      }
       PopulateRecursive(back_edge);
     }
   }
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 6c63af7..daec096 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -274,6 +274,7 @@
          InstructionSet instruction_set,
          InvokeType invoke_type = kInvalidInvokeType,
          bool debuggable = false,
+         bool osr = false,
          int start_instruction_id = 0)
       : arena_(arena),
         blocks_(arena->Adapter(kArenaAllocBlockList)),
@@ -302,7 +303,8 @@
         cached_long_constants_(std::less<int64_t>(), arena->Adapter(kArenaAllocConstantsMap)),
         cached_double_constants_(std::less<int64_t>(), arena->Adapter(kArenaAllocConstantsMap)),
         cached_current_method_(nullptr),
-        inexact_object_rti_(ReferenceTypeInfo::CreateInvalid()) {
+        inexact_object_rti_(ReferenceTypeInfo::CreateInvalid()),
+        osr_(osr) {
     blocks_.reserve(kDefaultNumberOfBlocks);
   }
 
@@ -478,6 +480,8 @@
     return instruction_set_;
   }
 
+  bool IsCompilingOsr() const { return osr_; }
+
   bool HasTryCatch() const { return has_try_catch_; }
   void SetHasTryCatch(bool value) { has_try_catch_ = value; }
 
@@ -606,6 +610,11 @@
   // collection pointer to passes which may create NullConstant.
   ReferenceTypeInfo inexact_object_rti_;
 
+  // Whether we are compiling this graph for on stack replacement: this will
+  // make all loops seen as irreducible and emit special stack maps to mark
+  // compiled code entries which the interpreter can directly jump to.
+  const bool osr_;
+
   friend class SsaBuilder;           // For caching constants.
   friend class SsaLivenessAnalysis;  // For the linear order.
   ART_FRIEND_TEST(GraphTest, IfSuccessorSimpleJoinBlock1);
@@ -6041,6 +6050,74 @@
   FOR_EACH_CONCRETE_INSTRUCTION(INSTRUCTION_TYPE_CHECK)
 #undef INSTRUCTION_TYPE_CHECK
 
+class SwitchTable : public ValueObject {
+ public:
+  SwitchTable(const Instruction& instruction, uint32_t dex_pc, bool sparse)
+      : instruction_(instruction), dex_pc_(dex_pc), sparse_(sparse) {
+    int32_t table_offset = instruction.VRegB_31t();
+    const uint16_t* table = reinterpret_cast<const uint16_t*>(&instruction) + table_offset;
+    if (sparse) {
+      CHECK_EQ(table[0], static_cast<uint16_t>(Instruction::kSparseSwitchSignature));
+    } else {
+      CHECK_EQ(table[0], static_cast<uint16_t>(Instruction::kPackedSwitchSignature));
+    }
+    num_entries_ = table[1];
+    values_ = reinterpret_cast<const int32_t*>(&table[2]);
+  }
+
+  uint16_t GetNumEntries() const {
+    return num_entries_;
+  }
+
+  void CheckIndex(size_t index) const {
+    if (sparse_) {
+      // In a sparse table, we have num_entries_ keys and num_entries_ values, in that order.
+      DCHECK_LT(index, 2 * static_cast<size_t>(num_entries_));
+    } else {
+      // In a packed table, we have the starting key and num_entries_ values.
+      DCHECK_LT(index, 1 + static_cast<size_t>(num_entries_));
+    }
+  }
+
+  int32_t GetEntryAt(size_t index) const {
+    CheckIndex(index);
+    return values_[index];
+  }
+
+  uint32_t GetDexPcForIndex(size_t index) const {
+    CheckIndex(index);
+    return dex_pc_ +
+        (reinterpret_cast<const int16_t*>(values_ + index) -
+         reinterpret_cast<const int16_t*>(&instruction_));
+  }
+
+  // Index of the first value in the table.
+  size_t GetFirstValueIndex() const {
+    if (sparse_) {
+      // In a sparse table, we have num_entries_ keys and num_entries_ values, in that order.
+      return num_entries_;
+    } else {
+      // In a packed table, we have the starting key and num_entries_ values.
+      return 1;
+    }
+  }
+
+ private:
+  const Instruction& instruction_;
+  const uint32_t dex_pc_;
+
+  // Whether this is a sparse-switch table (or a packed-switch one).
+  const bool sparse_;
+
+  // This can't be const as it needs to be computed off of the given instruction, and complicated
+  // expressions in the initializer list seemed very ugly.
+  uint16_t num_entries_;
+
+  const int32_t* values_;
+
+  DISALLOW_COPY_AND_ASSIGN(SwitchTable);
+};
+
 }  // namespace art
 
 #endif  // ART_COMPILER_OPTIMIZING_NODES_H_
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index c8bef81..d9f993d 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -299,7 +299,7 @@
     }
   }
 
-  bool JitCompile(Thread* self, jit::JitCodeCache* code_cache, ArtMethod* method)
+  bool JitCompile(Thread* self, jit::JitCodeCache* code_cache, ArtMethod* method, bool osr)
       OVERRIDE
       SHARED_REQUIRES(Locks::mutator_lock_);
 
@@ -308,7 +308,8 @@
   CompiledMethod* Emit(ArenaAllocator* arena,
                        CodeVectorAllocator* code_allocator,
                        CodeGenerator* codegen,
-                       CompilerDriver* driver) const;
+                       CompilerDriver* driver,
+                       const DexFile::CodeItem* item) const;
 
   // Try compiling a method and return the code generator used for
   // compiling it.
@@ -326,7 +327,8 @@
                             uint32_t method_idx,
                             jobject class_loader,
                             const DexFile& dex_file,
-                            Handle<mirror::DexCache> dex_cache) const;
+                            Handle<mirror::DexCache> dex_cache,
+                            bool osr) const;
 
   std::unique_ptr<OptimizingCompilerStats> compilation_stats_;
 
@@ -579,11 +581,12 @@
 CompiledMethod* OptimizingCompiler::Emit(ArenaAllocator* arena,
                                          CodeVectorAllocator* code_allocator,
                                          CodeGenerator* codegen,
-                                         CompilerDriver* compiler_driver) const {
+                                         CompilerDriver* compiler_driver,
+                                         const DexFile::CodeItem* code_item) const {
   ArenaVector<LinkerPatch> linker_patches = EmitAndSortLinkerPatches(codegen);
   ArenaVector<uint8_t> stack_map(arena->Adapter(kArenaAllocStackMaps));
   stack_map.resize(codegen->ComputeStackMapsSize());
-  codegen->BuildStackMaps(MemoryRegion(stack_map.data(), stack_map.size()));
+  codegen->BuildStackMaps(MemoryRegion(stack_map.data(), stack_map.size()), *code_item);
 
   CompiledMethod* compiled_method = CompiledMethod::SwapAllocCompiledMethod(
       compiler_driver,
@@ -614,7 +617,8 @@
                                               uint32_t method_idx,
                                               jobject class_loader,
                                               const DexFile& dex_file,
-                                              Handle<mirror::DexCache> dex_cache) const {
+                                              Handle<mirror::DexCache> dex_cache,
+                                              bool osr) const {
   MaybeRecordStat(MethodCompilationStat::kAttemptCompilation);
   CompilerDriver* compiler_driver = GetCompilerDriver();
   InstructionSet instruction_set = compiler_driver->GetInstructionSet();
@@ -662,8 +666,14 @@
                                                      dex_compilation_unit.GetDexFile(),
                                                      dex_compilation_unit.GetClassDefIndex());
   HGraph* graph = new (arena) HGraph(
-      arena, dex_file, method_idx, requires_barrier, compiler_driver->GetInstructionSet(),
-      kInvalidInvokeType, compiler_driver->GetCompilerOptions().GetDebuggable());
+      arena,
+      dex_file,
+      method_idx,
+      requires_barrier,
+      compiler_driver->GetInstructionSet(),
+      kInvalidInvokeType,
+      compiler_driver->GetCompilerOptions().GetDebuggable(),
+      osr);
 
   std::unique_ptr<CodeGenerator> codegen(
       CodeGenerator::Create(graph,
@@ -796,10 +806,11 @@
                    method_idx,
                    jclass_loader,
                    dex_file,
-                   dex_cache));
+                   dex_cache,
+                   /* osr */ false));
     if (codegen.get() != nullptr) {
       MaybeRecordStat(MethodCompilationStat::kCompiled);
-      method = Emit(&arena, &code_allocator, codegen.get(), compiler_driver);
+      method = Emit(&arena, &code_allocator, codegen.get(), compiler_driver, code_item);
     }
   } else {
     if (compiler_driver->GetCompilerOptions().VerifyAtRuntime()) {
@@ -842,7 +853,8 @@
 
 bool OptimizingCompiler::JitCompile(Thread* self,
                                     jit::JitCodeCache* code_cache,
-                                    ArtMethod* method) {
+                                    ArtMethod* method,
+                                    bool osr) {
   StackHandleScope<2> hs(self);
   Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
       method->GetDeclaringClass()->GetClassLoader()));
@@ -872,7 +884,8 @@
                    method_idx,
                    jclass_loader,
                    *dex_file,
-                   dex_cache));
+                   dex_cache,
+                   osr));
     if (codegen.get() == nullptr) {
       return false;
     }
@@ -884,7 +897,7 @@
     return false;
   }
   MaybeRecordStat(MethodCompilationStat::kCompiled);
-  codegen->BuildStackMaps(MemoryRegion(stack_map_data, stack_map_size));
+  codegen->BuildStackMaps(MemoryRegion(stack_map_data, stack_map_size), *code_item);
   const void* code = code_cache->CommitCode(
       self,
       method,
@@ -895,7 +908,8 @@
       codegen->GetCoreSpillMask(),
       codegen->GetFpuSpillMask(),
       code_allocator.GetMemory().data(),
-      code_allocator.GetSize());
+      code_allocator.GetSize(),
+      osr);
 
   if (code == nullptr) {
     code_cache->ClearData(self, stack_map_data);
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index 631b784..949ad99 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -429,6 +429,56 @@
 END art_quick_invoke_stub_internal
 
     /*
+     * On stack replacement stub.
+     * On entry:
+     *   r0 = stack to copy
+     *   r1 = size of stack
+     *   r2 = pc to call
+     *   r3 = JValue* result
+     *   [sp] = shorty
+     *   [sp + 4] = thread
+     */
+ENTRY art_quick_osr_stub
+    SPILL_ALL_CALLEE_SAVE_GPRS             @ Spill regs (9)
+    mov    r11, sp                         @ Save the stack pointer
+    mov    r10, r1                         @ Save size of stack
+    ldr    r9, [r11, #40]                  @ Move managed thread pointer into r9
+    mov    r8, r2                          @ Save the pc to call
+    sub    r7, sp, #12                     @ Reserve space for stack pointer, JValue result, and ArtMethod* slot
+    and    r7, #0xFFFFFFF0                 @ Align stack pointer
+    mov    sp, r7                          @ Update stack pointer
+    str    r11, [sp, #4]                   @ Save old stack pointer
+    str    r3, [sp, #8]                    @ Save JValue result
+    mov    ip, #0
+    str    ip, [sp]                        @ Store null for ArtMethod* at bottom of frame
+    sub    sp, sp, r1                      @ Reserve space for callee stack
+    mov    r2, r1
+    mov    r1, r0
+    mov    r0, sp
+    bl     memcpy                          @ memcpy (dest r0, src r1, bytes r2)
+    bl     .Losr_entry                     @ Call the method
+    ldr    r11, [sp, #4]                   @ Restore saved stack pointer
+    ldr    r10, [sp, #8]                   @ Restore JValue result
+    mov    sp, r11                         @ Restore stack pointer.
+    ldr    r4, [sp, #36]                   @ load shorty
+    ldrb   r4, [r4, #0]                    @ load return type
+    cmp    r4, #68                         @ Test if result type char == 'D'.
+    beq    .Losr_fp_result
+    cmp    r4, #70                         @ Test if result type char == 'F'.
+    beq    .Losr_fp_result
+    strd r0, [r10]                         @ Store r0/r1 into result pointer
+    b    .Losr_exit
+.Losr_fp_result:
+    vstr d0, [r10]                         @ Store s0-s1/d0 into result pointer
+.Losr_exit:
+    pop    {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+.Losr_entry:
+    sub r10, r10, #4
+    str lr, [sp, r10]                     @ Store link register per the compiler ABI
+    bx r8
+END art_quick_osr_stub
+
+    /*
      * On entry r0 is uint32_t* gprs_ and r1 is uint32_t* fprs_
      */
 ARM_ENTRY art_quick_do_long_jump
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index 9ccabad..e848008 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -915,6 +915,105 @@
 
 
 
+/*  extern"C" void art_quick_osr_stub(void** stack,                x0
+ *                                    size_t stack_size_in_bytes,  x1
+ *                                    const uin8_t* native_pc,     x2
+ *                                    JValue *result,              x3
+ *                                    char   *shorty,              x4
+ *                                    Thread *self)                x5
+ */
+ENTRY art_quick_osr_stub
+SAVE_SIZE=15*8   // x3, x4, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, SP, LR, FP saved.
+    mov x9, sp                             // Save stack pointer.
+    .cfi_register sp,x9
+
+    sub x10, sp, # SAVE_SIZE
+    and x10, x10, # ~0xf                   // Enforce 16 byte stack alignment.
+    mov sp, x10                            // Set new SP.
+
+    str x28, [sp, #112]
+    stp x26, x27, [sp, #96]
+    stp x24, x25, [sp, #80]
+    stp x22, x23, [sp, #64]
+    stp x20, x21, [sp, #48]
+    stp x9, x19, [sp, #32]                // Save old stack pointer and x19.
+    stp x3, x4, [sp, #16]                 // Save result and shorty addresses.
+    stp xFP, xLR, [sp]                    // Store LR & FP.
+    mov xSELF, x5                         // Move thread pointer into SELF register.
+
+    sub sp, sp, #16
+    str xzr, [sp]                         // Store null for ArtMethod* slot
+    // Branch to stub.
+    bl .Losr_entry
+    add sp, sp, #16
+
+    // Restore return value address and shorty address.
+    ldp x3,x4, [sp, #16]
+    ldr x28, [sp, #112]
+    ldp x26, x27, [sp, #96]
+    ldp x24, x25, [sp, #80]
+    ldp x22, x23, [sp, #64]
+    ldp x20, x21, [sp, #48]
+
+    // Store result (w0/x0/s0/d0) appropriately, depending on resultType.
+    ldrb w10, [x4]
+
+    // Check the return type and store the correct register into the jvalue in memory.
+
+    // Don't set anything for a void type.
+    cmp w10, #'V'
+    beq .Losr_exit
+
+    // Is it a double?
+    cmp w10, #'D'
+    bne .Lno_double
+    str d0, [x3]
+    b .Losr_exit
+
+.Lno_double:  // Is it a float?
+    cmp w10, #'F'
+    bne .Lno_float
+    str s0, [x3]
+    b .Losr_exit
+
+.Lno_float:  // Just store x0. Doesn't matter if it is 64 or 32 bits.
+    str x0, [x3]
+
+.Losr_exit:  // Finish up.
+    ldp x2, x19, [sp, #32]   // Restore stack pointer and x19.
+    ldp xFP, xLR, [sp]    // Restore old frame pointer and link register.
+    mov sp, x2
+    ret
+
+.Losr_entry:
+    // Update stack pointer for the callee
+    sub sp, sp, x1
+
+    // Update link register slot expected by the callee.
+    sub w1, w1, #8
+    str lr, [sp, x1]
+
+    // Copy arguments into stack frame.
+    // Use simple copy routine for now.
+    // 4 bytes per slot.
+    // X0 - source address
+    // W1 - args length
+    // SP - destination address.
+    // W10 - temporary
+.Losr_loop_entry:
+    cmp w1, #0
+    beq .Losr_loop_exit
+    sub w1, w1, #4
+    ldr w10, [x0, x1]
+    str w10, [sp, x1]
+    b .Losr_loop_entry
+
+.Losr_loop_exit:
+    // Branch to the OSR entry point.
+    br x2
+
+END art_quick_osr_stub
+
     /*
      * On entry x0 is uintptr_t* gprs_ and x1 is uint64_t* fprs_
      */
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index da30331..fbee5d7 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -1712,5 +1712,65 @@
     ret
 END_FUNCTION art_quick_read_barrier_for_root_slow
 
+  /*
+     * On stack replacement stub.
+     * On entry:
+     *   [sp] = return address
+     *   [sp + 4] = stack to copy
+     *   [sp + 8] = size of stack
+     *   [sp + 12] = pc to call
+     *   [sp + 16] = JValue* result
+     *   [sp + 20] = shorty
+     *   [sp + 24] = thread
+     */
+DEFINE_FUNCTION art_quick_osr_stub
+    // Save native callee saves.
+    PUSH ebp
+    PUSH ebx
+    PUSH esi
+    PUSH edi
+    mov 4+16(%esp), %esi           // ESI = argument array
+    mov 8+16(%esp), %ecx           // ECX = size of args
+    mov 12+16(%esp), %ebx          // EBX = pc to call
+    mov %esp, %ebp                 // Save stack pointer
+    andl LITERAL(0xFFFFFFF0), %esp // Align stack
+    PUSH ebp                       // Save old stack pointer
+    subl LITERAL(12), %esp         // Align stack
+    movl LITERAL(0), (%esp)        // Store null for ArtMethod* slot
+    call .Losr_entry
+
+    // Restore stack pointer.
+    addl LITERAL(12), %esp
+    POP ebp
+    mov %ebp, %esp
+
+    // Restore callee saves.
+    POP edi
+    POP esi
+    POP ebx
+    POP ebp
+    mov 16(%esp), %ecx            // Get JValue result
+    mov %eax, (%ecx)              // Store the result assuming it is a long, int or Object*
+    mov %edx, 4(%ecx)             // Store the other half of the result
+    mov 20(%esp), %edx            // Get the shorty
+    cmpb LITERAL(68), (%edx)      // Test if result type char == 'D'
+    je .Losr_return_double_quick
+    cmpb LITERAL(70), (%edx)      // Test if result type char == 'F'
+    je .Losr_return_float_quick
+    ret
+.Losr_return_double_quick:
+    movsd %xmm0, (%ecx)           // Store the floating point result
+    ret
+.Losr_return_float_quick:
+    movss %xmm0, (%ecx)           // Store the floating point result
+    ret
+.Losr_entry:
+    subl LITERAL(4), %ecx         // Given stack size contains pushed frame pointer, substract it.
+    subl %ecx, %esp
+    mov %esp, %edi                // EDI = beginning of stack
+    rep movsb                     // while (ecx--) { *edi++ = *esi++ }
+    jmp *%ebx
+END_FUNCTION art_quick_osr_stub
+
     // TODO: implement these!
 UNIMPLEMENTED art_quick_memcmp16
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 883da96..d6e0f1c 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -1744,3 +1744,62 @@
     RESTORE_FP_CALLEE_SAVE_FRAME
     ret
 END_FUNCTION art_quick_read_barrier_for_root_slow
+
+    /*
+     * On stack replacement stub.
+     * On entry:
+     *   [sp] = return address
+     *   rdi = stack to copy
+     *   rsi = size of stack
+     *   rdx = pc to call
+     *   rcx = JValue* result
+     *   r8 = shorty
+     *   r9 = thread
+     */
+DEFINE_FUNCTION art_quick_osr_stub
+    // Save the non-volatiles.
+    PUSH rbp                      // Save rbp.
+    PUSH rcx                      // Save rcx/result*.
+    PUSH r8                       // Save r8/shorty*.
+
+    // Save callee saves.
+    PUSH rbx
+    PUSH r12
+    PUSH r13
+    PUSH r14
+    PUSH r15
+
+    pushq LITERAL(0)              // Push null for ArtMethod*.
+    movl %esi, %ecx               // rcx := size of stack
+    movq %rdi, %rsi               // rsi := stack to copy
+    call .Losr_entry
+
+    // Restore stack and callee-saves.
+    addq LITERAL(8), %rsp
+    POP r15
+    POP r14
+    POP r13
+    POP r12
+    POP rbx
+    POP r8
+    POP rcx
+    POP rbp
+    cmpb LITERAL(68), (%r8)        // Test if result type char == 'D'.
+    je .Losr_return_double_quick
+    cmpb LITERAL(70), (%r8)        // Test if result type char == 'F'.
+    je .Losr_return_float_quick
+    movq %rax, (%rcx)              // Store the result assuming its a long, int or Object*
+    ret
+.Losr_return_double_quick:
+    movsd %xmm0, (%rcx)            // Store the double floating point result.
+    ret
+.Losr_return_float_quick:
+    movss %xmm0, (%rcx)            // Store the floating point result.
+    ret
+.Losr_entry:
+    subl LITERAL(8), %ecx         // Given stack size contains pushed frame pointer, substract it.
+    subq %rcx, %rsp
+    movq %rsp, %rdi               // rdi := beginning of stack
+    rep movsb                     // while (rcx--) { *rdi++ = *rsi++ }
+    jmp *%rdx
+END_FUNCTION art_quick_osr_stub
diff --git a/runtime/art_method.cc b/runtime/art_method.cc
index 6f36016..cd38e16 100644
--- a/runtime/art_method.cc
+++ b/runtime/art_method.cc
@@ -292,22 +292,7 @@
         // Unusual case where we were running generated code and an
         // exception was thrown to force the activations to be removed from the
         // stack. Continue execution in the interpreter.
-        self->ClearException();
-        ShadowFrame* shadow_frame =
-            self->PopStackedShadowFrame(StackedShadowFrameType::kDeoptimizationShadowFrame);
-        mirror::Throwable* pending_exception = nullptr;
-        bool from_code = false;
-        self->PopDeoptimizationContext(result, &pending_exception, &from_code);
-        CHECK(!from_code);
-        self->SetTopOfStack(nullptr);
-        self->SetTopOfShadowStack(shadow_frame);
-
-        // Restore the exception that was pending before deoptimization then interpret the
-        // deoptimized frames.
-        if (pending_exception != nullptr) {
-          self->SetException(pending_exception);
-        }
-        interpreter::EnterInterpreterFromDeoptimize(self, shadow_frame, from_code, result);
+        self->DeoptimizeWithDeoptimizationException(result);
       }
       if (kLogInvocationStartAndReturn) {
         LOG(INFO) << StringPrintf("Returned '%s' quick code=%p", PrettyMethod(this).c_str(),
diff --git a/runtime/asm_support.h b/runtime/asm_support.h
index 31610a3..eb3b7f3 100644
--- a/runtime/asm_support.h
+++ b/runtime/asm_support.h
@@ -150,11 +150,11 @@
 ADD_TEST_EQ(THREAD_ROSALLOC_RUNS_OFFSET,
             art::Thread::RosAllocRunsOffset<__SIZEOF_POINTER__>().Int32Value())
 // Offset of field Thread::tlsPtr_.thread_local_alloc_stack_top.
-#define THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET (THREAD_ROSALLOC_RUNS_OFFSET + 34 * __SIZEOF_POINTER__)
+#define THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET (THREAD_ROSALLOC_RUNS_OFFSET + 16 * __SIZEOF_POINTER__)
 ADD_TEST_EQ(THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET,
             art::Thread::ThreadLocalAllocStackTopOffset<__SIZEOF_POINTER__>().Int32Value())
 // Offset of field Thread::tlsPtr_.thread_local_alloc_stack_end.
-#define THREAD_LOCAL_ALLOC_STACK_END_OFFSET (THREAD_ROSALLOC_RUNS_OFFSET + 35 * __SIZEOF_POINTER__)
+#define THREAD_LOCAL_ALLOC_STACK_END_OFFSET (THREAD_ROSALLOC_RUNS_OFFSET + 17 * __SIZEOF_POINTER__)
 ADD_TEST_EQ(THREAD_LOCAL_ALLOC_STACK_END_OFFSET,
             art::Thread::ThreadLocalAllocStackEndOffset<__SIZEOF_POINTER__>().Int32Value())
 
@@ -331,21 +331,23 @@
 ADD_TEST_EQ(ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE,
             static_cast<int32_t>(art::gc::allocator::RosAlloc::kMaxThreadLocalBracketSize))
 
-#define ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT 4
+#define ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT 3
 ADD_TEST_EQ(ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT,
-            static_cast<int32_t>(art::gc::allocator::RosAlloc::kBracketQuantumSizeShift))
+            static_cast<int32_t>(art::gc::allocator::RosAlloc::kThreadLocalBracketQuantumSizeShift))
 
-#define ROSALLOC_BRACKET_QUANTUM_SIZE_MASK 15
+#define ROSALLOC_BRACKET_QUANTUM_SIZE_MASK 7
 ADD_TEST_EQ(ROSALLOC_BRACKET_QUANTUM_SIZE_MASK,
-            static_cast<int32_t>(art::gc::allocator::RosAlloc::kBracketQuantumSize - 1))
+            static_cast<int32_t>(art::gc::allocator::RosAlloc::kThreadLocalBracketQuantumSize - 1))
 
-#define ROSALLOC_BRACKET_QUANTUM_SIZE_MASK_TOGGLED32 0xfffffff0
+#define ROSALLOC_BRACKET_QUANTUM_SIZE_MASK_TOGGLED32 0xfffffff8
 ADD_TEST_EQ(static_cast<uint32_t>(ROSALLOC_BRACKET_QUANTUM_SIZE_MASK_TOGGLED32),
-            ~static_cast<uint32_t>(art::gc::allocator::RosAlloc::kBracketQuantumSize - 1))
+            ~static_cast<uint32_t>(
+                art::gc::allocator::RosAlloc::kThreadLocalBracketQuantumSize - 1))
 
-#define ROSALLOC_BRACKET_QUANTUM_SIZE_MASK_TOGGLED64 0xfffffffffffffff0
+#define ROSALLOC_BRACKET_QUANTUM_SIZE_MASK_TOGGLED64 0xfffffffffffffff8
 ADD_TEST_EQ(static_cast<uint64_t>(ROSALLOC_BRACKET_QUANTUM_SIZE_MASK_TOGGLED64),
-            ~static_cast<uint64_t>(art::gc::allocator::RosAlloc::kBracketQuantumSize - 1))
+            ~static_cast<uint64_t>(
+                art::gc::allocator::RosAlloc::kThreadLocalBracketQuantumSize - 1))
 
 #define ROSALLOC_RUN_FREE_LIST_OFFSET 8
 ADD_TEST_EQ(ROSALLOC_RUN_FREE_LIST_OFFSET,
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index 88d49b2..0631ebe 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -326,6 +326,24 @@
   std::fill_n(find_array_class_cache_, kFindArrayCacheSize, GcRoot<mirror::Class>(nullptr));
 }
 
+void ClassLinker::CheckSystemClass(Thread* self, Handle<mirror::Class> c1, const char* descriptor) {
+  mirror::Class* c2 = FindSystemClass(self, descriptor);
+  if (c2 == nullptr) {
+    LOG(FATAL) << "Could not find class " << descriptor;
+    UNREACHABLE();
+  }
+  if (c1.Get() != c2) {
+    std::ostringstream os1, os2;
+    c1->DumpClass(os1, mirror::Class::kDumpClassFullDetail);
+    c2->DumpClass(os2, mirror::Class::kDumpClassFullDetail);
+    LOG(FATAL) << "InitWithoutImage: Class mismatch for " << descriptor
+               << ". This is most likely the result of a broken build. Make sure that "
+               << "libcore and art projects match.\n\n"
+               << os1.str() << "\n\n" << os2.str();
+    UNREACHABLE();
+  }
+}
+
 bool ClassLinker::InitWithoutImage(std::vector<std::unique_ptr<const DexFile>> boot_class_path,
                                    std::string* error_msg) {
   VLOG(startup) << "ClassLinker::Init";
@@ -517,18 +535,12 @@
 
   // Object, String and DexCache need to be rerun through FindSystemClass to finish init
   mirror::Class::SetStatus(java_lang_Object, mirror::Class::kStatusNotReady, self);
-  CHECK_EQ(java_lang_Object.Get(), FindSystemClass(self, "Ljava/lang/Object;"));
+  CheckSystemClass(self, java_lang_Object, "Ljava/lang/Object;");
   CHECK_EQ(java_lang_Object->GetObjectSize(), mirror::Object::InstanceSize());
   mirror::Class::SetStatus(java_lang_String, mirror::Class::kStatusNotReady, self);
-  mirror::Class* String_class = FindSystemClass(self, "Ljava/lang/String;");
-  if (java_lang_String.Get() != String_class) {
-    std::ostringstream os1, os2;
-    java_lang_String->DumpClass(os1, mirror::Class::kDumpClassFullDetail);
-    String_class->DumpClass(os2, mirror::Class::kDumpClassFullDetail);
-    LOG(FATAL) << os1.str() << "\n\n" << os2.str();
-  }
+  CheckSystemClass(self, java_lang_String, "Ljava/lang/String;");
   mirror::Class::SetStatus(java_lang_DexCache, mirror::Class::kStatusNotReady, self);
-  CHECK_EQ(java_lang_DexCache.Get(), FindSystemClass(self, "Ljava/lang/DexCache;"));
+  CheckSystemClass(self, java_lang_DexCache, "Ljava/lang/DexCache;");
   CHECK_EQ(java_lang_DexCache->GetObjectSize(), mirror::DexCache::InstanceSize());
 
   // Setup the primitive array type classes - can't be done until Object has a vtable.
@@ -538,14 +550,13 @@
   SetClassRoot(kByteArrayClass, FindSystemClass(self, "[B"));
   mirror::ByteArray::SetArrayClass(GetClassRoot(kByteArrayClass));
 
-  CHECK_EQ(char_array_class.Get(), FindSystemClass(self, "[C"));
+  CheckSystemClass(self, char_array_class, "[C");
 
   SetClassRoot(kShortArrayClass, FindSystemClass(self, "[S"));
   mirror::ShortArray::SetArrayClass(GetClassRoot(kShortArrayClass));
 
-  CHECK_EQ(int_array_class.Get(), FindSystemClass(self, "[I"));
-
-  CHECK_EQ(long_array_class.Get(), FindSystemClass(self, "[J"));
+  CheckSystemClass(self, int_array_class, "[I");
+  CheckSystemClass(self, long_array_class, "[J");
 
   SetClassRoot(kFloatArrayClass, FindSystemClass(self, "[F"));
   mirror::FloatArray::SetArrayClass(GetClassRoot(kFloatArrayClass));
@@ -553,9 +564,12 @@
   SetClassRoot(kDoubleArrayClass, FindSystemClass(self, "[D"));
   mirror::DoubleArray::SetArrayClass(GetClassRoot(kDoubleArrayClass));
 
-  CHECK_EQ(class_array_class.Get(), FindSystemClass(self, "[Ljava/lang/Class;"));
+  // Run Class through FindSystemClass. This initializes the dex_cache_ fields and register it
+  // in class_table_.
+  CheckSystemClass(self, java_lang_Class, "Ljava/lang/Class;");
 
-  CHECK_EQ(object_array_class.Get(), FindSystemClass(self, "[Ljava/lang/Object;"));
+  CheckSystemClass(self, class_array_class, "[Ljava/lang/Class;");
+  CheckSystemClass(self, object_array_class, "[Ljava/lang/Object;");
 
   // Setup the single, global copy of "iftable".
   auto java_lang_Cloneable = hs.NewHandle(FindSystemClass(self, "Ljava/lang/Cloneable;"));
@@ -577,14 +591,11 @@
            mirror::Class::GetDirectInterface(self, object_array_class, 0));
   CHECK_EQ(java_io_Serializable.Get(),
            mirror::Class::GetDirectInterface(self, object_array_class, 1));
-  // Run Class, ArtField, and ArtMethod through FindSystemClass. This initializes their
-  // dex_cache_ fields and register them in class_table_.
-  CHECK_EQ(java_lang_Class.Get(), FindSystemClass(self, "Ljava/lang/Class;"));
 
   CHECK_EQ(object_array_string.Get(),
            FindSystemClass(self, GetClassRootDescriptor(kJavaLangStringArrayClass)));
 
-  // End of special init trickery, subsequent classes may be loaded via FindSystemClass.
+  // End of special init trickery, all subsequent classes may be loaded via FindSystemClass.
 
   // Create java.lang.reflect.Proxy root.
   SetClassRoot(kJavaLangReflectProxy, FindSystemClass(self, "Ljava/lang/reflect/Proxy;"));
@@ -624,7 +635,7 @@
   // java.lang.ref classes need to be specially flagged, but otherwise are normal classes
   // finish initializing Reference class
   mirror::Class::SetStatus(java_lang_ref_Reference, mirror::Class::kStatusNotReady, self);
-  CHECK_EQ(java_lang_ref_Reference.Get(), FindSystemClass(self, "Ljava/lang/ref/Reference;"));
+  CheckSystemClass(self, java_lang_ref_Reference, "Ljava/lang/ref/Reference;");
   CHECK_EQ(java_lang_ref_Reference->GetObjectSize(), mirror::Reference::InstanceSize());
   CHECK_EQ(java_lang_ref_Reference->GetClassSize(),
            mirror::Reference::ClassSize(image_pointer_size_));
diff --git a/runtime/class_linker.h b/runtime/class_linker.h
index 71fcf29..56a868a 100644
--- a/runtime/class_linker.h
+++ b/runtime/class_linker.h
@@ -1025,6 +1025,11 @@
       REQUIRES(!dex_lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  // Check that c1 == FindSystemClass(self, descriptor). Abort with class dumps otherwise.
+  void CheckSystemClass(Thread* self, Handle<mirror::Class> c1, const char* descriptor)
+      REQUIRES(!dex_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
   std::vector<const DexFile*> boot_class_path_;
   std::vector<std::unique_ptr<const DexFile>> boot_dex_files_;
 
diff --git a/runtime/entrypoints/entrypoint_utils-inl.h b/runtime/entrypoints/entrypoint_utils-inl.h
index 5161175..7e7d904 100644
--- a/runtime/entrypoints/entrypoint_utils-inl.h
+++ b/runtime/entrypoints/entrypoint_utils-inl.h
@@ -299,8 +299,10 @@
 }
 
 template<FindFieldType type, bool access_check>
-inline ArtField* FindFieldFromCode(uint32_t field_idx, ArtMethod* referrer,
-                                           Thread* self, size_t expected_size) {
+inline ArtField* FindFieldFromCode(uint32_t field_idx,
+                                   ArtMethod* referrer,
+                                   Thread* self,
+                                   size_t expected_size) REQUIRES(!Roles::uninterruptible_) {
   bool is_primitive;
   bool is_set;
   bool is_static;
diff --git a/runtime/entrypoints/entrypoint_utils.cc b/runtime/entrypoints/entrypoint_utils.cc
index b5a55bf..3dfad76 100644
--- a/runtime/entrypoints/entrypoint_utils.cc
+++ b/runtime/entrypoints/entrypoint_utils.cc
@@ -273,15 +273,15 @@
     if (outer_method != nullptr) {
       const OatQuickMethodHeader* current_code = outer_method->GetOatQuickMethodHeader(caller_pc);
       if (current_code->IsOptimized()) {
-          uintptr_t native_pc_offset = current_code->NativeQuickPcOffset(caller_pc);
-          CodeInfo code_info = current_code->GetOptimizedCodeInfo();
-          StackMapEncoding encoding = code_info.ExtractEncoding();
-          StackMap stack_map = code_info.GetStackMapForNativePcOffset(native_pc_offset, encoding);
-          DCHECK(stack_map.IsValid());
-          if (stack_map.HasInlineInfo(encoding)) {
-            InlineInfo inline_info = code_info.GetInlineInfoOf(stack_map, encoding);
-            caller = GetResolvedMethod(outer_method, inline_info, inline_info.GetDepth() - 1);
-          }
+        uintptr_t native_pc_offset = current_code->NativeQuickPcOffset(caller_pc);
+        CodeInfo code_info = current_code->GetOptimizedCodeInfo();
+        StackMapEncoding encoding = code_info.ExtractEncoding();
+        StackMap stack_map = code_info.GetStackMapForNativePcOffset(native_pc_offset, encoding);
+        DCHECK(stack_map.IsValid());
+        if (stack_map.HasInlineInfo(encoding)) {
+          InlineInfo inline_info = code_info.GetInlineInfoOf(stack_map, encoding);
+          caller = GetResolvedMethod(outer_method, inline_info, inline_info.GetDepth() - 1);
+        }
       }
     }
     if (kIsDebugBuild && do_caller_check) {
diff --git a/runtime/entrypoints/quick/quick_field_entrypoints.cc b/runtime/entrypoints/quick/quick_field_entrypoints.cc
index 1850254..a245f18 100644
--- a/runtime/entrypoints/quick/quick_field_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_field_entrypoints.cc
@@ -27,7 +27,36 @@
 
 namespace art {
 
-extern "C" int8_t artGetByteStaticFromCode(uint32_t field_idx, ArtMethod* referrer,
+inline constexpr bool FindFieldTypeIsRead(FindFieldType type) {
+  return type == InstanceObjectRead ||
+         type == InstancePrimitiveRead ||
+         type == StaticObjectRead ||
+         type == StaticPrimitiveRead;
+}
+
+// Helper function to do a null check after trying to resolve the field. Not for statics since obj
+// does not exist there. There is a suspend check, object is a double pointer to update the value
+// in the caller in case it moves.
+template<FindFieldType type, bool kAccessCheck>
+ALWAYS_INLINE static inline ArtField* FindInstanceField(uint32_t field_idx,
+                                                        ArtMethod* referrer,
+                                                        Thread* self,
+                                                        size_t size,
+                                                        mirror::Object** obj)
+    REQUIRES(!Roles::uninterruptible_)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  StackHandleScope<1> hs(self);
+  HandleWrapper<mirror::Object> h(hs.NewHandleWrapper(obj));
+  ArtField* field = FindFieldFromCode<type, kAccessCheck>(field_idx, referrer, self, size);
+  if (LIKELY(field != nullptr) && UNLIKELY(h.Get() == nullptr)) {
+    ThrowNullPointerExceptionForFieldAccess(field, /*is_read*/FindFieldTypeIsRead(type));
+    return nullptr;
+  }
+  return field;
+}
+
+extern "C" int8_t artGetByteStaticFromCode(uint32_t field_idx,
+                                           ArtMethod* referrer,
                                            Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
@@ -42,7 +71,8 @@
   return 0;  // Will throw exception by checking with Thread::Current.
 }
 
-extern "C" uint8_t artGetBooleanStaticFromCode(uint32_t field_idx, ArtMethod* referrer,
+extern "C" uint8_t artGetBooleanStaticFromCode(uint32_t field_idx,
+                                               ArtMethod* referrer,
                                                Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
@@ -57,7 +87,8 @@
   return 0;  // Will throw exception by checking with Thread::Current.
 }
 
-extern "C" int16_t artGetShortStaticFromCode(uint32_t field_idx, ArtMethod* referrer,
+extern "C" int16_t artGetShortStaticFromCode(uint32_t field_idx,
+                                             ArtMethod* referrer,
                                              Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
@@ -125,12 +156,16 @@
                                                    Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
-  ArtField* field = FindFieldFast(field_idx, referrer, StaticObjectRead,
+  ArtField* field = FindFieldFast(field_idx,
+                                  referrer,
+                                  StaticObjectRead,
                                   sizeof(mirror::HeapReference<mirror::Object>));
   if (LIKELY(field != nullptr)) {
     return field->GetObj(field->GetDeclaringClass());
   }
-  field = FindFieldFromCode<StaticObjectRead, true>(field_idx, referrer, self,
+  field = FindFieldFromCode<StaticObjectRead, true>(field_idx,
+                                                    referrer,
+                                                    self,
                                                     sizeof(mirror::HeapReference<mirror::Object>));
   if (LIKELY(field != nullptr)) {
     return field->GetObj(field->GetDeclaringClass());
@@ -138,149 +173,159 @@
   return nullptr;  // Will throw exception by checking with Thread::Current.
 }
 
-extern "C" int8_t artGetByteInstanceFromCode(uint32_t field_idx, mirror::Object* obj,
-                                             ArtMethod* referrer, Thread* self)
+extern "C" int8_t artGetByteInstanceFromCode(uint32_t field_idx,
+                                             mirror::Object* obj,
+                                             ArtMethod* referrer,
+                                             Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
   ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveRead, sizeof(int8_t));
   if (LIKELY(field != nullptr && obj != nullptr)) {
     return field->GetByte(obj);
   }
-  field = FindFieldFromCode<InstancePrimitiveRead, true>(field_idx, referrer, self,
-                                                         sizeof(int8_t));
+  field = FindInstanceField<InstancePrimitiveRead, true>(field_idx,
+                                                         referrer,
+                                                         self,
+                                                         sizeof(int8_t),
+                                                         &obj);
   if (LIKELY(field != nullptr)) {
-    if (UNLIKELY(obj == nullptr)) {
-      ThrowNullPointerExceptionForFieldAccess(field, true);
-    } else {
-      return field->GetByte(obj);
-    }
+    return field->GetByte(obj);
   }
   return 0;  // Will throw exception by checking with Thread::Current.
 }
 
-extern "C" uint8_t artGetBooleanInstanceFromCode(uint32_t field_idx, mirror::Object* obj,
-                                                 ArtMethod* referrer, Thread* self)
+extern "C" uint8_t artGetBooleanInstanceFromCode(uint32_t field_idx,
+                                                 mirror::Object* obj,
+                                                 ArtMethod* referrer,
+                                                 Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
   ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveRead, sizeof(int8_t));
   if (LIKELY(field != nullptr && obj != nullptr)) {
     return field->GetBoolean(obj);
   }
-  field = FindFieldFromCode<InstancePrimitiveRead, true>(field_idx, referrer, self,
-                                                         sizeof(int8_t));
+  field = FindInstanceField<InstancePrimitiveRead, true>(field_idx,
+                                                         referrer,
+                                                         self,
+                                                         sizeof(int8_t),
+                                                         &obj);
   if (LIKELY(field != nullptr)) {
-    if (UNLIKELY(obj == nullptr)) {
-      ThrowNullPointerExceptionForFieldAccess(field, true);
-    } else {
-      return field->GetBoolean(obj);
-    }
+    return field->GetBoolean(obj);
   }
   return 0;  // Will throw exception by checking with Thread::Current.
 }
-extern "C" int16_t artGetShortInstanceFromCode(uint32_t field_idx, mirror::Object* obj,
-                                               ArtMethod* referrer, Thread* self)
+extern "C" int16_t artGetShortInstanceFromCode(uint32_t field_idx,
+                                               mirror::Object* obj,
+                                               ArtMethod* referrer,
+                                               Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
   ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveRead, sizeof(int16_t));
   if (LIKELY(field != nullptr && obj != nullptr)) {
     return field->GetShort(obj);
   }
-  field = FindFieldFromCode<InstancePrimitiveRead, true>(field_idx, referrer, self,
-                                                         sizeof(int16_t));
+  field = FindInstanceField<InstancePrimitiveRead, true>(field_idx,
+                                                         referrer,
+                                                         self,
+                                                         sizeof(int16_t),
+                                                         &obj);
   if (LIKELY(field != nullptr)) {
-    if (UNLIKELY(obj == nullptr)) {
-      ThrowNullPointerExceptionForFieldAccess(field, true);
-    } else {
-      return field->GetShort(obj);
-    }
+    return field->GetShort(obj);
   }
   return 0;  // Will throw exception by checking with Thread::Current.
 }
 
-extern "C" uint16_t artGetCharInstanceFromCode(uint32_t field_idx, mirror::Object* obj,
-                                               ArtMethod* referrer, Thread* self)
+extern "C" uint16_t artGetCharInstanceFromCode(uint32_t field_idx,
+                                               mirror::Object* obj,
+                                               ArtMethod* referrer,
+                                               Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
   ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveRead, sizeof(int16_t));
   if (LIKELY(field != nullptr && obj != nullptr)) {
     return field->GetChar(obj);
   }
-  field = FindFieldFromCode<InstancePrimitiveRead, true>(field_idx, referrer, self,
-                                                         sizeof(int16_t));
+  field = FindInstanceField<InstancePrimitiveRead, true>(field_idx,
+                                                         referrer,
+                                                         self,
+                                                         sizeof(int16_t),
+                                                         &obj);
   if (LIKELY(field != nullptr)) {
-    if (UNLIKELY(obj == nullptr)) {
-      ThrowNullPointerExceptionForFieldAccess(field, true);
-    } else {
-      return field->GetChar(obj);
-    }
+    return field->GetChar(obj);
   }
   return 0;  // Will throw exception by checking with Thread::Current.
 }
 
-extern "C" uint32_t artGet32InstanceFromCode(uint32_t field_idx, mirror::Object* obj,
-                                             ArtMethod* referrer, Thread* self)
+extern "C" uint32_t artGet32InstanceFromCode(uint32_t field_idx,
+                                             mirror::Object* obj,
+                                             ArtMethod* referrer,
+                                             Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
   ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveRead, sizeof(int32_t));
   if (LIKELY(field != nullptr && obj != nullptr)) {
     return field->Get32(obj);
   }
-  field = FindFieldFromCode<InstancePrimitiveRead, true>(field_idx, referrer, self,
-                                                         sizeof(int32_t));
+  field = FindInstanceField<InstancePrimitiveRead, true>(field_idx,
+                                                         referrer,
+                                                         self,
+                                                         sizeof(int32_t),
+                                                         &obj);
   if (LIKELY(field != nullptr)) {
-    if (UNLIKELY(obj == nullptr)) {
-      ThrowNullPointerExceptionForFieldAccess(field, true);
-    } else {
-      return field->Get32(obj);
-    }
+    return field->Get32(obj);
   }
   return 0;  // Will throw exception by checking with Thread::Current.
 }
 
-extern "C" uint64_t artGet64InstanceFromCode(uint32_t field_idx, mirror::Object* obj,
-                                             ArtMethod* referrer, Thread* self)
+extern "C" uint64_t artGet64InstanceFromCode(uint32_t field_idx,
+                                             mirror::Object* obj,
+                                             ArtMethod* referrer,
+                                             Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
   ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveRead, sizeof(int64_t));
   if (LIKELY(field != nullptr && obj != nullptr)) {
     return field->Get64(obj);
   }
-  field = FindFieldFromCode<InstancePrimitiveRead, true>(field_idx, referrer, self,
-                                                         sizeof(int64_t));
+  field = FindInstanceField<InstancePrimitiveRead, true>(field_idx,
+                                                         referrer,
+                                                         self,
+                                                         sizeof(int64_t),
+                                                         &obj);
   if (LIKELY(field != nullptr)) {
-    if (UNLIKELY(obj == nullptr)) {
-      ThrowNullPointerExceptionForFieldAccess(field, true);
-    } else {
-      return field->Get64(obj);
-    }
+    return field->Get64(obj);
   }
   return 0;  // Will throw exception by checking with Thread::Current.
 }
 
-extern "C" mirror::Object* artGetObjInstanceFromCode(uint32_t field_idx, mirror::Object* obj,
+extern "C" mirror::Object* artGetObjInstanceFromCode(uint32_t field_idx,
+                                                     mirror::Object* obj,
                                                      ArtMethod* referrer,
                                                      Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
-  ArtField* field = FindFieldFast(field_idx, referrer, InstanceObjectRead,
+  ArtField* field = FindFieldFast(field_idx,
+                                  referrer,
+                                  InstanceObjectRead,
                                   sizeof(mirror::HeapReference<mirror::Object>));
   if (LIKELY(field != nullptr && obj != nullptr)) {
     return field->GetObj(obj);
   }
-  field = FindFieldFromCode<InstanceObjectRead, true>(
-      field_idx, referrer, self, sizeof(mirror::HeapReference<mirror::Object>));
+  field = FindInstanceField<InstanceObjectRead, true>(field_idx,
+                                                      referrer,
+                                                      self,
+                                                      sizeof(mirror::HeapReference<mirror::Object>),
+                                                      &obj);
   if (LIKELY(field != nullptr)) {
-    if (UNLIKELY(obj == nullptr)) {
-      ThrowNullPointerExceptionForFieldAccess(field, true);
-    } else {
-      return field->GetObj(obj);
-    }
+    return field->GetObj(obj);
   }
   return nullptr;  // Will throw exception by checking with Thread::Current.
 }
 
-extern "C" int artSet8StaticFromCode(uint32_t field_idx, uint32_t new_value,
-                                     ArtMethod* referrer, Thread* self)
+extern "C" int artSet8StaticFromCode(uint32_t field_idx,
+                                     uint32_t new_value,
+                                     ArtMethod* referrer,
+                                     Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
   ArtField* field = FindFieldFast(field_idx, referrer, StaticPrimitiveWrite, sizeof(int8_t));
@@ -310,8 +355,10 @@
   return -1;  // failure
 }
 
-extern "C" int artSet16StaticFromCode(uint32_t field_idx, uint16_t new_value,
-                                      ArtMethod* referrer, Thread* self)
+extern "C" int artSet16StaticFromCode(uint32_t field_idx,
+                                      uint16_t new_value,
+                                      ArtMethod* referrer,
+                                      Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
   ArtField* field = FindFieldFast(field_idx, referrer, StaticPrimitiveWrite, sizeof(int16_t));
@@ -341,8 +388,10 @@
   return -1;  // failure
 }
 
-extern "C" int artSet32StaticFromCode(uint32_t field_idx, uint32_t new_value,
-                                      ArtMethod* referrer, Thread* self)
+extern "C" int artSet32StaticFromCode(uint32_t field_idx,
+                                      uint32_t new_value,
+                                      ArtMethod* referrer,
+                                      Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
   ArtField* field = FindFieldFast(field_idx, referrer, StaticPrimitiveWrite, sizeof(int32_t));
@@ -360,8 +409,10 @@
   return -1;  // failure
 }
 
-extern "C" int artSet64StaticFromCode(uint32_t field_idx, ArtMethod* referrer,
-                                      uint64_t new_value, Thread* self)
+extern "C" int artSet64StaticFromCode(uint32_t field_idx,
+                                      ArtMethod* referrer,
+                                      uint64_t new_value,
+                                      Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
   ArtField* field = FindFieldFast(field_idx, referrer, StaticPrimitiveWrite, sizeof(int64_t));
@@ -379,11 +430,15 @@
   return -1;  // failure
 }
 
-extern "C" int artSetObjStaticFromCode(uint32_t field_idx, mirror::Object* new_value,
-                                       ArtMethod* referrer, Thread* self)
+extern "C" int artSetObjStaticFromCode(uint32_t field_idx,
+                                       mirror::Object* new_value,
+                                       ArtMethod* referrer,
+                                       Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
-  ArtField* field = FindFieldFast(field_idx, referrer, StaticObjectWrite,
+  ArtField* field = FindFieldFast(field_idx,
+                                  referrer,
+                                  StaticObjectWrite,
                                   sizeof(mirror::HeapReference<mirror::Object>));
   if (LIKELY(field != nullptr)) {
     if (LIKELY(!field->IsPrimitiveType())) {
@@ -392,8 +447,15 @@
       return 0;  // success
     }
   }
-  field = FindFieldFromCode<StaticObjectWrite, true>(field_idx, referrer, self,
-                                                     sizeof(mirror::HeapReference<mirror::Object>));
+  {
+    StackHandleScope<1> hs(self);
+    HandleWrapper<mirror::Object> h_obj(hs.NewHandleWrapper(&new_value));
+    field = FindFieldFromCode<StaticObjectWrite, true>(
+        field_idx,
+        referrer,
+        self,
+        sizeof(mirror::HeapReference<mirror::Object>));
+  }
   if (LIKELY(field != nullptr)) {
     // Compiled code can't use transactional mode.
     field->SetObj<false>(field->GetDeclaringClass(), new_value);
@@ -402,8 +464,11 @@
   return -1;  // failure
 }
 
-extern "C" int artSet8InstanceFromCode(uint32_t field_idx, mirror::Object* obj, uint8_t new_value,
-                                       ArtMethod* referrer, Thread* self)
+extern "C" int artSet8InstanceFromCode(uint32_t field_idx,
+                                       mirror::Object* obj,
+                                       uint8_t new_value,
+                                       ArtMethod* referrer,
+                                       Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
   ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveWrite, sizeof(int8_t));
@@ -418,31 +483,29 @@
     }
     return 0;  // success
   }
-  {
-    StackHandleScope<1> hs(self);
-    HandleWrapper<mirror::Object> h_obj(hs.NewHandleWrapper(&obj));
-    field = FindFieldFromCode<InstancePrimitiveWrite, true>(field_idx, referrer, self,
-                                                            sizeof(int8_t));
-  }
+  field = FindInstanceField<InstancePrimitiveWrite, true>(field_idx,
+                                                          referrer,
+                                                          self,
+                                                          sizeof(int8_t),
+                                                          &obj);
   if (LIKELY(field != nullptr)) {
-    if (UNLIKELY(obj == nullptr)) {
-      ThrowNullPointerExceptionForFieldAccess(field, false);
+    Primitive::Type type = field->GetTypeAsPrimitiveType();
+    // Compiled code can't use transactional mode.
+    if (type == Primitive::kPrimBoolean) {
+      field->SetBoolean<false>(obj, new_value);
     } else {
-      Primitive::Type type = field->GetTypeAsPrimitiveType();
-      // Compiled code can't use transactional mode.
-      if (type == Primitive::kPrimBoolean) {
-        field->SetBoolean<false>(obj, new_value);
-      } else {
-        field->SetByte<false>(obj, new_value);
-      }
-      return 0;  // success
+      field->SetByte<false>(obj, new_value);
     }
+    return 0;  // success
   }
   return -1;  // failure
 }
 
-extern "C" int artSet16InstanceFromCode(uint32_t field_idx, mirror::Object* obj, uint16_t new_value,
-                                        ArtMethod* referrer, Thread* self)
+extern "C" int artSet16InstanceFromCode(uint32_t field_idx,
+                                        mirror::Object* obj,
+                                        uint16_t new_value,
+                                        ArtMethod* referrer,
+                                        Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
   ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveWrite, sizeof(int16_t));
@@ -457,32 +520,30 @@
     }
     return 0;  // success
   }
-  {
-    StackHandleScope<1> hs(self);
-    HandleWrapper<mirror::Object> h_obj(hs.NewHandleWrapper(&obj));
-    field = FindFieldFromCode<InstancePrimitiveWrite, true>(field_idx, referrer, self,
-                                                            sizeof(int16_t));
-  }
+  field = FindInstanceField<InstancePrimitiveWrite, true>(field_idx,
+                                                          referrer,
+                                                          self,
+                                                          sizeof(int16_t),
+                                                          &obj);
   if (LIKELY(field != nullptr)) {
-    if (UNLIKELY(obj == nullptr)) {
-      ThrowNullPointerExceptionForFieldAccess(field, false);
+    Primitive::Type type = field->GetTypeAsPrimitiveType();
+    // Compiled code can't use transactional mode.
+    if (type == Primitive::kPrimChar) {
+      field->SetChar<false>(obj, new_value);
     } else {
-      Primitive::Type type = field->GetTypeAsPrimitiveType();
-      // Compiled code can't use transactional mode.
-      if (type == Primitive::kPrimChar) {
-        field->SetChar<false>(obj, new_value);
-      } else {
-        DCHECK_EQ(Primitive::kPrimShort, type);
-        field->SetShort<false>(obj, new_value);
-      }
-      return 0;  // success
+      DCHECK_EQ(Primitive::kPrimShort, type);
+      field->SetShort<false>(obj, new_value);
     }
+    return 0;  // success
   }
   return -1;  // failure
 }
 
-extern "C" int artSet32InstanceFromCode(uint32_t field_idx, mirror::Object* obj, uint32_t new_value,
-                                        ArtMethod* referrer, Thread* self)
+extern "C" int artSet32InstanceFromCode(uint32_t field_idx,
+                                        mirror::Object* obj,
+                                        uint32_t new_value,
+                                        ArtMethod* referrer,
+                                        Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
   ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveWrite, sizeof(int32_t));
@@ -491,26 +552,24 @@
     field->Set32<false>(obj, new_value);
     return 0;  // success
   }
-  {
-    StackHandleScope<1> hs(self);
-    HandleWrapper<mirror::Object> h_obj(hs.NewHandleWrapper(&obj));
-    field = FindFieldFromCode<InstancePrimitiveWrite, true>(field_idx, referrer, self,
-                                                            sizeof(int32_t));
-  }
+  field = FindInstanceField<InstancePrimitiveWrite, true>(field_idx,
+                                                          referrer,
+                                                          self,
+                                                          sizeof(int32_t),
+                                                          &obj);
   if (LIKELY(field != nullptr)) {
-    if (UNLIKELY(obj == nullptr)) {
-      ThrowNullPointerExceptionForFieldAccess(field, false);
-    } else {
-      // Compiled code can't use transactional mode.
-      field->Set32<false>(obj, new_value);
-      return 0;  // success
-    }
+    // Compiled code can't use transactional mode.
+    field->Set32<false>(obj, new_value);
+    return 0;  // success
   }
   return -1;  // failure
 }
 
-extern "C" int artSet64InstanceFromCode(uint32_t field_idx, mirror::Object* obj, uint64_t new_value,
-                                        ArtMethod* referrer, Thread* self)
+extern "C" int artSet64InstanceFromCode(uint32_t field_idx,
+                                        mirror::Object* obj,
+                                        uint64_t new_value,
+                                        ArtMethod* referrer,
+                                        Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
   ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveWrite, sizeof(int64_t));
@@ -519,34 +578,45 @@
     field->Set64<false>(obj, new_value);
     return 0;  // success
   }
-  field = FindFieldFromCode<InstancePrimitiveWrite, true>(field_idx, referrer, self,
-                                                          sizeof(int64_t));
+  field = FindInstanceField<InstancePrimitiveWrite, true>(field_idx,
+                                                          referrer,
+                                                          self,
+                                                          sizeof(int64_t),
+                                                          &obj);
   if (LIKELY(field != nullptr)) {
-    if (UNLIKELY(obj == nullptr)) {
-      ThrowNullPointerExceptionForFieldAccess(field, false);
-    } else {
-      // Compiled code can't use transactional mode.
-      field->Set64<false>(obj, new_value);
-      return 0;  // success
-    }
+    // Compiled code can't use transactional mode.
+    field->Set64<false>(obj, new_value);
+    return 0;
   }
   return -1;  // failure
 }
 
-extern "C" int artSetObjInstanceFromCode(uint32_t field_idx, mirror::Object* obj,
+extern "C" int artSetObjInstanceFromCode(uint32_t field_idx,
+                                         mirror::Object* obj,
                                          mirror::Object* new_value,
-                                         ArtMethod* referrer, Thread* self)
+                                         ArtMethod* referrer,
+                                         Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
-  ArtField* field = FindFieldFast(field_idx, referrer, InstanceObjectWrite,
+  ArtField* field = FindFieldFast(field_idx,
+                                  referrer,
+                                  InstanceObjectWrite,
                                   sizeof(mirror::HeapReference<mirror::Object>));
   if (LIKELY(field != nullptr && obj != nullptr)) {
     // Compiled code can't use transactional mode.
     field->SetObj<false>(obj, new_value);
     return 0;  // success
   }
-  field = FindFieldFromCode<InstanceObjectWrite, true>(field_idx, referrer, self,
-                                                       sizeof(mirror::HeapReference<mirror::Object>));
+  {
+    StackHandleScope<2> hs(self);
+    HandleWrapper<mirror::Object> h_obj(hs.NewHandleWrapper(&obj));
+    HandleWrapper<mirror::Object> h_new_value(hs.NewHandleWrapper(&new_value));
+    field = FindFieldFromCode<InstanceObjectWrite, true>(
+        field_idx,
+        referrer,
+        self,
+        sizeof(mirror::HeapReference<mirror::Object>));
+  }
   if (LIKELY(field != nullptr)) {
     if (UNLIKELY(obj == nullptr)) {
       ThrowNullPointerExceptionForFieldAccess(field, false);
diff --git a/runtime/entrypoints_order_test.cc b/runtime/entrypoints_order_test.cc
index f87d48d..e72809b 100644
--- a/runtime/entrypoints_order_test.cc
+++ b/runtime/entrypoints_order_test.cc
@@ -127,7 +127,7 @@
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, mterp_default_ibase, mterp_alt_ibase, sizeof(void*));
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, mterp_alt_ibase, rosalloc_runs, sizeof(void*));
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, rosalloc_runs, thread_local_alloc_stack_top,
-                        sizeof(void*) * kNumRosAllocThreadLocalSizeBrackets);
+                        sizeof(void*) * kNumRosAllocThreadLocalSizeBracketsInThread);
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_alloc_stack_top, thread_local_alloc_stack_end,
                         sizeof(void*));
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_alloc_stack_end, held_mutexes, sizeof(void*));
diff --git a/runtime/gc/allocator/rosalloc-inl.h b/runtime/gc/allocator/rosalloc-inl.h
index 2510514..d1c81e3 100644
--- a/runtime/gc/allocator/rosalloc-inl.h
+++ b/runtime/gc/allocator/rosalloc-inl.h
@@ -62,11 +62,6 @@
   }
   size_t bracket_size;
   size_t idx = SizeToIndexAndBracketSize(size, &bracket_size);
-  DCHECK_EQ(idx, SizeToIndex(size));
-  DCHECK_EQ(bracket_size, IndexToBracketSize(idx));
-  DCHECK_EQ(bracket_size, bracketSizes[idx]);
-  DCHECK_LE(size, bracket_size);
-  DCHECK(size > 512 || bracket_size - size < 16);
   DCHECK_LT(idx, kNumThreadLocalSizeBrackets);
   Run* thread_local_run = reinterpret_cast<Run*>(self->GetRosAllocRun(idx));
   if (kIsDebugBuild) {
diff --git a/runtime/gc/allocator/rosalloc.cc b/runtime/gc/allocator/rosalloc.cc
index 7d00094..8b125dd 100644
--- a/runtime/gc/allocator/rosalloc.cc
+++ b/runtime/gc/allocator/rosalloc.cc
@@ -638,11 +638,6 @@
   DCHECK_LE(size, kLargeSizeThreshold);
   size_t bracket_size;
   size_t idx = SizeToIndexAndBracketSize(size, &bracket_size);
-  DCHECK_EQ(idx, SizeToIndex(size));
-  DCHECK_EQ(bracket_size, IndexToBracketSize(idx));
-  DCHECK_EQ(bracket_size, bracketSizes[idx]);
-  DCHECK_LE(size, bracket_size);
-  DCHECK(size > 512 || bracket_size - size < 16);
   Locks::mutator_lock_->AssertExclusiveHeld(self);
   void* slot_addr = AllocFromCurrentRunUnlocked(self, idx);
   if (LIKELY(slot_addr != nullptr)) {
@@ -662,14 +657,7 @@
   DCHECK_LE(size, kLargeSizeThreshold);
   size_t bracket_size;
   size_t idx = SizeToIndexAndBracketSize(size, &bracket_size);
-  DCHECK_EQ(idx, SizeToIndex(size));
-  DCHECK_EQ(bracket_size, IndexToBracketSize(idx));
-  DCHECK_EQ(bracket_size, bracketSizes[idx]);
-  DCHECK_LE(size, bracket_size);
-  DCHECK(size > 512 || bracket_size - size < 16);
-
   void* slot_addr;
-
   if (LIKELY(idx < kNumThreadLocalSizeBrackets)) {
     // Use a thread-local run.
     Run* thread_local_run = reinterpret_cast<Run*>(self->GetRosAllocRun(idx));
@@ -881,17 +869,6 @@
   return stream.str();
 }
 
-inline size_t RosAlloc::Run::SlotIndex(Slot* slot) {
-  const uint8_t idx = size_bracket_idx_;
-  const size_t bracket_size = bracketSizes[idx];
-  const size_t offset_from_slot_base = reinterpret_cast<uint8_t*>(slot)
-      - reinterpret_cast<uint8_t*>(FirstSlot());
-  DCHECK_EQ(offset_from_slot_base % bracket_size, static_cast<size_t>(0));
-  size_t slot_idx = offset_from_slot_base / bracket_size;
-  DCHECK_LT(slot_idx, numOfSlots[idx]);
-  return slot_idx;
-}
-
 void RosAlloc::Run::FreeSlot(void* ptr) {
   DCHECK(!IsThreadLocal());
   const uint8_t idx = size_bracket_idx_;
@@ -1647,9 +1624,14 @@
 
 void RosAlloc::Initialize() {
   // bracketSizes.
+  static_assert(kNumRegularSizeBrackets == kNumOfSizeBrackets - 2,
+                "There should be two non-regular brackets");
   for (size_t i = 0; i < kNumOfSizeBrackets; i++) {
-    if (i < kNumOfSizeBrackets - 2) {
-      bracketSizes[i] = 16 * (i + 1);
+    if (i < kNumThreadLocalSizeBrackets) {
+      bracketSizes[i] = kThreadLocalBracketQuantumSize * (i + 1);
+    } else if (i < kNumRegularSizeBrackets) {
+      bracketSizes[i] = kBracketQuantumSize * (i - kNumThreadLocalSizeBrackets + 1) +
+          (kThreadLocalBracketQuantumSize *  kNumThreadLocalSizeBrackets);
     } else if (i == kNumOfSizeBrackets - 2) {
       bracketSizes[i] = 1 * KB;
     } else {
@@ -1662,16 +1644,13 @@
   }
   // numOfPages.
   for (size_t i = 0; i < kNumOfSizeBrackets; i++) {
-    if (i < 4) {
+    if (i < kNumThreadLocalSizeBrackets) {
       numOfPages[i] = 1;
-    } else if (i < 8) {
-      numOfPages[i] = 1;
-    } else if (i < 16) {
+    } else if (i < (kNumThreadLocalSizeBrackets + kNumRegularSizeBrackets) / 2) {
       numOfPages[i] = 4;
-    } else if (i < 32) {
+    } else if (i < kNumRegularSizeBrackets) {
       numOfPages[i] = 8;
-    } else if (i == 32) {
-      DCHECK_EQ(i, kNumOfSizeBrackets - 2);
+    } else if (i == kNumOfSizeBrackets - 2) {
       numOfPages[i] = 16;
     } else {
       DCHECK_EQ(i, kNumOfSizeBrackets - 1);
@@ -1701,8 +1680,8 @@
       size_t tmp_header_size = (tmp_unaligned_header_size % bracket_size == 0) ?
           tmp_unaligned_header_size :
           tmp_unaligned_header_size + (bracket_size - tmp_unaligned_header_size % bracket_size);
-      DCHECK_EQ(tmp_header_size % bracket_size, static_cast<size_t>(0));
-      DCHECK_EQ(tmp_header_size % 8, static_cast<size_t>(0));
+      DCHECK_EQ(tmp_header_size % bracket_size, 0U);
+      DCHECK_EQ(tmp_header_size % sizeof(uint64_t), 0U);
       if (tmp_slots_size + tmp_header_size <= run_size) {
         // Found the right number of slots, that is, there was enough
         // space for the header (including the bit maps.)
@@ -1711,8 +1690,8 @@
         break;
       }
     }
-    DCHECK_GT(num_of_slots, 0U);
-    DCHECK_GT(header_size, 0U);
+    DCHECK_GT(num_of_slots, 0U) << i;
+    DCHECK_GT(header_size, 0U) << i;
     // Add the padding for the alignment remainder.
     header_size += run_size % bracket_size;
     DCHECK_EQ(header_size + num_of_slots * bracket_size, run_size);
@@ -1723,7 +1702,7 @@
                 << ", headerSizes[" << i << "]=" << headerSizes[i];
     }
   }
-  // Fill the alloc bitmap so nobody can successfully allocate from it.
+  // Set up the dedicated full run so that nobody can successfully allocate from it.
   if (kIsDebugBuild) {
     dedicated_full_run_->magic_num_ = kMagicNum;
   }
@@ -1735,6 +1714,9 @@
 
   // The smallest bracket size must be at least as large as the sizeof(Slot).
   DCHECK_LE(sizeof(Slot), bracketSizes[0]) << "sizeof(Slot) <= the smallest bracket size";
+  // Check the invariants between the max bracket sizes and the number of brackets.
+  DCHECK_EQ(kMaxThreadLocalBracketSize, bracketSizes[kNumThreadLocalSizeBrackets - 1]);
+  DCHECK_EQ(kMaxRegularBracketSize, bracketSizes[kNumRegularSizeBrackets - 1]);
 }
 
 void RosAlloc::BytesAllocatedCallback(void* start ATTRIBUTE_UNUSED, void* end ATTRIBUTE_UNUSED,
diff --git a/runtime/gc/allocator/rosalloc.h b/runtime/gc/allocator/rosalloc.h
index 3ce3d63..a472a8b 100644
--- a/runtime/gc/allocator/rosalloc.h
+++ b/runtime/gc/allocator/rosalloc.h
@@ -366,7 +366,7 @@
     static size_t fixed_header_size() {
       return sizeof(Run);
     }
-    Slot* FirstSlot() {
+    Slot* FirstSlot() const {
       const uint8_t idx = size_bracket_idx_;
       return reinterpret_cast<Slot*>(reinterpret_cast<uintptr_t>(this) + headerSizes[idx]);
     }
@@ -473,7 +473,16 @@
       DCHECK_LT(slot_idx, numOfSlots[idx]);
       return reinterpret_cast<Slot*>(ptr);
     }
-    size_t SlotIndex(Slot* slot);
+    size_t SlotIndex(Slot* slot) const {
+      const uint8_t idx = size_bracket_idx_;
+      const size_t bracket_size = bracketSizes[idx];
+      const size_t offset_from_slot_base = reinterpret_cast<uint8_t*>(slot)
+          - reinterpret_cast<uint8_t*>(FirstSlot());
+      DCHECK_EQ(offset_from_slot_base % bracket_size, 0U);
+      size_t slot_idx = offset_from_slot_base / bracket_size;
+      DCHECK_LT(slot_idx, numOfSlots[idx]);
+      return slot_idx;
+    }
 
     // TODO: DISALLOW_COPY_AND_ASSIGN(Run);
   };
@@ -482,10 +491,8 @@
   static constexpr uint8_t kMagicNum = 42;
   // The magic number for free pages.
   static constexpr uint8_t kMagicNumFree = 43;
-  // The number of size brackets. Sync this with the length of Thread::rosalloc_runs_.
-  static constexpr size_t kNumOfSizeBrackets = kNumRosAllocThreadLocalSizeBrackets;
-  // The number of smaller size brackets that are the quantum size apart.
-  static constexpr size_t kNumOfQuantumSizeBrackets = 32;
+  // The number of size brackets.
+  static constexpr size_t kNumOfSizeBrackets = 42;
   // The sizes (the slot sizes, in bytes) of the size brackets.
   static size_t bracketSizes[kNumOfSizeBrackets];
   // The numbers of pages that are used for runs for each size bracket.
@@ -506,16 +513,23 @@
   }
   // Returns the index of the size bracket from the bracket size.
   static size_t BracketSizeToIndex(size_t size) {
-    DCHECK(16 <= size && ((size < 1 * KB && size % 16 == 0) || size == 1 * KB || size == 2 * KB));
+    DCHECK(8 <= size &&
+           ((size <= kMaxThreadLocalBracketSize && size % kThreadLocalBracketQuantumSize == 0) ||
+            (size <= kMaxRegularBracketSize && size % kBracketQuantumSize == 0) ||
+            size == 1 * KB || size == 2 * KB));
     size_t idx;
     if (UNLIKELY(size == 1 * KB)) {
       idx = kNumOfSizeBrackets - 2;
     } else if (UNLIKELY(size == 2 * KB)) {
       idx = kNumOfSizeBrackets - 1;
+    } else if (LIKELY(size <= kMaxThreadLocalBracketSize)) {
+      DCHECK_EQ(size % kThreadLocalBracketQuantumSize, 0U);
+      idx = size / kThreadLocalBracketQuantumSize - 1;
     } else {
-      DCHECK(size < 1 * KB);
-      DCHECK_EQ(size % 16, static_cast<size_t>(0));
-      idx = size / 16 - 1;
+      DCHECK(size <= kMaxRegularBracketSize);
+      DCHECK_EQ((size - kMaxThreadLocalBracketSize) % kBracketQuantumSize, 0U);
+      idx = ((size - kMaxThreadLocalBracketSize) / kBracketQuantumSize - 1)
+          + kNumThreadLocalSizeBrackets;
     }
     DCHECK(bracketSizes[idx] == size);
     return idx;
@@ -530,51 +544,64 @@
   // Rounds up the size up the nearest bracket size.
   static size_t RoundToBracketSize(size_t size) {
     DCHECK(size <= kLargeSizeThreshold);
-    if (LIKELY(size <= 512)) {
-      return RoundUp(size, 16);
-    } else if (512 < size && size <= 1 * KB) {
+    if (LIKELY(size <= kMaxThreadLocalBracketSize)) {
+      return RoundUp(size, kThreadLocalBracketQuantumSize);
+    } else if (size <= kMaxRegularBracketSize) {
+      return RoundUp(size, kBracketQuantumSize);
+    } else if (UNLIKELY(size <= 1 * KB)) {
       return 1 * KB;
     } else {
-      DCHECK(1 * KB < size && size <= 2 * KB);
+      DCHECK_LE(size, 2 * KB);
       return 2 * KB;
     }
   }
   // Returns the size bracket index from the byte size with rounding.
   static size_t SizeToIndex(size_t size) {
     DCHECK(size <= kLargeSizeThreshold);
-    if (LIKELY(size <= 512)) {
-      return RoundUp(size, 16) / 16 - 1;
-    } else if (512 < size && size <= 1 * KB) {
+    if (LIKELY(size <= kMaxThreadLocalBracketSize)) {
+      return RoundUp(size, kThreadLocalBracketQuantumSize) / kThreadLocalBracketQuantumSize - 1;
+    } else if (size <= kMaxRegularBracketSize) {
+      return (RoundUp(size, kBracketQuantumSize) - kMaxThreadLocalBracketSize) / kBracketQuantumSize
+          - 1 + kNumThreadLocalSizeBrackets;
+    } else if (size <= 1 * KB) {
       return kNumOfSizeBrackets - 2;
     } else {
-      DCHECK(1 * KB < size && size <= 2 * KB);
+      DCHECK_LE(size, 2 * KB);
       return kNumOfSizeBrackets - 1;
     }
   }
   // A combination of SizeToIndex() and RoundToBracketSize().
   static size_t SizeToIndexAndBracketSize(size_t size, size_t* bracket_size_out) {
     DCHECK(size <= kLargeSizeThreshold);
-    if (LIKELY(size <= 512)) {
-      size_t bracket_size = RoundUp(size, 16);
-      *bracket_size_out = bracket_size;
-      size_t idx = bracket_size / 16 - 1;
-      DCHECK_EQ(bracket_size, IndexToBracketSize(idx));
-      return idx;
-    } else if (512 < size && size <= 1 * KB) {
-      size_t bracket_size = 1024;
-      *bracket_size_out = bracket_size;
-      size_t idx = kNumOfSizeBrackets - 2;
-      DCHECK_EQ(bracket_size, IndexToBracketSize(idx));
-      return idx;
+    size_t idx;
+    size_t bracket_size;
+    if (LIKELY(size <= kMaxThreadLocalBracketSize)) {
+      bracket_size = RoundUp(size, kThreadLocalBracketQuantumSize);
+      idx = bracket_size / kThreadLocalBracketQuantumSize - 1;
+    } else if (size <= kMaxRegularBracketSize) {
+      bracket_size = RoundUp(size, kBracketQuantumSize);
+      idx = ((bracket_size - kMaxThreadLocalBracketSize) / kBracketQuantumSize - 1)
+          + kNumThreadLocalSizeBrackets;
+    } else if (size <= 1 * KB) {
+      bracket_size = 1 * KB;
+      idx = kNumOfSizeBrackets - 2;
     } else {
-      DCHECK(1 * KB < size && size <= 2 * KB);
-      size_t bracket_size = 2048;
-      *bracket_size_out = bracket_size;
-      size_t idx = kNumOfSizeBrackets - 1;
-      DCHECK_EQ(bracket_size, IndexToBracketSize(idx));
-      return idx;
+      DCHECK(size <= 2 * KB);
+      bracket_size = 2 * KB;
+      idx = kNumOfSizeBrackets - 1;
     }
+    DCHECK_EQ(idx, SizeToIndex(size)) << idx;
+    DCHECK_EQ(bracket_size, IndexToBracketSize(idx)) << idx;
+    DCHECK_EQ(bracket_size, bracketSizes[idx]) << idx;
+    DCHECK_LE(size, bracket_size) << idx;
+    DCHECK(size > kMaxRegularBracketSize ||
+           (size <= kMaxThreadLocalBracketSize &&
+            bracket_size - size < kThreadLocalBracketQuantumSize) ||
+           (size <= kMaxRegularBracketSize && bracket_size - size < kBracketQuantumSize)) << idx;
+    *bracket_size_out = bracket_size;
+    return idx;
   }
+
   // Returns the page map index from an address. Requires that the
   // address is page size aligned.
   size_t ToPageMapIndex(const void* addr) const {
@@ -630,18 +657,37 @@
   // The default value for page_release_size_threshold_.
   static constexpr size_t kDefaultPageReleaseSizeThreshold = 4 * MB;
 
-  // We use thread-local runs for the size Brackets whose indexes
+  // We use thread-local runs for the size brackets whose indexes
   // are less than this index. We use shared (current) runs for the rest.
-  static const size_t kNumThreadLocalSizeBrackets = 8;
+  // Sync this with the length of Thread::rosalloc_runs_.
+  static const size_t kNumThreadLocalSizeBrackets = 16;
+  static_assert(kNumThreadLocalSizeBrackets == kNumRosAllocThreadLocalSizeBracketsInThread,
+                "Mismatch between kNumThreadLocalSizeBrackets and "
+                "kNumRosAllocThreadLocalSizeBracketsInThread");
 
   // The size of the largest bracket we use thread-local runs for.
   // This should be equal to bracketSizes[kNumThreadLocalSizeBrackets - 1].
   static const size_t kMaxThreadLocalBracketSize = 128;
 
-  // The bracket size increment for the brackets of size <= 512 bytes.
+  // We use regular (8 or 16-bytes increment) runs for the size brackets whose indexes are less than
+  // this index.
+  static const size_t kNumRegularSizeBrackets = 40;
+
+  // The size of the largest regular (8 or 16-byte increment) bracket. Non-regular brackets are the
+  // 1 KB and the 2 KB brackets. This should be equal to bracketSizes[kNumRegularSizeBrackets - 1].
+  static const size_t kMaxRegularBracketSize = 512;
+
+  // The bracket size increment for the thread-local brackets (<= kMaxThreadLocalBracketSize bytes).
+  static constexpr size_t kThreadLocalBracketQuantumSize = 8;
+
+  // Equal to Log2(kThreadLocalBracketQuantumSize).
+  static constexpr size_t kThreadLocalBracketQuantumSizeShift = 3;
+
+  // The bracket size increment for the non-thread-local, regular brackets (of size <=
+  // kMaxRegularBracketSize bytes and > kMaxThreadLocalBracketSize bytes).
   static constexpr size_t kBracketQuantumSize = 16;
 
-  // Equal to Log2(kQuantumBracketSizeIncrement).
+  // Equal to Log2(kBracketQuantumSize).
   static constexpr size_t kBracketQuantumSizeShift = 4;
 
  private:
diff --git a/runtime/interpreter/interpreter_goto_table_impl.cc b/runtime/interpreter/interpreter_goto_table_impl.cc
index 940d344..ca8598e 100644
--- a/runtime/interpreter/interpreter_goto_table_impl.cc
+++ b/runtime/interpreter/interpreter_goto_table_impl.cc
@@ -21,6 +21,7 @@
 #include "base/stl_util.h"  // MakeUnique
 #include "experimental_flags.h"
 #include "interpreter_common.h"
+#include "jit/jit.h"
 #include "safe_math.h"
 
 #include <memory>  // std::unique_ptr
@@ -63,10 +64,15 @@
   currentHandlersTable = handlersTable[ \
       Runtime::Current()->GetInstrumentation()->GetInterpreterHandlerTable()]
 
-#define BRANCH_INSTRUMENTATION(offset) \
-  do { \
+#define BRANCH_INSTRUMENTATION(offset)                                                            \
+  do {                                                                                            \
+    ArtMethod* method = shadow_frame.GetMethod();                                                 \
     instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation(); \
-    instrumentation->Branch(self, shadow_frame.GetMethod(), dex_pc, offset); \
+    instrumentation->Branch(self, method, dex_pc, offset);                                        \
+    JValue result;                                                                                \
+    if (jit::Jit::MaybeDoOnStackReplacement(self, method, dex_pc, offset, &result)) {             \
+      return result;                                                                              \
+    }                                                                                             \
   } while (false)
 
 #define UNREACHABLE_CODE_CHECK()                \
diff --git a/runtime/interpreter/interpreter_switch_impl.cc b/runtime/interpreter/interpreter_switch_impl.cc
index f606978..25dbab2 100644
--- a/runtime/interpreter/interpreter_switch_impl.cc
+++ b/runtime/interpreter/interpreter_switch_impl.cc
@@ -17,6 +17,7 @@
 #include "base/stl_util.h"  // MakeUnique
 #include "experimental_flags.h"
 #include "interpreter_common.h"
+#include "jit/jit.h"
 #include "safe_math.h"
 
 #include <memory>  // std::unique_ptr
@@ -69,9 +70,14 @@
     }                                                                                           \
   } while (false)
 
-#define BRANCH_INSTRUMENTATION(offset) \
-  do { \
-    instrumentation->Branch(self, shadow_frame.GetMethod(), dex_pc, offset); \
+#define BRANCH_INSTRUMENTATION(offset)                                                         \
+  do {                                                                                         \
+    ArtMethod* method = shadow_frame.GetMethod();                                              \
+    instrumentation->Branch(self, method, dex_pc, offset);                                     \
+    JValue result;                                                                             \
+    if (jit::Jit::MaybeDoOnStackReplacement(self, method, dex_pc, offset, &result)) {          \
+      return result;                                                                           \
+    }                                                                                          \
   } while (false)
 
 static bool IsExperimentalInstructionEnabled(const Instruction *inst) {
diff --git a/runtime/jit/jit.cc b/runtime/jit/jit.cc
index fa5c41d..e38a684 100644
--- a/runtime/jit/jit.cc
+++ b/runtime/jit/jit.cc
@@ -25,15 +25,19 @@
 #include "jit_code_cache.h"
 #include "jit_instrumentation.h"
 #include "oat_file_manager.h"
+#include "oat_quick_method_header.h"
 #include "offline_profiling_info.h"
 #include "profile_saver.h"
 #include "runtime.h"
 #include "runtime_options.h"
+#include "stack_map.h"
 #include "utils.h"
 
 namespace art {
 namespace jit {
 
+static constexpr bool kEnableOnStackReplacement = false;
+
 JitOptions* JitOptions::CreateFromRuntimeArguments(const RuntimeArgumentMap& options) {
   auto* jit_options = new JitOptions;
   jit_options->use_jit_ = options.GetOrDefault(RuntimeArgumentMap::UseJIT);
@@ -43,6 +47,8 @@
       options.GetOrDefault(RuntimeArgumentMap::JITCodeCacheMaxCapacity);
   jit_options->compile_threshold_ =
       options.GetOrDefault(RuntimeArgumentMap::JITCompileThreshold);
+  // TODO(ngeoffray): Make this a proper option.
+  jit_options->osr_threshold_ = jit_options->compile_threshold_ * 2;
   jit_options->warmup_threshold_ =
       options.GetOrDefault(RuntimeArgumentMap::JITWarmupThreshold);
   jit_options->dump_info_on_shutdown_ =
@@ -121,7 +127,7 @@
     *error_msg = "JIT couldn't find jit_unload entry point";
     return false;
   }
-  jit_compile_method_ = reinterpret_cast<bool (*)(void*, ArtMethod*, Thread*)>(
+  jit_compile_method_ = reinterpret_cast<bool (*)(void*, ArtMethod*, Thread*, bool)>(
       dlsym(jit_library_handle_, "jit_compile_method"));
   if (jit_compile_method_ == nullptr) {
     dlclose(jit_library_handle_);
@@ -156,7 +162,7 @@
   return true;
 }
 
-bool Jit::CompileMethod(ArtMethod* method, Thread* self) {
+bool Jit::CompileMethod(ArtMethod* method, Thread* self, bool osr) {
   DCHECK(!method->IsRuntimeMethod());
   // Don't compile the method if it has breakpoints.
   if (Dbg::IsDebuggerActive() && Dbg::MethodHasAnyBreakpoints(method)) {
@@ -171,10 +177,11 @@
     return false;
   }
 
-  if (!code_cache_->NotifyCompilationOf(method, self)) {
+  if (!code_cache_->NotifyCompilationOf(method, self, osr)) {
+    VLOG(jit) << "JIT not compiling " << PrettyMethod(method) << " due to code cache";
     return false;
   }
-  bool success = jit_compile_method_(jit_compiler_handle_, method, self);
+  bool success = jit_compile_method_(jit_compiler_handle_, method, self, osr);
   code_cache_->DoneCompiling(method, self);
   return success;
 }
@@ -224,9 +231,11 @@
   }
 }
 
-void Jit::CreateInstrumentationCache(size_t compile_threshold, size_t warmup_threshold) {
+void Jit::CreateInstrumentationCache(size_t compile_threshold,
+                                     size_t warmup_threshold,
+                                     size_t osr_threshold) {
   instrumentation_cache_.reset(
-      new jit::JitInstrumentationCache(compile_threshold, warmup_threshold));
+      new jit::JitInstrumentationCache(compile_threshold, warmup_threshold, osr_threshold));
 }
 
 void Jit::NewTypeLoadedIfUsingJit(mirror::Class* type) {
@@ -255,5 +264,128 @@
   }
 }
 
+extern "C" void art_quick_osr_stub(void** stack,
+                                   uint32_t stack_size_in_bytes,
+                                   const uint8_t* native_pc,
+                                   JValue* result,
+                                   const char* shorty,
+                                   Thread* self);
+
+bool Jit::MaybeDoOnStackReplacement(Thread* thread,
+                                    ArtMethod* method,
+                                    uint32_t dex_pc,
+                                    int32_t dex_pc_offset,
+                                    JValue* result) {
+  if (!kEnableOnStackReplacement) {
+    return false;
+  }
+
+  Jit* jit = Runtime::Current()->GetJit();
+  if (jit == nullptr) {
+    return false;
+  }
+
+  if (kRuntimeISA == kMips || kRuntimeISA == kMips64) {
+    VLOG(jit) << "OSR not supported on this platform";
+    return false;
+  }
+
+  // Get the actual Java method if this method is from a proxy class. The compiler
+  // and the JIT code cache do not expect methods from proxy classes.
+  method = method->GetInterfaceMethodIfProxy(sizeof(void*));
+
+  // Cheap check if the method has been compiled already. That's an indicator that we should
+  // osr into it.
+  if (!jit->GetCodeCache()->ContainsPc(method->GetEntryPointFromQuickCompiledCode())) {
+    return false;
+  }
+
+  const OatQuickMethodHeader* osr_method = jit->GetCodeCache()->LookupOsrMethodHeader(method);
+  if (osr_method == nullptr) {
+    // No osr method yet, just return to the interpreter.
+    return false;
+  }
+
+  const size_t number_of_vregs = method->GetCodeItem()->registers_size_;
+  CodeInfo code_info = osr_method->GetOptimizedCodeInfo();
+  StackMapEncoding encoding = code_info.ExtractEncoding();
+
+  // Find stack map starting at the target dex_pc.
+  StackMap stack_map = code_info.GetOsrStackMapForDexPc(dex_pc + dex_pc_offset, encoding);
+  if (!stack_map.IsValid()) {
+    // There is no OSR stack map for this dex pc offset. Just return to the interpreter in the
+    // hope that the next branch has one.
+    return false;
+  }
+
+  // We found a stack map, now fill the frame with dex register values from the interpreter's
+  // shadow frame.
+  DexRegisterMap vreg_map =
+      code_info.GetDexRegisterMapOf(stack_map, encoding, number_of_vregs);
+
+  ShadowFrame* shadow_frame = thread->PopShadowFrame();
+
+  size_t frame_size = osr_method->GetFrameSizeInBytes();
+  void** memory = reinterpret_cast<void**>(malloc(frame_size));
+  memset(memory, 0, frame_size);
+
+  // Art ABI: ArtMethod is at the bottom of the stack.
+  memory[0] = method;
+
+  if (!vreg_map.IsValid()) {
+    // If we don't have a dex register map, then there are no live dex registers at
+    // this dex pc.
+  } else {
+    for (uint16_t vreg = 0; vreg < number_of_vregs; ++vreg) {
+      DexRegisterLocation::Kind location =
+          vreg_map.GetLocationKind(vreg, number_of_vregs, code_info, encoding);
+      if (location == DexRegisterLocation::Kind::kNone) {
+        // Dex register is dead or unitialized.
+        continue;
+      }
+
+      if (location == DexRegisterLocation::Kind::kConstant) {
+        // We skip constants because the compiled code knows how to handle them.
+        continue;
+      }
+
+      DCHECK(location == DexRegisterLocation::Kind::kInStack);
+
+      int32_t vreg_value = shadow_frame->GetVReg(vreg);
+      int32_t slot_offset = vreg_map.GetStackOffsetInBytes(vreg,
+                                                           number_of_vregs,
+                                                           code_info,
+                                                           encoding);
+      DCHECK_LT(slot_offset, static_cast<int32_t>(frame_size));
+      DCHECK_GT(slot_offset, 0);
+      (reinterpret_cast<int32_t*>(memory))[slot_offset / sizeof(int32_t)] = vreg_value;
+    }
+  }
+
+  const uint8_t* native_pc = stack_map.GetNativePcOffset(encoding) + osr_method->GetEntryPoint();
+  VLOG(jit) << "Jumping to "
+            << PrettyMethod(method)
+            << "@"
+            << std::hex << reinterpret_cast<uintptr_t>(native_pc);
+  {
+    ManagedStack fragment;
+    thread->PushManagedStackFragment(&fragment);
+    (*art_quick_osr_stub)(memory,
+                          frame_size,
+                          native_pc,
+                          result,
+                          method->GetInterfaceMethodIfProxy(sizeof(void*))->GetShorty(),
+                          thread);
+    if (UNLIKELY(thread->GetException() == Thread::GetDeoptimizationException())) {
+      thread->DeoptimizeWithDeoptimizationException(result);
+    }
+    thread->PopManagedStackFragment(fragment);
+  }
+  free(memory);
+  thread->PushShadowFrame(shadow_frame);
+  VLOG(jit) << "Done running OSR code for " << PrettyMethod(method);
+  return true;
+}
+
 }  // namespace jit
 }  // namespace art
diff --git a/runtime/jit/jit.h b/runtime/jit/jit.h
index a80f51f..042da92 100644
--- a/runtime/jit/jit.h
+++ b/runtime/jit/jit.h
@@ -49,9 +49,11 @@
 
   virtual ~Jit();
   static Jit* Create(JitOptions* options, std::string* error_msg);
-  bool CompileMethod(ArtMethod* method, Thread* self)
+  bool CompileMethod(ArtMethod* method, Thread* self, bool osr)
       SHARED_REQUIRES(Locks::mutator_lock_);
-  void CreateInstrumentationCache(size_t compile_threshold, size_t warmup_threshold);
+  void CreateInstrumentationCache(size_t compile_threshold,
+                                  size_t warmup_threshold,
+                                  size_t osr_threshold);
   void CreateThreadPool();
   CompilerCallbacks* GetCompilerCallbacks() {
     return compiler_callbacks_;
@@ -88,6 +90,17 @@
 
   bool JitAtFirstUse();
 
+  // If an OSR compiled version is available for `method`,
+  // and `dex_pc + dex_pc_offset` is an entry point of that compiled
+  // version, this method will jump to the compiled code, let it run,
+  // and return true afterwards. Return false otherwise.
+  static bool MaybeDoOnStackReplacement(Thread* thread,
+                                        ArtMethod* method,
+                                        uint32_t dex_pc,
+                                        int32_t dex_pc_offset,
+                                        JValue* result)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
  private:
   Jit();
   bool LoadCompiler(std::string* error_msg);
@@ -97,7 +110,7 @@
   void* jit_compiler_handle_;
   void* (*jit_load_)(CompilerCallbacks**, bool*);
   void (*jit_unload_)(void*);
-  bool (*jit_compile_method_)(void*, ArtMethod*, Thread*);
+  bool (*jit_compile_method_)(void*, ArtMethod*, Thread*, bool);
   void (*jit_types_loaded_)(void*, mirror::Class**, size_t count);
 
   // Performance monitoring.
@@ -123,6 +136,9 @@
   size_t GetWarmupThreshold() const {
     return warmup_threshold_;
   }
+  size_t GetOsrThreshold() const {
+    return osr_threshold_;
+  }
   size_t GetCodeCacheInitialCapacity() const {
     return code_cache_initial_capacity_;
   }
@@ -155,6 +171,7 @@
   size_t code_cache_max_capacity_;
   size_t compile_threshold_;
   size_t warmup_threshold_;
+  size_t osr_threshold_;
   bool dump_info_on_shutdown_;
   bool save_profiling_info_;
 
diff --git a/runtime/jit/jit_code_cache.cc b/runtime/jit/jit_code_cache.cc
index f325949..464c441 100644
--- a/runtime/jit/jit_code_cache.cc
+++ b/runtime/jit/jit_code_cache.cc
@@ -184,7 +184,8 @@
                                   size_t core_spill_mask,
                                   size_t fp_spill_mask,
                                   const uint8_t* code,
-                                  size_t code_size) {
+                                  size_t code_size,
+                                  bool osr) {
   uint8_t* result = CommitCodeInternal(self,
                                        method,
                                        mapping_table,
@@ -194,7 +195,8 @@
                                        core_spill_mask,
                                        fp_spill_mask,
                                        code,
-                                       code_size);
+                                       code_size,
+                                       osr);
   if (result == nullptr) {
     // Retry.
     GarbageCollectCache(self);
@@ -207,7 +209,8 @@
                                 core_spill_mask,
                                 fp_spill_mask,
                                 code,
-                                code_size);
+                                code_size,
+                                osr);
   }
   return result;
 }
@@ -287,7 +290,8 @@
                                           size_t core_spill_mask,
                                           size_t fp_spill_mask,
                                           const uint8_t* code,
-                                          size_t code_size) {
+                                          size_t code_size,
+                                          bool osr) {
   size_t alignment = GetInstructionSetAlignment(kRuntimeISA);
   // Ensure the header ends up at expected instruction alignment.
   size_t header_size = RoundUp(sizeof(OatQuickMethodHeader), alignment);
@@ -329,8 +333,12 @@
   {
     MutexLock mu(self, lock_);
     method_code_map_.Put(code_ptr, method);
-    Runtime::Current()->GetInstrumentation()->UpdateMethodsCode(
-        method, method_header->GetEntryPoint());
+    if (osr) {
+      osr_code_map_.Put(method, code_ptr);
+    } else {
+      Runtime::Current()->GetInstrumentation()->UpdateMethodsCode(
+          method, method_header->GetEntryPoint());
+    }
     if (collection_in_progress_) {
       // We need to update the live bitmap if there is a GC to ensure it sees this new
       // code.
@@ -338,7 +346,7 @@
     }
     last_update_time_ns_.StoreRelease(NanoTime());
     VLOG(jit)
-        << "JIT added "
+        << "JIT added (osr = " << std::boolalpha << osr << std::noboolalpha << ") "
         << PrettyMethod(method) << "@" << method
         << " ccache_size=" << PrettySize(CodeCacheSizeLocked()) << ": "
         << " dcache_size=" << PrettySize(DataCacheSizeLocked()) << ": "
@@ -569,6 +577,10 @@
         info->GetMethod()->SetProfilingInfo(nullptr);
       }
     }
+
+    // Empty osr method map, as osr compile code will be deleted (except the ones
+    // on thread stacks).
+    osr_code_map_.clear();
   }
 
   // Run a checkpoint on all threads to mark the JIT compiled code they are running.
@@ -662,6 +674,15 @@
   return method_header;
 }
 
+OatQuickMethodHeader* JitCodeCache::LookupOsrMethodHeader(ArtMethod* method) {
+  MutexLock mu(Thread::Current(), lock_);
+  auto it = osr_code_map_.find(method);
+  if (it == osr_code_map_.end()) {
+    return nullptr;
+  }
+  return OatQuickMethodHeader::FromCodePointer(it->second);
+}
+
 ProfilingInfo* JitCodeCache::AddProfilingInfo(Thread* self,
                                               ArtMethod* method,
                                               const std::vector<uint32_t>& entries,
@@ -733,12 +754,15 @@
   return last_update_time_ns_.LoadAcquire();
 }
 
-bool JitCodeCache::NotifyCompilationOf(ArtMethod* method, Thread* self) {
-  if (ContainsPc(method->GetEntryPointFromQuickCompiledCode())) {
+bool JitCodeCache::NotifyCompilationOf(ArtMethod* method, Thread* self, bool osr) {
+  if (!osr && ContainsPc(method->GetEntryPointFromQuickCompiledCode())) {
     return false;
   }
 
   MutexLock mu(self, lock_);
+  if (osr && (osr_code_map_.find(method) != osr_code_map_.end())) {
+    return false;
+  }
   ProfilingInfo* info = method->GetProfilingInfo(sizeof(void*));
   if (info == nullptr || info->IsMethodBeingCompiled()) {
     return false;
diff --git a/runtime/jit/jit_code_cache.h b/runtime/jit/jit_code_cache.h
index 69fc553..048f8d0 100644
--- a/runtime/jit/jit_code_cache.h
+++ b/runtime/jit/jit_code_cache.h
@@ -71,7 +71,7 @@
   // Number of compilations done throughout the lifetime of the JIT.
   size_t NumberOfCompilations() REQUIRES(!lock_);
 
-  bool NotifyCompilationOf(ArtMethod* method, Thread* self)
+  bool NotifyCompilationOf(ArtMethod* method, Thread* self, bool osr)
       SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!lock_);
 
@@ -89,7 +89,8 @@
                       size_t core_spill_mask,
                       size_t fp_spill_mask,
                       const uint8_t* code,
-                      size_t code_size)
+                      size_t code_size,
+                      bool osr)
       SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!lock_);
 
@@ -131,6 +132,10 @@
       REQUIRES(!lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  OatQuickMethodHeader* LookupOsrMethodHeader(ArtMethod* method)
+      REQUIRES(!lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
   // Remove all methods in our cache that were allocated by 'alloc'.
   void RemoveMethodsIn(Thread* self, const LinearAlloc& alloc)
       REQUIRES(!lock_)
@@ -187,7 +192,8 @@
                               size_t core_spill_mask,
                               size_t fp_spill_mask,
                               const uint8_t* code,
-                              size_t code_size)
+                              size_t code_size,
+                              bool osr)
       REQUIRES(!lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
@@ -237,8 +243,10 @@
   void* data_mspace_ GUARDED_BY(lock_);
   // Bitmap for collecting code and data.
   std::unique_ptr<CodeCacheBitmap> live_bitmap_;
-  // This map holds compiled code associated to the ArtMethod.
+  // Holds compiled code associated to the ArtMethod.
   SafeMap<const void*, ArtMethod*> method_code_map_ GUARDED_BY(lock_);
+  // Holds osr compiled code associated to the ArtMethod.
+  SafeMap<ArtMethod*, const void*> osr_code_map_ GUARDED_BY(lock_);
   // ProfilingInfo objects we have allocated.
   std::vector<ProfilingInfo*> profiling_infos_ GUARDED_BY(lock_);
 
diff --git a/runtime/jit/jit_instrumentation.cc b/runtime/jit/jit_instrumentation.cc
index d597b36..a4e40ad 100644
--- a/runtime/jit/jit_instrumentation.cc
+++ b/runtime/jit/jit_instrumentation.cc
@@ -29,7 +29,8 @@
  public:
   enum TaskKind {
     kAllocateProfile,
-    kCompile
+    kCompile,
+    kCompileOsr
   };
 
   JitCompileTask(ArtMethod* method, TaskKind kind) : method_(method), kind_(kind) {
@@ -48,9 +49,14 @@
     ScopedObjectAccess soa(self);
     if (kind_ == kCompile) {
       VLOG(jit) << "JitCompileTask compiling method " << PrettyMethod(method_);
-      if (!Runtime::Current()->GetJit()->CompileMethod(method_, self)) {
+      if (!Runtime::Current()->GetJit()->CompileMethod(method_, self, /* osr */ false)) {
         VLOG(jit) << "Failed to compile method " << PrettyMethod(method_);
       }
+    } else if (kind_ == kCompileOsr) {
+      VLOG(jit) << "JitCompileTask compiling method osr " << PrettyMethod(method_);
+      if (!Runtime::Current()->GetJit()->CompileMethod(method_, self, /* osr */ true)) {
+        VLOG(jit) << "Failed to compile method osr " << PrettyMethod(method_);
+      }
     } else {
       DCHECK(kind_ == kAllocateProfile);
       if (ProfilingInfo::Create(self, method_, /* retry_allocation */ true)) {
@@ -72,9 +78,11 @@
 };
 
 JitInstrumentationCache::JitInstrumentationCache(size_t hot_method_threshold,
-                                                 size_t warm_method_threshold)
+                                                 size_t warm_method_threshold,
+                                                 size_t osr_method_threshold)
     : hot_method_threshold_(hot_method_threshold),
       warm_method_threshold_(warm_method_threshold),
+      osr_method_threshold_(osr_method_threshold),
       listener_(this) {
 }
 
@@ -151,6 +159,11 @@
     DCHECK(thread_pool_ != nullptr);
     thread_pool_->AddTask(self, new JitCompileTask(method, JitCompileTask::kCompile));
   }
+
+  if (sample_count == osr_method_threshold_) {
+    DCHECK(thread_pool_ != nullptr);
+    thread_pool_->AddTask(self, new JitCompileTask(method, JitCompileTask::kCompileOsr));
+  }
 }
 
 JitInstrumentationListener::JitInstrumentationListener(JitInstrumentationCache* cache)
diff --git a/runtime/jit/jit_instrumentation.h b/runtime/jit/jit_instrumentation.h
index 06559ad..d1c5c44 100644
--- a/runtime/jit/jit_instrumentation.h
+++ b/runtime/jit/jit_instrumentation.h
@@ -96,7 +96,9 @@
 // Keeps track of which methods are hot.
 class JitInstrumentationCache {
  public:
-  JitInstrumentationCache(size_t hot_method_threshold, size_t warm_method_threshold);
+  JitInstrumentationCache(size_t hot_method_threshold,
+                          size_t warm_method_threshold,
+                          size_t osr_method_threshold);
   void AddSamples(Thread* self, ArtMethod* method, size_t samples)
       SHARED_REQUIRES(Locks::mutator_lock_);
   void CreateThreadPool();
@@ -112,6 +114,7 @@
  private:
   size_t hot_method_threshold_;
   size_t warm_method_threshold_;
+  size_t osr_method_threshold_;
   JitInstrumentationListener listener_;
   std::unique_ptr<ThreadPool> thread_pool_;
 
diff --git a/runtime/oat_quick_method_header.h b/runtime/oat_quick_method_header.h
index 5643739..2b7eca2 100644
--- a/runtime/oat_quick_method_header.h
+++ b/runtime/oat_quick_method_header.h
@@ -108,7 +108,7 @@
   }
 
   template <bool kCheckFrameSize = true>
-  uint32_t GetFrameSizeInBytes() {
+  uint32_t GetFrameSizeInBytes() const {
     uint32_t result = frame_info_.FrameSizeInBytes();
     if (kCheckFrameSize) {
       DCHECK_LE(static_cast<size_t>(kStackAlignment), result);
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index b1b7473..1b59c6f 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -1887,7 +1887,8 @@
   if (jit_.get() != nullptr) {
     compiler_callbacks_ = jit_->GetCompilerCallbacks();
     jit_->CreateInstrumentationCache(jit_options_->GetCompileThreshold(),
-                                     jit_options_->GetWarmupThreshold());
+                                     jit_options_->GetWarmupThreshold(),
+                                     jit_options_->GetOsrThreshold());
     jit_->CreateThreadPool();
 
     // Notify native debugger about the classes already loaded before the creation of the jit.
diff --git a/runtime/runtime_linux.cc b/runtime/runtime_linux.cc
index 122dcb1..8237b06 100644
--- a/runtime/runtime_linux.cc
+++ b/runtime/runtime_linux.cc
@@ -114,6 +114,9 @@
       switch (signal_code) {
         case SEGV_MAPERR: return "SEGV_MAPERR";
         case SEGV_ACCERR: return "SEGV_ACCERR";
+#if defined(SEGV_BNDERR)
+        case SEGV_BNDERR: return "SEGV_BNDERR";
+#endif
       }
       break;
     case SIGTRAP:
diff --git a/runtime/stack_map.h b/runtime/stack_map.h
index 84185ce..97eb805 100644
--- a/runtime/stack_map.h
+++ b/runtime/stack_map.h
@@ -1195,6 +1195,35 @@
     return StackMap();
   }
 
+  StackMap GetOsrStackMapForDexPc(uint32_t dex_pc, const StackMapEncoding& encoding) const {
+    size_t e = GetNumberOfStackMaps();
+    if (e == 0) {
+      // There cannot be OSR stack map if there is no stack map.
+      return StackMap();
+    }
+    // Walk over all stack maps. If two consecutive stack maps are identical, then we
+    // have found a stack map suitable for OSR.
+    for (size_t i = 0; i < e - 1; ++i) {
+      StackMap stack_map = GetStackMapAt(i, encoding);
+      if (stack_map.GetDexPc(encoding) == dex_pc) {
+        StackMap other = GetStackMapAt(i + 1, encoding);
+        if (other.GetDexPc(encoding) == dex_pc &&
+            other.GetNativePcOffset(encoding) == stack_map.GetNativePcOffset(encoding)) {
+          DCHECK_EQ(other.GetDexRegisterMapOffset(encoding),
+                    stack_map.GetDexRegisterMapOffset(encoding));
+          DCHECK(!stack_map.HasInlineInfo(encoding));
+          if (i < e - 2) {
+            // Make sure there are not three identical stack maps following each other.
+            DCHECK_NE(stack_map.GetNativePcOffset(encoding),
+                      GetStackMapAt(i + 2, encoding).GetNativePcOffset(encoding));
+          }
+          return stack_map;
+        }
+      }
+    }
+    return StackMap();
+  }
+
   StackMap GetStackMapForNativePcOffset(uint32_t native_pc_offset,
                                         const StackMapEncoding& encoding) const {
     // TODO: Safepoint stack maps are sorted by native_pc_offset but catch stack
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 2abcd67..7a45594 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -1599,7 +1599,7 @@
   tls32_.state_and_flags.as_struct.state = kNative;
   memset(&tlsPtr_.held_mutexes[0], 0, sizeof(tlsPtr_.held_mutexes));
   std::fill(tlsPtr_.rosalloc_runs,
-            tlsPtr_.rosalloc_runs + kNumRosAllocThreadLocalSizeBrackets,
+            tlsPtr_.rosalloc_runs + kNumRosAllocThreadLocalSizeBracketsInThread,
             gc::allocator::RosAlloc::GetDedicatedFullRun());
   for (uint32_t i = 0; i < kMaxCheckpoints; ++i) {
     tlsPtr_.checkpoint_functions[i] = nullptr;
@@ -3012,4 +3012,25 @@
   return count;
 }
 
+
+void Thread::DeoptimizeWithDeoptimizationException(JValue* result) {
+  DCHECK_EQ(GetException(), Thread::GetDeoptimizationException());
+  ClearException();
+  ShadowFrame* shadow_frame =
+      PopStackedShadowFrame(StackedShadowFrameType::kDeoptimizationShadowFrame);
+  mirror::Throwable* pending_exception = nullptr;
+  bool from_code = false;
+  PopDeoptimizationContext(result, &pending_exception, &from_code);
+  CHECK(!from_code) << "Deoptimizing from code should be done with single frame deoptimization";
+  SetTopOfStack(nullptr);
+  SetTopOfShadowStack(shadow_frame);
+
+  // Restore the exception that was pending before deoptimization then interpret the
+  // deoptimized frames.
+  if (pending_exception != nullptr) {
+    SetException(pending_exception);
+  }
+  interpreter::EnterInterpreterFromDeoptimize(this, shadow_frame, from_code, result);
+}
+
 }  // namespace art
diff --git a/runtime/thread.h b/runtime/thread.h
index d7887ca..3a5d72e 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -113,7 +113,8 @@
   kSingleFrameDeoptimizationShadowFrame
 };
 
-static constexpr size_t kNumRosAllocThreadLocalSizeBrackets = 34;
+// This should match RosAlloc::kNumThreadLocalSizeBrackets.
+static constexpr size_t kNumRosAllocThreadLocalSizeBracketsInThread = 16;
 
 // Thread's stack layout for implicit stack overflow checks:
 //
@@ -552,6 +553,9 @@
         OFFSETOF_MEMBER(tls_32bit_sized_values, is_gc_marking));
   }
 
+  // Deoptimize the Java stack.
+  void DeoptimizeWithDeoptimizationException(JValue* result) SHARED_REQUIRES(Locks::mutator_lock_);
+
  private:
   template<size_t pointer_size>
   static ThreadOffset<pointer_size> ThreadOffsetFromTlsPtr(size_t tls_ptr_offset) {
@@ -1421,7 +1425,7 @@
     void* mterp_alt_ibase;
 
     // There are RosAlloc::kNumThreadLocalSizeBrackets thread-local size brackets per thread.
-    void* rosalloc_runs[kNumRosAllocThreadLocalSizeBrackets];
+    void* rosalloc_runs[kNumRosAllocThreadLocalSizeBracketsInThread];
 
     // Thread-local allocation stack data/routines.
     StackReference<mirror::Object>* thread_local_alloc_stack_top;
diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc
index 56154c6..c7ac172 100644
--- a/runtime/verifier/method_verifier.cc
+++ b/runtime/verifier/method_verifier.cc
@@ -58,6 +58,10 @@
 // On VLOG(verifier), should we dump the whole state when we run into a hard failure?
 static constexpr bool kDumpRegLinesOnHardFailureIfVLOG = true;
 
+// We print a warning blurb about "dx --no-optimize" when we find monitor-locking issues. Make
+// sure we only print this once.
+static bool gPrintedDxMonitorText = false;
+
 PcToRegisterLineTable::PcToRegisterLineTable(ScopedArenaAllocator& arena)
     : register_lines_(arena.Adapter(kArenaAllocVerifier)) {}
 
@@ -166,23 +170,38 @@
   return kDirect ? it->HasNextDirectMethod() : it->HasNextVirtualMethod();
 }
 
+static MethodVerifier::FailureKind FailureKindMax(MethodVerifier::FailureKind fk1,
+                                                  MethodVerifier::FailureKind fk2) {
+  static_assert(MethodVerifier::FailureKind::kNoFailure <
+                    MethodVerifier::FailureKind::kSoftFailure
+                && MethodVerifier::FailureKind::kSoftFailure <
+                       MethodVerifier::FailureKind::kHardFailure,
+                "Unexpected FailureKind order");
+  return std::max(fk1, fk2);
+}
+
+void MethodVerifier::FailureData::Merge(const MethodVerifier::FailureData& fd) {
+  kind = FailureKindMax(kind, fd.kind);
+  types |= fd.types;
+}
+
 template <bool kDirect>
-void MethodVerifier::VerifyMethods(Thread* self,
-                                   ClassLinker* linker,
-                                   const DexFile* dex_file,
-                                   const DexFile::ClassDef* class_def,
-                                   ClassDataItemIterator* it,
-                                   Handle<mirror::DexCache> dex_cache,
-                                   Handle<mirror::ClassLoader> class_loader,
-                                   CompilerCallbacks* callbacks,
-                                   bool allow_soft_failures,
-                                   bool log_hard_failures,
-                                   bool need_precise_constants,
-                                   bool* hard_fail,
-                                   size_t* error_count,
-                                   std::string* error_string) {
+MethodVerifier::FailureData MethodVerifier::VerifyMethods(Thread* self,
+                                                          ClassLinker* linker,
+                                                          const DexFile* dex_file,
+                                                          const DexFile::ClassDef* class_def,
+                                                          ClassDataItemIterator* it,
+                                                          Handle<mirror::DexCache> dex_cache,
+                                                          Handle<mirror::ClassLoader> class_loader,
+                                                          CompilerCallbacks* callbacks,
+                                                          bool allow_soft_failures,
+                                                          bool log_hard_failures,
+                                                          bool need_precise_constants,
+                                                          std::string* error_string) {
   DCHECK(it != nullptr);
 
+  MethodVerifier::FailureData failure_data;
+
   int64_t previous_method_idx = -1;
   while (HasNextMethod<kDirect>(it)) {
     self->AllowThreadSuspension();
@@ -206,7 +225,7 @@
     }
     StackHandleScope<1> hs(self);
     std::string hard_failure_msg;
-    MethodVerifier::FailureKind result = VerifyMethod(self,
+    MethodVerifier::FailureData result = VerifyMethod(self,
                                                       method_idx,
                                                       dex_file,
                                                       dex_cache,
@@ -220,24 +239,24 @@
                                                       log_hard_failures,
                                                       need_precise_constants,
                                                       &hard_failure_msg);
-    if (result != kNoFailure) {
-      if (result == kHardFailure) {
-        if (*error_count > 0) {
-          *error_string += "\n";
-        }
-        if (!*hard_fail) {
-          *error_string += "Verifier rejected class ";
-          *error_string += PrettyDescriptor(dex_file->GetClassDescriptor(*class_def));
-          *error_string += ":";
-        }
-        *error_string += " ";
-        *error_string += hard_failure_msg;
-        *hard_fail = true;
+    if (result.kind == kHardFailure) {
+      if (failure_data.kind == kHardFailure) {
+        // If we logged an error before, we need a newline.
+        *error_string += "\n";
+      } else {
+        // If we didn't log a hard failure before, print the header of the message.
+        *error_string += "Verifier rejected class ";
+        *error_string += PrettyDescriptor(dex_file->GetClassDescriptor(*class_def));
+        *error_string += ":";
       }
-      *error_count = *error_count + 1;
+      *error_string += " ";
+      *error_string += hard_failure_msg;
     }
+    failure_data.Merge(result);
     it->Next();
   }
+
+  return failure_data;
 }
 
 MethodVerifier::FailureKind MethodVerifier::VerifyClass(Thread* self,
@@ -268,44 +287,53 @@
   while (it.HasNextStaticField() || it.HasNextInstanceField()) {
     it.Next();
   }
-  size_t error_count = 0;
-  bool hard_fail = false;
   ClassLinker* linker = Runtime::Current()->GetClassLinker();
   // Direct methods.
-  VerifyMethods<true>(self,
-                      linker,
-                      dex_file,
-                      class_def,
-                      &it,
-                      dex_cache,
-                      class_loader,
-                      callbacks,
-                      allow_soft_failures,
-                      log_hard_failures,
-                      false /* need precise constants */,
-                      &hard_fail,
-                      &error_count,
-                      error);
+  MethodVerifier::FailureData data1 = VerifyMethods<true>(self,
+                                                          linker,
+                                                          dex_file,
+                                                          class_def,
+                                                          &it,
+                                                          dex_cache,
+                                                          class_loader,
+                                                          callbacks,
+                                                          allow_soft_failures,
+                                                          log_hard_failures,
+                                                          false /* need precise constants */,
+                                                          error);
   // Virtual methods.
-  VerifyMethods<false>(self,
-                      linker,
-                      dex_file,
-                      class_def,
-                      &it,
-                      dex_cache,
-                      class_loader,
-                      callbacks,
-                      allow_soft_failures,
-                      log_hard_failures,
-                      false /* need precise constants */,
-                      &hard_fail,
-                      &error_count,
-                      error);
+  MethodVerifier::FailureData data2 = VerifyMethods<false>(self,
+                                                           linker,
+                                                           dex_file,
+                                                           class_def,
+                                                           &it,
+                                                           dex_cache,
+                                                           class_loader,
+                                                           callbacks,
+                                                           allow_soft_failures,
+                                                           log_hard_failures,
+                                                           false /* need precise constants */,
+                                                           error);
 
-  if (error_count == 0) {
+  data1.Merge(data2);
+
+  if (data1.kind == kNoFailure) {
     return kNoFailure;
   } else {
-    return hard_fail ? kHardFailure : kSoftFailure;
+    if ((data1.types & VERIFY_ERROR_LOCKING) != 0) {
+      // Print a warning about expected slow-down. Use a string temporary to print one contiguous
+      // warning.
+      std::string tmp =
+          StringPrintf("Class %s failed lock verification and will run slower.",
+                       PrettyDescriptor(dex_file->GetClassDescriptor(*class_def)).c_str());
+      if (!gPrintedDxMonitorText) {
+        tmp = tmp + "\nCommon causes for lock verification issues are non-optimized dex code\n"
+                    "and incorrect proguard optimizations.";
+        gPrintedDxMonitorText = true;
+      }
+      LOG(WARNING) << tmp;
+    }
+    return data1.kind;
   }
 }
 
@@ -320,7 +348,7 @@
   return registers_size * insns_size > 4*1024*1024;
 }
 
-MethodVerifier::FailureKind MethodVerifier::VerifyMethod(Thread* self,
+MethodVerifier::FailureData MethodVerifier::VerifyMethod(Thread* self,
                                                          uint32_t method_idx,
                                                          const DexFile* dex_file,
                                                          Handle<mirror::DexCache> dex_cache,
@@ -334,7 +362,7 @@
                                                          bool log_hard_failures,
                                                          bool need_precise_constants,
                                                          std::string* hard_failure_msg) {
-  MethodVerifier::FailureKind result = kNoFailure;
+  MethodVerifier::FailureData result;
   uint64_t start_ns = kTimeVerifyMethod ? NanoTime() : 0;
 
   MethodVerifier verifier(self, dex_file, dex_cache, class_loader, class_def, code_item,
@@ -355,7 +383,7 @@
         verifier.DumpFailures(VLOG_STREAM(verifier) << "Soft verification failures in "
                                                     << PrettyMethod(method_idx, *dex_file) << "\n");
       }
-      result = kSoftFailure;
+      result.kind = kSoftFailure;
     }
   } else {
     // Bad method data.
@@ -364,7 +392,7 @@
     if (UNLIKELY(verifier.have_pending_experimental_failure_)) {
       // Failed due to being forced into interpreter. This is ok because
       // we just want to skip verification.
-      result = kSoftFailure;
+      result.kind = kSoftFailure;
     } else {
       CHECK(verifier.have_pending_hard_failure_);
       if (VLOG_IS_ON(verifier) || log_hard_failures) {
@@ -376,7 +404,7 @@
         *hard_failure_msg =
             verifier.failure_messages_[verifier.failure_messages_.size() - 1]->str();
       }
-      result = kHardFailure;
+      result.kind = kHardFailure;
 
       if (callbacks != nullptr) {
         // Let the interested party know that we failed the class.
@@ -397,6 +425,7 @@
                    << (IsLargeMethod(code_item) ? " (large method)" : "");
     }
   }
+  result.types = verifier.encountered_failure_types_;
   return result;
 }
 
diff --git a/runtime/verifier/method_verifier.h b/runtime/verifier/method_verifier.h
index 613d5af..c7d1e6b 100644
--- a/runtime/verifier/method_verifier.h
+++ b/runtime/verifier/method_verifier.h
@@ -25,6 +25,7 @@
 #include "base/macros.h"
 #include "base/scoped_arena_containers.h"
 #include "base/stl_util.h"
+#include "base/value_object.h"
 #include "dex_file.h"
 #include "handle.h"
 #include "instruction_flags.h"
@@ -344,23 +345,31 @@
   // Adds the given string to the end of the last failure message.
   void AppendToLastFailMessage(std::string);
 
+  // Verification result for method(s). Includes a (maximum) failure kind, and (the union of)
+  // all failure types.
+  struct FailureData : ValueObject {
+    FailureKind kind = kNoFailure;
+    uint32_t types = 0U;
+
+    // Merge src into this. Uses the most severe failure kind, and the union of types.
+    void Merge(const FailureData& src);
+  };
+
   // Verify all direct or virtual methods of a class. The method assumes that the iterator is
   // positioned correctly, and the iterator will be updated.
   template <bool kDirect>
-  static void VerifyMethods(Thread* self,
-                            ClassLinker* linker,
-                            const DexFile* dex_file,
-                            const DexFile::ClassDef* class_def,
-                            ClassDataItemIterator* it,
-                            Handle<mirror::DexCache> dex_cache,
-                            Handle<mirror::ClassLoader> class_loader,
-                            CompilerCallbacks* callbacks,
-                            bool allow_soft_failures,
-                            bool log_hard_failures,
-                            bool need_precise_constants,
-                            bool* hard_fail,
-                            size_t* error_count,
-                            std::string* error_string)
+  static FailureData VerifyMethods(Thread* self,
+                                   ClassLinker* linker,
+                                   const DexFile* dex_file,
+                                   const DexFile::ClassDef* class_def,
+                                   ClassDataItemIterator* it,
+                                   Handle<mirror::DexCache> dex_cache,
+                                   Handle<mirror::ClassLoader> class_loader,
+                                   CompilerCallbacks* callbacks,
+                                   bool allow_soft_failures,
+                                   bool log_hard_failures,
+                                   bool need_precise_constants,
+                                   std::string* error_string)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   /*
@@ -374,7 +383,7 @@
    *  (3) Iterate through the method, checking type safety and looking
    *      for code flow problems.
    */
-  static FailureKind VerifyMethod(Thread* self, uint32_t method_idx,
+  static FailureData VerifyMethod(Thread* self, uint32_t method_idx,
                                   const DexFile* dex_file,
                                   Handle<mirror::DexCache> dex_cache,
                                   Handle<mirror::ClassLoader> class_loader,
diff --git a/runtime/verifier/register_line-inl.h b/runtime/verifier/register_line-inl.h
index 08f85b3..330c06a 100644
--- a/runtime/verifier/register_line-inl.h
+++ b/runtime/verifier/register_line-inl.h
@@ -178,9 +178,9 @@
   if (MonitorStackDepth() != 0) {
     verifier->Fail(VERIFY_ERROR_LOCKING);
     if (kDumpLockFailures) {
-      LOG(WARNING) << "expected empty monitor stack in "
-                   << PrettyMethod(verifier->GetMethodReference().dex_method_index,
-                                   *verifier->GetMethodReference().dex_file);
+      VLOG(verifier) << "expected empty monitor stack in "
+                     << PrettyMethod(verifier->GetMethodReference().dex_method_index,
+                                     *verifier->GetMethodReference().dex_file);
     }
   }
 }
diff --git a/runtime/verifier/register_line.cc b/runtime/verifier/register_line.cc
index 37343b5..b7cde99 100644
--- a/runtime/verifier/register_line.cc
+++ b/runtime/verifier/register_line.cc
@@ -348,9 +348,9 @@
   } else if (monitors_.size() >= 32) {
     verifier->Fail(VERIFY_ERROR_LOCKING);
     if (kDumpLockFailures) {
-      LOG(WARNING) << "monitor-enter stack overflow while verifying "
-                   << PrettyMethod(verifier->GetMethodReference().dex_method_index,
-                                   *verifier->GetMethodReference().dex_file);
+      VLOG(verifier) << "monitor-enter stack overflow while verifying "
+                     << PrettyMethod(verifier->GetMethodReference().dex_method_index,
+                                     *verifier->GetMethodReference().dex_file);
     }
   } else {
     if (SetRegToLockDepth(reg_idx, monitors_.size())) {
@@ -364,9 +364,9 @@
     } else {
       verifier->Fail(VERIFY_ERROR_LOCKING);
       if (kDumpLockFailures) {
-        LOG(WARNING) << "unexpected monitor-enter on register v" <<  reg_idx << " in "
-                     << PrettyMethod(verifier->GetMethodReference().dex_method_index,
-                                     *verifier->GetMethodReference().dex_file);
+        VLOG(verifier) << "unexpected monitor-enter on register v" <<  reg_idx << " in "
+                       << PrettyMethod(verifier->GetMethodReference().dex_method_index,
+                                       *verifier->GetMethodReference().dex_file);
       }
     }
   }
@@ -379,9 +379,9 @@
   } else if (monitors_.empty()) {
     verifier->Fail(VERIFY_ERROR_LOCKING);
     if (kDumpLockFailures) {
-      LOG(WARNING) << "monitor-exit stack underflow while verifying "
-                   << PrettyMethod(verifier->GetMethodReference().dex_method_index,
-                                   *verifier->GetMethodReference().dex_file);
+      VLOG(verifier) << "monitor-exit stack underflow while verifying "
+                     << PrettyMethod(verifier->GetMethodReference().dex_method_index,
+                                     *verifier->GetMethodReference().dex_file);
     }
   } else {
     monitors_.pop_back();
@@ -400,9 +400,9 @@
     if (!success) {
       verifier->Fail(VERIFY_ERROR_LOCKING);
       if (kDumpLockFailures) {
-        LOG(WARNING) << "monitor-exit not unlocking the top of the monitor stack while verifying "
-                     << PrettyMethod(verifier->GetMethodReference().dex_method_index,
-                                     *verifier->GetMethodReference().dex_file);
+        VLOG(verifier) << "monitor-exit not unlocking the top of the monitor stack while verifying "
+                       << PrettyMethod(verifier->GetMethodReference().dex_method_index,
+                                       *verifier->GetMethodReference().dex_file);
       }
     } else {
       // Record the register was unlocked. This clears all aliases, thus it will also clear the
@@ -453,10 +453,10 @@
     if (monitors_.size() != incoming_line->monitors_.size()) {
       verifier->Fail(VERIFY_ERROR_LOCKING);
       if (kDumpLockFailures) {
-        LOG(WARNING) << "mismatched stack depths (depth=" << MonitorStackDepth()
-                     << ", incoming depth=" << incoming_line->MonitorStackDepth() << ") in "
-                     << PrettyMethod(verifier->GetMethodReference().dex_method_index,
-                                     *verifier->GetMethodReference().dex_file);
+        VLOG(verifier) << "mismatched stack depths (depth=" << MonitorStackDepth()
+                       << ", incoming depth=" << incoming_line->MonitorStackDepth() << ") in "
+                       << PrettyMethod(verifier->GetMethodReference().dex_method_index,
+                                       *verifier->GetMethodReference().dex_file);
       }
     } else if (reg_to_lock_depths_ != incoming_line->reg_to_lock_depths_) {
       for (uint32_t idx = 0; idx < num_regs_; idx++) {
@@ -488,10 +488,10 @@
                                        reg_to_lock_depths_)) {
             verifier->Fail(VERIFY_ERROR_LOCKING);
             if (kDumpLockFailures) {
-              LOG(WARNING) << "mismatched stack depths for register v" << idx
-                           << ": " << depths  << " != " << incoming_depths << " in "
-                           << PrettyMethod(verifier->GetMethodReference().dex_method_index,
-                                           *verifier->GetMethodReference().dex_file);
+              VLOG(verifier) << "mismatched stack depths for register v" << idx
+                             << ": " << depths  << " != " << incoming_depths << " in "
+                             << PrettyMethod(verifier->GetMethodReference().dex_method_index,
+                                             *verifier->GetMethodReference().dex_file);
             }
             break;
           }
@@ -530,11 +530,11 @@
               // No aliases for both current and incoming, we'll lose information.
               verifier->Fail(VERIFY_ERROR_LOCKING);
               if (kDumpLockFailures) {
-                LOG(WARNING) << "mismatched lock levels for register v" << idx << ": "
-                    << std::hex << locked_levels << std::dec  << " != "
-                    << std::hex << incoming_locked_levels << std::dec << " in "
-                    << PrettyMethod(verifier->GetMethodReference().dex_method_index,
-                                    *verifier->GetMethodReference().dex_file);
+                VLOG(verifier) << "mismatched lock levels for register v" << idx << ": "
+                               << std::hex << locked_levels << std::dec  << " != "
+                               << std::hex << incoming_locked_levels << std::dec << " in "
+                               << PrettyMethod(verifier->GetMethodReference().dex_method_index,
+                                               *verifier->GetMethodReference().dex_file);
               }
               break;
             }
diff --git a/test/565-checker-doublenegbitwise/src/Main.java b/test/565-checker-doublenegbitwise/src/Main.java
index c51eda8..41af97b 100644
--- a/test/565-checker-doublenegbitwise/src/Main.java
+++ b/test/565-checker-doublenegbitwise/src/Main.java
@@ -56,6 +56,8 @@
   /// CHECK-START: int Main.$opt$noinline$andToOr(int, int) instruction_simplifier (after)
   /// CHECK:                            Not
   /// CHECK-NOT:                        Not
+
+  /// CHECK-START: int Main.$opt$noinline$andToOr(int, int) instruction_simplifier (after)
   /// CHECK-NOT:                        And
 
   public static int $opt$noinline$andToOr(int a, int b) {
@@ -64,6 +66,43 @@
   }
 
   /**
+   * Test transformation of Not/Not/And into Or/Not for boolean negations.
+   * Note that the graph before this instruction simplification pass does not
+   * contain `HBooleanNot` instructions. This is because this transformation
+   * follows the optimization of `HSelect` to `HBooleanNot` occurring in the
+   * same pass.
+   */
+
+  /// CHECK-START: boolean Main.$opt$noinline$booleanAndToOr(boolean, boolean) instruction_simplifier_after_bce (before)
+  /// CHECK:       <<P1:z\d+>>          ParameterValue
+  /// CHECK:       <<P2:z\d+>>          ParameterValue
+  /// CHECK-DAG:   <<Const0:i\d+>>      IntConstant 0
+  /// CHECK-DAG:   <<Const1:i\d+>>      IntConstant 1
+  /// CHECK:       <<Select1:i\d+>>     Select [<<Const1>>,<<Const0>>,<<P1>>]
+  /// CHECK:       <<Select2:i\d+>>     Select [<<Const1>>,<<Const0>>,<<P2>>]
+  /// CHECK:       <<And:i\d+>>         And [<<Select2>>,<<Select1>>]
+  /// CHECK:                            Return [<<And>>]
+
+  /// CHECK-START: boolean Main.$opt$noinline$booleanAndToOr(boolean, boolean) instruction_simplifier_after_bce (after)
+  /// CHECK:       <<Cond1:z\d+>>       ParameterValue
+  /// CHECK:       <<Cond2:z\d+>>       ParameterValue
+  /// CHECK:       <<Or:i\d+>>          Or [<<Cond2>>,<<Cond1>>]
+  /// CHECK:       <<BooleanNot:z\d+>>  BooleanNot [<<Or>>]
+  /// CHECK:                            Return [<<BooleanNot>>]
+
+  /// CHECK-START: boolean Main.$opt$noinline$booleanAndToOr(boolean, boolean) instruction_simplifier_after_bce (after)
+  /// CHECK:                            BooleanNot
+  /// CHECK-NOT:                        BooleanNot
+
+  /// CHECK-START: boolean Main.$opt$noinline$booleanAndToOr(boolean, boolean) instruction_simplifier_after_bce (after)
+  /// CHECK-NOT:                        And
+
+  public static boolean $opt$noinline$booleanAndToOr(boolean a, boolean b) {
+    if (doThrow) throw new Error();
+    return !a & !b;
+  }
+
+  /**
    * Test transformation of Not/Not/Or into And/Not.
    */
 
@@ -88,6 +127,8 @@
   /// CHECK-START: long Main.$opt$noinline$orToAnd(long, long) instruction_simplifier (after)
   /// CHECK:                            Not
   /// CHECK-NOT:                        Not
+
+  /// CHECK-START: long Main.$opt$noinline$orToAnd(long, long) instruction_simplifier (after)
   /// CHECK-NOT:                        Or
 
   public static long $opt$noinline$orToAnd(long a, long b) {
@@ -96,6 +137,43 @@
   }
 
   /**
+   * Test transformation of Not/Not/Or into Or/And for boolean negations.
+   * Note that the graph before this instruction simplification pass does not
+   * contain `HBooleanNot` instructions. This is because this transformation
+   * follows the optimization of `HSelect` to `HBooleanNot` occurring in the
+   * same pass.
+   */
+
+  /// CHECK-START: boolean Main.$opt$noinline$booleanOrToAnd(boolean, boolean) instruction_simplifier_after_bce (before)
+  /// CHECK:       <<P1:z\d+>>          ParameterValue
+  /// CHECK:       <<P2:z\d+>>          ParameterValue
+  /// CHECK-DAG:   <<Const0:i\d+>>      IntConstant 0
+  /// CHECK-DAG:   <<Const1:i\d+>>      IntConstant 1
+  /// CHECK:       <<Select1:i\d+>>     Select [<<Const1>>,<<Const0>>,<<P1>>]
+  /// CHECK:       <<Select2:i\d+>>     Select [<<Const1>>,<<Const0>>,<<P2>>]
+  /// CHECK:       <<Or:i\d+>>          Or [<<Select2>>,<<Select1>>]
+  /// CHECK:                            Return [<<Or>>]
+
+  /// CHECK-START: boolean Main.$opt$noinline$booleanOrToAnd(boolean, boolean) instruction_simplifier_after_bce (after)
+  /// CHECK:       <<Cond1:z\d+>>       ParameterValue
+  /// CHECK:       <<Cond2:z\d+>>       ParameterValue
+  /// CHECK:       <<And:i\d+>>         And [<<Cond2>>,<<Cond1>>]
+  /// CHECK:       <<BooleanNot:z\d+>>  BooleanNot [<<And>>]
+  /// CHECK:                            Return [<<BooleanNot>>]
+
+  /// CHECK-START: boolean Main.$opt$noinline$booleanOrToAnd(boolean, boolean) instruction_simplifier_after_bce (after)
+  /// CHECK:                            BooleanNot
+  /// CHECK-NOT:                        BooleanNot
+
+  /// CHECK-START: boolean Main.$opt$noinline$booleanOrToAnd(boolean, boolean) instruction_simplifier_after_bce (after)
+  /// CHECK-NOT:                        Or
+
+  public static boolean $opt$noinline$booleanOrToAnd(boolean a, boolean b) {
+    if (doThrow) throw new Error();
+    return !a | !b;
+  }
+
+  /**
    * Test that the transformation copes with inputs being separated from the
    * bitwise operations.
    * This is a regression test. The initial logic was inserting the new bitwise
@@ -127,6 +205,8 @@
   /// CHECK-START: int Main.$opt$noinline$regressInputsAway(int, int) instruction_simplifier (after)
   /// CHECK:                            Not
   /// CHECK-NOT:                        Not
+
+  /// CHECK-START: int Main.$opt$noinline$regressInputsAway(int, int) instruction_simplifier (after)
   /// CHECK-NOT:                        Or
 
   public static int $opt$noinline$regressInputsAway(int a, int b) {
@@ -167,6 +247,38 @@
   }
 
   /**
+   * Test transformation of Not/Not/Xor into Xor for boolean negations.
+   * Note that the graph before this instruction simplification pass does not
+   * contain `HBooleanNot` instructions. This is because this transformation
+   * follows the optimization of `HSelect` to `HBooleanNot` occurring in the
+   * same pass.
+   */
+
+  /// CHECK-START: boolean Main.$opt$noinline$booleanNotXorToXor(boolean, boolean) instruction_simplifier_after_bce (before)
+  /// CHECK:       <<P1:z\d+>>          ParameterValue
+  /// CHECK:       <<P2:z\d+>>          ParameterValue
+  /// CHECK-DAG:   <<Const0:i\d+>>      IntConstant 0
+  /// CHECK-DAG:   <<Const1:i\d+>>      IntConstant 1
+  /// CHECK:       <<Select1:i\d+>>     Select [<<Const1>>,<<Const0>>,<<P1>>]
+  /// CHECK:       <<Select2:i\d+>>     Select [<<Const1>>,<<Const0>>,<<P2>>]
+  /// CHECK:       <<Xor:i\d+>>         Xor [<<Select2>>,<<Select1>>]
+  /// CHECK:                            Return [<<Xor>>]
+
+  /// CHECK-START: boolean Main.$opt$noinline$booleanNotXorToXor(boolean, boolean) instruction_simplifier_after_bce (after)
+  /// CHECK:       <<Cond1:z\d+>>       ParameterValue
+  /// CHECK:       <<Cond2:z\d+>>       ParameterValue
+  /// CHECK:       <<Xor:i\d+>>         Xor [<<Cond2>>,<<Cond1>>]
+  /// CHECK:                            Return [<<Xor>>]
+
+  /// CHECK-START: boolean Main.$opt$noinline$booleanNotXorToXor(boolean, boolean) instruction_simplifier_after_bce (after)
+  /// CHECK-NOT:                        BooleanNot
+
+  public static boolean $opt$noinline$booleanNotXorToXor(boolean a, boolean b) {
+    if (doThrow) throw new Error();
+    return !a ^ !b;
+  }
+
+  /**
    * Check that no transformation is done when one Not has multiple uses.
    */
 
diff --git a/test/566-checker-codegen-select/src/Main.java b/test/566-checker-codegen-select/src/Main.java
index edb31e6..3a1b3fc 100644
--- a/test/566-checker-codegen-select/src/Main.java
+++ b/test/566-checker-codegen-select/src/Main.java
@@ -45,6 +45,13 @@
   /// CHECK:             LessThanOrEqual
   /// CHECK-NEXT:        Select
 
+  // Check that we generate CMOV for long on x86_64.
+  /// CHECK-START-X86_64: long Main.$noinline$longSelect_Constant(long) disassembly (after)
+  /// CHECK:             LessThanOrEqual
+  /// CHECK-NEXT:        Select
+  /// CHECK:             cmpq
+  /// CHECK:             cmovle/ngq
+
   public long $noinline$longSelect_Constant(long param) {
     if (doThrow) { throw new Error(); }
     long val_true = longB;
@@ -52,12 +59,34 @@
     return (param > 3L) ? val_true : val_false;
   }
 
+  // Check that we generate CMOV for int on x86_64.
+  /// CHECK-START-X86_64: int Main.$noinline$intSelect_Constant(int) disassembly (after)
+  /// CHECK:             LessThan
+  /// CHECK-NEXT:        Select
+  /// CHECK:             cmp
+  /// CHECK:             cmovl/nge
+
+  public int $noinline$intSelect_Constant(int param) {
+    if (doThrow) { throw new Error(); }
+    int val_true = intB;
+    int val_false = intC;
+    return (param >= 3) ? val_true : val_false;
+  }
+
   public static void main(String[] args) {
     Main m = new Main();
     assertLongEquals(5L, m.$noinline$longSelect(4L));
     assertLongEquals(7L, m.$noinline$longSelect(2L));
     assertLongEquals(5L, m.$noinline$longSelect_Constant(4L));
     assertLongEquals(7L, m.$noinline$longSelect_Constant(2L));
+    assertIntEquals(5, m.$noinline$intSelect_Constant(4));
+    assertIntEquals(7, m.$noinline$intSelect_Constant(2));
+  }
+
+  public static void assertIntEquals(int expected, int actual) {
+    if (expected != actual) {
+      throw new Error(expected + " != " + actual);
+    }
   }
 
   public static void assertLongEquals(long expected, long actual) {
@@ -71,4 +100,6 @@
   public long longA = 3L;
   public long longB = 5L;
   public long longC = 7L;
+  public int intB = 5;
+  public int intC = 7;
 }
diff --git a/test/566-checker-signum/src/Main.java b/test/566-checker-signum/src/Main.java
index cc4a984..0ad0042 100644
--- a/test/566-checker-signum/src/Main.java
+++ b/test/566-checker-signum/src/Main.java
@@ -54,6 +54,13 @@
     expectEquals(1, sign64(12345L));
     expectEquals(1, sign64(Long.MAX_VALUE));
 
+    expectEquals(-1, sign64(0x800000007FFFFFFFL));
+    expectEquals(-1, sign64(0x80000000FFFFFFFFL));
+    expectEquals(1, sign64(0x000000007FFFFFFFL));
+    expectEquals(1, sign64(0x00000000FFFFFFFFL));
+    expectEquals(1, sign64(0x7FFFFFFF7FFFFFFFL));
+    expectEquals(1, sign64(0x7FFFFFFFFFFFFFFFL));
+
     for (long i = -11L; i <= 11L; i++) {
       int expected = 0;
       if (i < 0) expected = -1;
@@ -61,6 +68,14 @@
       expectEquals(expected, sign64(i));
     }
 
+    for (long i = Long.MIN_VALUE; i <= Long.MIN_VALUE + 11L; i++) {
+      expectEquals(-1, sign64(i));
+    }
+
+    for (long i = Long.MAX_VALUE; i >= Long.MAX_VALUE - 11L; i--) {
+      expectEquals(1, sign64(i));
+    }
+
     System.out.println("passed");
   }
 
diff --git a/test/567-checker-compare/src/Main.java b/test/567-checker-compare/src/Main.java
index 52abb75..951d2c7 100644
--- a/test/567-checker-compare/src/Main.java
+++ b/test/567-checker-compare/src/Main.java
@@ -88,6 +88,10 @@
     expectEquals(1, compare64(Long.MAX_VALUE, 1L));
     expectEquals(1, compare64(Long.MAX_VALUE, Long.MAX_VALUE - 1L));
 
+    expectEquals(-1, compare64(0x111111117FFFFFFFL, 0x11111111FFFFFFFFL));
+    expectEquals(0, compare64(0x111111117FFFFFFFL, 0x111111117FFFFFFFL));
+    expectEquals(1, compare64(0x11111111FFFFFFFFL, 0x111111117FFFFFFFL));
+
     for (long i = -11L; i <= 11L; i++) {
       for (long j = -11L; j <= 11L; j++) {
         int expected = 0;
@@ -97,6 +101,14 @@
       }
     }
 
+    for (long i = Long.MIN_VALUE; i <= Long.MIN_VALUE + 11L; i++) {
+      expectEquals(-1, compare64(i, 0));
+    }
+
+    for (long i = Long.MAX_VALUE; i >= Long.MAX_VALUE - 11L; i--) {
+      expectEquals(1, compare64(i, 0));
+    }
+
     System.out.println("passed");
   }
 
diff --git a/test/570-checker-osr/expected.txt b/test/570-checker-osr/expected.txt
new file mode 100644
index 0000000..555c6a9
--- /dev/null
+++ b/test/570-checker-osr/expected.txt
@@ -0,0 +1,5 @@
+JNI_OnLoad called
+100000000
+200000000
+300000000
+400000000
diff --git a/test/570-checker-osr/info.txt b/test/570-checker-osr/info.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/570-checker-osr/info.txt
diff --git a/test/570-checker-osr/osr.cc b/test/570-checker-osr/osr.cc
new file mode 100644
index 0000000..fb84687
--- /dev/null
+++ b/test/570-checker-osr/osr.cc
@@ -0,0 +1,67 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "art_method.h"
+#include "jit/jit.h"
+#include "jit/jit_code_cache.h"
+#include "oat_quick_method_header.h"
+#include "scoped_thread_state_change.h"
+#include "stack_map.h"
+
+namespace art {
+
+class OsrVisitor : public StackVisitor {
+ public:
+  explicit OsrVisitor(Thread* thread)
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      : StackVisitor(thread, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFrames),
+        in_osr_method_(false) {}
+
+  bool VisitFrame() SHARED_REQUIRES(Locks::mutator_lock_) {
+    ArtMethod* m = GetMethod();
+    std::string m_name(m->GetName());
+
+    if ((m_name.compare("$noinline$returnInt") == 0) ||
+        (m_name.compare("$noinline$returnFloat") == 0) ||
+        (m_name.compare("$noinline$returnDouble") == 0) ||
+        (m_name.compare("$noinline$returnLong") == 0) ||
+        (m_name.compare("$noinline$deopt") == 0)) {
+      const OatQuickMethodHeader* header =
+          Runtime::Current()->GetJit()->GetCodeCache()->LookupOsrMethodHeader(m);
+      if (header != nullptr && header == GetCurrentOatQuickMethodHeader()) {
+        in_osr_method_ = true;
+      }
+      return false;
+    }
+    return true;
+  }
+
+  bool in_osr_method_;
+};
+
+extern "C" JNIEXPORT jboolean JNICALL Java_Main_ensureInOsrCode(JNIEnv*, jclass) {
+  jit::Jit* jit = Runtime::Current()->GetJit();
+  if (jit == nullptr) {
+    // Just return true for non-jit configurations to stop the infinite loop.
+    return JNI_TRUE;
+  }
+  ScopedObjectAccess soa(Thread::Current());
+  OsrVisitor visitor(soa.Self());
+  visitor.WalkStack();
+  return visitor.in_osr_method_;
+}
+
+}  // namespace art
diff --git a/test/570-checker-osr/smali/Osr.smali b/test/570-checker-osr/smali/Osr.smali
new file mode 100644
index 0000000..869c7c3
--- /dev/null
+++ b/test/570-checker-osr/smali/Osr.smali
@@ -0,0 +1,35 @@
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LOsr;
+
+.super Ljava/lang/Object;
+
+# Check that blocks only havig nops are not merged when they are loop headers.
+# This ensures we can do on-stack replacement for branches to those nop blocks.
+
+## CHECK-START: int Osr.simpleLoop(int, int) dead_code_elimination_final (after)
+## CHECK-DAG:                     SuspendCheck loop:<<OuterLoop:B\d+>> outer_loop:none
+## CHECK-DAG:                     SuspendCheck loop:{{B\d+}} outer_loop:<<OuterLoop>>
+.method public static simpleLoop(II)I
+   .registers 3
+   const/16 v0, 0
+   :nop_entry
+   nop
+   :loop_entry
+   add-int v0, v0, v0
+   if-eq v0, v1, :loop_entry
+   if-eq v0, v2, :nop_entry
+   return v0
+.end method
diff --git a/test/570-checker-osr/src/DeoptimizationController.java b/test/570-checker-osr/src/DeoptimizationController.java
new file mode 100644
index 0000000..907d133
--- /dev/null
+++ b/test/570-checker-osr/src/DeoptimizationController.java
@@ -0,0 +1,104 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// This file is a copy of 802-deoptimization/src/DeoptimizationController.java
+// because run-test requires standalone individual test.
+
+import java.io.File;
+import java.io.IOException;
+import java.lang.reflect.Method;
+
+/**
+ * Controls deoptimization using dalvik.system.VMDebug class.
+ */
+public class DeoptimizationController {
+  private static final String TEMP_FILE_NAME_PREFIX = "test";
+  private static final String TEMP_FILE_NAME_SUFFIX = ".trace";
+
+  private static File createTempFile() throws Exception {
+    try {
+      return  File.createTempFile(TEMP_FILE_NAME_PREFIX, TEMP_FILE_NAME_SUFFIX);
+    } catch (IOException e) {
+      System.setProperty("java.io.tmpdir", "/data/local/tmp");
+      try {
+        return File.createTempFile(TEMP_FILE_NAME_PREFIX, TEMP_FILE_NAME_SUFFIX);
+      } catch (IOException e2) {
+        System.setProperty("java.io.tmpdir", "/sdcard");
+        return File.createTempFile(TEMP_FILE_NAME_PREFIX, TEMP_FILE_NAME_SUFFIX);
+      }
+    }
+  }
+
+  public static void startDeoptimization() {
+    File tempFile = null;
+    try {
+      tempFile = createTempFile();
+      String tempFileName = tempFile.getPath();
+
+      VMDebug.startMethodTracing(tempFileName, 0, 0, false, 1000);
+      if (VMDebug.getMethodTracingMode() == 0) {
+        throw new IllegalStateException("Not tracing.");
+      }
+    } catch (Exception exc) {
+      exc.printStackTrace(System.err);
+    } finally {
+      if (tempFile != null) {
+        tempFile.delete();
+      }
+    }
+  }
+
+  public static void stopDeoptimization() {
+    try {
+      VMDebug.stopMethodTracing();
+      if (VMDebug.getMethodTracingMode() != 0) {
+        throw new IllegalStateException("Still tracing.");
+      }
+    } catch (Exception exc) {
+      exc.printStackTrace(System.err);
+    }
+  }
+
+  private static class VMDebug {
+    private static final Method startMethodTracingMethod;
+    private static final Method stopMethodTracingMethod;
+    private static final Method getMethodTracingModeMethod;
+
+    static {
+      try {
+        Class<?> c = Class.forName("dalvik.system.VMDebug");
+        startMethodTracingMethod = c.getDeclaredMethod("startMethodTracing", String.class,
+            Integer.TYPE, Integer.TYPE, Boolean.TYPE, Integer.TYPE);
+        stopMethodTracingMethod = c.getDeclaredMethod("stopMethodTracing");
+        getMethodTracingModeMethod = c.getDeclaredMethod("getMethodTracingMode");
+      } catch (Exception e) {
+        throw new RuntimeException(e);
+      }
+    }
+
+    public static void startMethodTracing(String filename, int bufferSize, int flags,
+        boolean samplingEnabled, int intervalUs) throws Exception {
+      startMethodTracingMethod.invoke(null, filename, bufferSize, flags, samplingEnabled,
+          intervalUs);
+    }
+    public static void stopMethodTracing() throws Exception {
+      stopMethodTracingMethod.invoke(null);
+    }
+    public static int getMethodTracingMode() throws Exception {
+      return (int) getMethodTracingModeMethod.invoke(null);
+    }
+  }
+}
diff --git a/test/570-checker-osr/src/Main.java b/test/570-checker-osr/src/Main.java
new file mode 100644
index 0000000..7485163
--- /dev/null
+++ b/test/570-checker-osr/src/Main.java
@@ -0,0 +1,92 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+  public static void main(String[] args) {
+    System.loadLibrary(args[0]);
+    if ($noinline$returnInt() != 53) {
+      throw new Error("Unexpected return value");
+    }
+    if ($noinline$returnFloat() != 42.2f) {
+      throw new Error("Unexpected return value");
+    }
+    if ($noinline$returnDouble() != Double.longBitsToDouble(0xF000000000001111L)) {
+      throw new Error("Unexpected return value ");
+    }
+    if ($noinline$returnLong() != 0xFFFF000000001111L) {
+      throw new Error("Unexpected return value");
+    }
+
+    try {
+      $noinline$deopt();
+    } catch (Exception e) {}
+  }
+
+  public static int $noinline$returnInt() {
+    if (doThrow) throw new Error("");
+    int i = 0;
+    for (; i < 100000000; ++i) {
+    }
+    while (!ensureInOsrCode()) {}
+    System.out.println(i);
+    return 53;
+  }
+
+  public static float $noinline$returnFloat() {
+    if (doThrow) throw new Error("");
+    int i = 0;
+    for (; i < 200000000; ++i) {
+    }
+    while (!ensureInOsrCode()) {}
+    System.out.println(i);
+    return 42.2f;
+  }
+
+  public static double $noinline$returnDouble() {
+    if (doThrow) throw new Error("");
+    int i = 0;
+    for (; i < 300000000; ++i) {
+    }
+    while (!ensureInOsrCode()) {}
+    System.out.println(i);
+    return Double.longBitsToDouble(0xF000000000001111L);
+  }
+
+  public static long $noinline$returnLong() {
+    if (doThrow) throw new Error("");
+    int i = 1000000;
+    for (; i < 400000000; ++i) {
+    }
+    while (!ensureInOsrCode()) {}
+    System.out.println(i);
+    return 0xFFFF000000001111L;
+  }
+
+  public static void $noinline$deopt() {
+    if (doThrow) throw new Error("");
+    int i = 0;
+    for (; i < 100000000; ++i) {
+    }
+    while (!ensureInOsrCode()) {}
+    DeoptimizationController.startDeoptimization();
+  }
+
+  public static int[] array = new int[4];
+
+  public static native boolean ensureInOsrCode();
+
+  public static boolean doThrow = false;
+}
diff --git a/test/570-checker-select/src/Main.java b/test/570-checker-select/src/Main.java
index 2f8094d..ec60240 100644
--- a/test/570-checker-select/src/Main.java
+++ b/test/570-checker-select/src/Main.java
@@ -19,6 +19,11 @@
   /// CHECK-START: int Main.BoolCond_IntVarVar(boolean, int, int) register (after)
   /// CHECK:               Select [{{i\d+}},{{i\d+}},{{z\d+}}]
 
+  /// CHECK-START-X86_64: int Main.BoolCond_IntVarVar(boolean, int, int) disassembly (after)
+  /// CHECK:            <<Cond:z\d+>> ParameterValue
+  /// CHECK:                          Select [{{i\d+}},{{i\d+}},<<Cond>>]
+  /// CHECK:                          cmovnz/ne
+
   public static int BoolCond_IntVarVar(boolean cond, int x, int y) {
     return cond ? x : y;
   }
@@ -26,6 +31,11 @@
   /// CHECK-START: int Main.BoolCond_IntVarCst(boolean, int) register (after)
   /// CHECK:               Select [{{i\d+}},{{i\d+}},{{z\d+}}]
 
+  /// CHECK-START-X86_64: int Main.BoolCond_IntVarCst(boolean, int) disassembly (after)
+  /// CHECK:            <<Cond:z\d+>> ParameterValue
+  /// CHECK:                          Select [{{i\d+}},{{i\d+}},<<Cond>>]
+  /// CHECK:                          cmovnz/ne
+
   public static int BoolCond_IntVarCst(boolean cond, int x) {
     return cond ? x : 1;
   }
@@ -33,10 +43,51 @@
   /// CHECK-START: int Main.BoolCond_IntCstVar(boolean, int) register (after)
   /// CHECK:               Select [{{i\d+}},{{i\d+}},{{z\d+}}]
 
+  /// CHECK-START-X86_64: int Main.BoolCond_IntCstVar(boolean, int) disassembly (after)
+  /// CHECK:            <<Cond:z\d+>> ParameterValue
+  /// CHECK:                          Select [{{i\d+}},{{i\d+}},<<Cond>>]
+  /// CHECK:                          cmovnz/ne
+
   public static int BoolCond_IntCstVar(boolean cond, int y) {
     return cond ? 1 : y;
   }
 
+  /// CHECK-START: long Main.BoolCond_LongVarVar(boolean, long, long) register (after)
+  /// CHECK:               Select [{{j\d+}},{{j\d+}},{{z\d+}}]
+
+  /// CHECK-START-X86_64: long Main.BoolCond_LongVarVar(boolean, long, long) disassembly (after)
+  /// CHECK:            <<Cond:z\d+>> ParameterValue
+  /// CHECK:                          Select [{{j\d+}},{{j\d+}},<<Cond>>]
+  /// CHECK:                          cmovnz/neq
+
+  public static long BoolCond_LongVarVar(boolean cond, long x, long y) {
+    return cond ? x : y;
+  }
+
+  /// CHECK-START: long Main.BoolCond_LongVarCst(boolean, long) register (after)
+  /// CHECK:               Select [{{j\d+}},{{j\d+}},{{z\d+}}]
+
+  /// CHECK-START-X86_64: long Main.BoolCond_LongVarCst(boolean, long) disassembly (after)
+  /// CHECK:            <<Cond:z\d+>> ParameterValue
+  /// CHECK:                          Select [{{j\d+}},{{j\d+}},<<Cond>>]
+  /// CHECK:                          cmovnz/neq
+
+  public static long BoolCond_LongVarCst(boolean cond, long x) {
+    return cond ? x : 1L;
+  }
+
+  /// CHECK-START: long Main.BoolCond_LongCstVar(boolean, long) register (after)
+  /// CHECK:               Select [{{j\d+}},{{j\d+}},{{z\d+}}]
+
+  /// CHECK-START-X86_64: long Main.BoolCond_LongCstVar(boolean, long) disassembly (after)
+  /// CHECK:            <<Cond:z\d+>> ParameterValue
+  /// CHECK:                          Select [{{j\d+}},{{j\d+}},<<Cond>>]
+  /// CHECK:                          cmovnz/neq
+
+  public static long BoolCond_LongCstVar(boolean cond, long y) {
+    return cond ? 1L : y;
+  }
+
   /// CHECK-START: float Main.BoolCond_FloatVarVar(boolean, float, float) register (after)
   /// CHECK:               Select [{{f\d+}},{{f\d+}},{{z\d+}}]
 
@@ -62,6 +113,11 @@
   /// CHECK:            <<Cond:z\d+>> LessThanOrEqual [{{i\d+}},{{i\d+}}]
   /// CHECK-NEXT:                     Select [{{i\d+}},{{i\d+}},<<Cond>>]
 
+  /// CHECK-START-X86_64: int Main.IntNonmatCond_IntVarVar(int, int, int, int) disassembly (after)
+  /// CHECK:            <<Cond:z\d+>> LessThanOrEqual [{{i\d+}},{{i\d+}}]
+  /// CHECK-NEXT:                     Select [{{i\d+}},{{i\d+}},<<Cond>>]
+  /// CHECK:                          cmovle/ng
+
   public static int IntNonmatCond_IntVarVar(int a, int b, int x, int y) {
     return a > b ? x : y;
   }
@@ -71,11 +127,78 @@
   /// CHECK-NEXT:       <<Sel:i\d+>>  Select [{{i\d+}},{{i\d+}},{{z\d+}}]
   /// CHECK-NEXT:                     Add [<<Cond>>,<<Sel>>]
 
+  /// CHECK-START-X86_64: int Main.IntMatCond_IntVarVar(int, int, int, int) disassembly (after)
+  /// CHECK:            <<Cond:z\d+>> LessThanOrEqual [{{i\d+}},{{i\d+}}]
+  /// CHECK:                          Select [{{i\d+}},{{i\d+}},<<Cond>>]
+  /// CHECK:                          cmovle/ng
+
   public static int IntMatCond_IntVarVar(int a, int b, int x, int y) {
     int result = (a > b ? x : y);
     return result + (a > b ? 0 : 1);
   }
 
+  /// CHECK-START: long Main.IntNonmatCond_LongVarVar(int, int, long, long) register (after)
+  /// CHECK:            <<Cond:z\d+>> LessThanOrEqual [{{i\d+}},{{i\d+}}]
+  /// CHECK-NEXT:                     Select [{{j\d+}},{{j\d+}},<<Cond>>]
+
+  /// CHECK-START-X86_64: long Main.IntNonmatCond_LongVarVar(int, int, long, long) disassembly (after)
+  /// CHECK:            <<Cond:z\d+>> LessThanOrEqual [{{i\d+}},{{i\d+}}]
+  /// CHECK-NEXT:                     Select [{{j\d+}},{{j\d+}},<<Cond>>]
+  /// CHECK:                          cmovle/ngq
+
+  public static long IntNonmatCond_LongVarVar(int a, int b, long x, long y) {
+    return a > b ? x : y;
+  }
+
+  /// CHECK-START: long Main.IntMatCond_LongVarVar(int, int, long, long) register (after)
+  /// CHECK:            <<Cond:z\d+>> LessThanOrEqual [{{i\d+}},{{i\d+}}]
+  /// CHECK:            <<Sel1:j\d+>> Select [{{j\d+}},{{j\d+}},<<Cond>>]
+  /// CHECK:            <<Sel2:j\d+>> Select [{{j\d+}},{{j\d+}},<<Cond>>]
+  /// CHECK:                          Add [<<Sel2>>,<<Sel1>>]
+
+  /// CHECK-START-X86_64: long Main.IntMatCond_LongVarVar(int, int, long, long) disassembly (after)
+  /// CHECK:            <<Cond:z\d+>> LessThanOrEqual [{{i\d+}},{{i\d+}}]
+  /// CHECK:                          Select [{{j\d+}},{{j\d+}},<<Cond>>]
+  /// CHECK:                          cmovle/ngq
+  /// CHECK:                          Select [{{j\d+}},{{j\d+}},<<Cond>>]
+  /// CHECK:                          cmovnz/neq
+
+  public static long IntMatCond_LongVarVar(int a, int b, long x, long y) {
+    long result = (a > b ? x : y);
+    return result + (a > b ? 0L : 1L);
+  }
+
+  /// CHECK-START: long Main.LongNonmatCond_LongVarVar(long, long, long, long) register (after)
+  /// CHECK:            <<Cond:z\d+>> LessThanOrEqual [{{j\d+}},{{j\d+}}]
+  /// CHECK:                          Select [{{j\d+}},{{j\d+}},<<Cond>>]
+
+  /// CHECK-START-X86_64: long Main.LongNonmatCond_LongVarVar(long, long, long, long) disassembly (after)
+  /// CHECK:            <<Cond:z\d+>> LessThanOrEqual [{{j\d+}},{{j\d+}}]
+  /// CHECK:                          Select [{{j\d+}},{{j\d+}},<<Cond>>]
+  /// CHECK:                          cmovle/ngq
+
+  public static long LongNonmatCond_LongVarVar(long a, long b, long x, long y) {
+    return a > b ? x : y;
+  }
+
+  /// CHECK-START: long Main.LongMatCond_LongVarVar(long, long, long, long) register (after)
+  /// CHECK:            <<Cond:z\d+>> LessThanOrEqual [{{j\d+}},{{j\d+}}]
+  /// CHECK:            <<Sel1:j\d+>> Select [{{j\d+}},{{j\d+}},<<Cond>>]
+  /// CHECK:            <<Sel2:j\d+>> Select [{{j\d+}},{{j\d+}},<<Cond>>]
+  /// CHECK:                          Add [<<Sel2>>,<<Sel1>>]
+
+  /// CHECK-START-X86_64: long Main.LongMatCond_LongVarVar(long, long, long, long) disassembly (after)
+  /// CHECK:            <<Cond:z\d+>> LessThanOrEqual [{{j\d+}},{{j\d+}}]
+  /// CHECK:                          Select [{{j\d+}},{{j\d+}},<<Cond>>]
+  /// CHECK:                          cmovle/ngq
+  /// CHECK:                          Select [{{j\d+}},{{j\d+}},<<Cond>>]
+  /// CHECK:                          cmovnz/neq
+
+  public static long LongMatCond_LongVarVar(long a, long b, long x, long y) {
+    long result = (a > b ? x : y);
+    return result + (a > b ? 0L : 1L);
+  }
+
   /// CHECK-START: int Main.FloatLtNonmatCond_IntVarVar(float, float, int, int) register (after)
   /// CHECK:            <<Cond:z\d+>> LessThanOrEqual [{{f\d+}},{{f\d+}}]
   /// CHECK-NEXT:                     Select [{{i\d+}},{{i\d+}},<<Cond>>]
@@ -150,6 +273,13 @@
     assertEqual(1, BoolCond_IntCstVar(true, 7));
     assertEqual(7, BoolCond_IntCstVar(false, 7));
 
+    assertEqual(5L, BoolCond_LongVarVar(true, 5L, 7L));
+    assertEqual(7L, BoolCond_LongVarVar(false, 5L, 7L));
+    assertEqual(5L, BoolCond_LongVarCst(true, 5L));
+    assertEqual(1L, BoolCond_LongVarCst(false, 5L));
+    assertEqual(1L, BoolCond_LongCstVar(true, 7L));
+    assertEqual(7L, BoolCond_LongCstVar(false, 7L));
+
     assertEqual(5, BoolCond_FloatVarVar(true, 5, 7));
     assertEqual(7, BoolCond_FloatVarVar(false, 5, 7));
     assertEqual(5, BoolCond_FloatVarCst(true, 5));
diff --git a/test/971-iface-super/util-src/generate_smali.py b/test/971-iface-super/util-src/generate_smali.py
index f01c904..3681411 100755
--- a/test/971-iface-super/util-src/generate_smali.py
+++ b/test/971-iface-super/util-src/generate_smali.py
@@ -39,7 +39,7 @@
 import string
 
 # The max depth the type tree can have.
-MAX_IFACE_DEPTH = 3
+MAX_IFACE_DEPTH = 2
 
 class MainClass(mixins.DumpMixin, mixins.Named, mixins.SmaliFileMixin):
   """
diff --git a/test/Android.libarttest.mk b/test/Android.libarttest.mk
index faaf1f0..e547c72 100644
--- a/test/Android.libarttest.mk
+++ b/test/Android.libarttest.mk
@@ -40,7 +40,8 @@
   466-get-live-vreg/get_live_vreg_jni.cc \
   497-inlining-and-class-loader/clear_dex_cache.cc \
   543-env-long-ref/env_long_ref.cc \
-  566-polymorphic-inlining/polymorphic_inline.cc
+  566-polymorphic-inlining/polymorphic_inline.cc \
+  570-checker-osr/osr.cc
 
 ART_TARGET_LIBARTTEST_$(ART_PHONY_TEST_TARGET_SUFFIX) += $(ART_TARGET_TEST_OUT)/$(TARGET_ARCH)/libarttest.so
 ART_TARGET_LIBARTTEST_$(ART_PHONY_TEST_TARGET_SUFFIX) += $(ART_TARGET_TEST_OUT)/$(TARGET_ARCH)/libarttestd.so
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index a8938fa..f5ee87e 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -401,13 +401,14 @@
 
 # 137:
 # This test unrolls and expects managed frames, but tracing means we run the interpreter.
-# 802:
+# 802 and 570-checker-osr:
 # This test dynamically enables tracing to force a deoptimization. This makes the test meaningless
 # when already tracing, and writes an error message that we do not want to check for.
 TEST_ART_BROKEN_TRACING_RUN_TESTS := \
   087-gc-after-link \
   137-cfi \
   141-class-unload \
+  570-checker-osr \
   802-deoptimization
 
 ifneq (,$(filter trace stream,$(TRACE_TYPES)))
@@ -433,7 +434,9 @@
 # Known broken tests for the JIT.
 # CFI unwinding expects managed frames, and the test does not iterate enough to even compile. JIT
 # also uses Generic JNI instead of the JNI compiler.
+# 570 is disabled while investigating osr flakiness.
 TEST_ART_BROKEN_JIT_RUN_TESTS := \
+  570-checker-osr \
   137-cfi
 
 ifneq (,$(filter jit,$(COMPILER_TYPES)))