JIT baseline: trigger optimized compilation on hotness threshold.

- Add a new hotness counter in the ProfilingInfo so that it does not conflict
with the interpreter hotness counter, which may be used for OSR.
- Add a baseline flag to the CodeInfo, read through the OatQuickMethodHeader,
to identify baseline-compiled methods.
- Add a -Xusetieredjit flag for experimenting and testing.
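The flag takes the form -Xusetieredjit:true or -Xusetieredjit:false.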

Bug: 119800099
Test: test.py with -Xusetieredjit set to true

Change-Id: I8512853f869f1312e3edc60bf64413dee9143c52
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index bef7169..8406ef5 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -395,7 +395,8 @@
   GetStackMapStream()->BeginMethod(HasEmptyFrame() ? 0 : frame_size_,
                                    core_spill_mask_,
                                    fpu_spill_mask_,
-                                   GetGraph()->GetNumberOfVRegs());
+                                   GetGraph()->GetNumberOfVRegs(),
+                                   GetGraph()->IsCompilingBaseline());
 
   size_t frame_start = GetAssembler()->CodeSize();
   GenerateFrameEntry();
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 47c62f9..894c7a4 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -1061,20 +1061,67 @@
   codegen_->MoveLocation(move->GetDestination(), move->GetSource(), DataType::Type::kVoid);
 }
 
+void CodeGeneratorARM64::MaybeIncrementHotness(bool is_frame_entry) {
+  MacroAssembler* masm = GetVIXLAssembler();
+  if (GetCompilerOptions().CountHotnessInCompiledCode()) {
+    UseScratchRegisterScope temps(masm);
+    Register counter = temps.AcquireX();
+    Register method = is_frame_entry ? kArtMethodRegister : temps.AcquireX();
+    if (!is_frame_entry) {
+      __ Ldr(method, MemOperand(sp, 0));
+    }
+    __ Ldrh(counter, MemOperand(method, ArtMethod::HotnessCountOffset().Int32Value()));
+    __ Add(counter, counter, 1);
+    // Subtract one if the counter would overflow.
+    __ Sub(counter, counter, Operand(counter, LSR, 16));
+    __ Strh(counter, MemOperand(method, ArtMethod::HotnessCountOffset().Int32Value()));
+  }
+
+  if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) {
+    ScopedObjectAccess soa(Thread::Current());
+    ProfilingInfo* info = GetGraph()->GetArtMethod()->GetProfilingInfo(kRuntimePointerSize);
+    uint32_t address = reinterpret_cast32<uint32_t>(info);
+    vixl::aarch64::Label done;
+    UseScratchRegisterScope temps(masm);
+    Register temp = temps.AcquireX();
+    Register counter = temps.AcquireW();
+    __ Mov(temp, address);
+    __ Ldrh(counter, MemOperand(temp, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()));
+    __ Add(counter, counter, 1);
+    __ Strh(counter, MemOperand(temp, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()));
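+    // If the add wrapped past 0xffff, the low 16 bits (and the value just
+    // stored) are zero; fall through to request optimized compilation.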
+    __ Tst(counter, 0xffff);
+    __ B(ne, &done);
+    if (is_frame_entry) {
+      if (HasEmptyFrame()) {
+        // The entrypoint expects the method at the bottom of the stack. We
+        // claim the stack space necessary for alignment.
+        __ Claim(kStackAlignment);
+        __ Stp(kArtMethodRegister, lr, MemOperand(sp, 0));
+      } else if (!RequiresCurrentMethod()) {
+        __ Str(kArtMethodRegister, MemOperand(sp, 0));
+      }
+    } else {
+      CHECK(RequiresCurrentMethod());
+    }
+    uint32_t entrypoint_offset =
+        GetThreadOffset<kArm64PointerSize>(kQuickCompileOptimized).Int32Value();
+    __ Ldr(lr, MemOperand(tr, entrypoint_offset));
+    // Note: we don't record the call here (and therefore don't generate a stack
+    // map), as the entrypoint never suspends the thread.
+    __ Blr(lr);
+    if (HasEmptyFrame()) {
+      CHECK(is_frame_entry);
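+      // Restore lr, which the Stp above saved at sp + 8.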
+      __ Ldr(lr, MemOperand(sp, 8));
+      __ Drop(kStackAlignment);
+    }
+    __ Bind(&done);
+  }
+}
+
 void CodeGeneratorARM64::GenerateFrameEntry() {
   MacroAssembler* masm = GetVIXLAssembler();
   __ Bind(&frame_entry_label_);
 
-  if (GetCompilerOptions().CountHotnessInCompiledCode()) {
-    UseScratchRegisterScope temps(masm);
-    Register temp = temps.AcquireX();
-    __ Ldrh(temp, MemOperand(kArtMethodRegister, ArtMethod::HotnessCountOffset().Int32Value()));
-    __ Add(temp, temp, 1);
-      // Subtract one if the counter would overflow.
-    __ Sub(temp, temp, Operand(temp, LSR, 16));
-    __ Strh(temp, MemOperand(kArtMethodRegister, ArtMethod::HotnessCountOffset().Int32Value()));
-  }
-
   bool do_overflow_check =
       FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kArm64) || !IsLeafMethod();
   if (do_overflow_check) {
@@ -1136,7 +1183,7 @@
       __ Str(wzr, MemOperand(sp, GetStackOffsetOfShouldDeoptimizeFlag()));
     }
   }
-
+  MaybeIncrementHotness(/* is_frame_entry= */ true);
   MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
 }
 
@@ -3177,17 +3224,7 @@
   HLoopInformation* info = block->GetLoopInformation();
 
   if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
-    if (codegen_->GetCompilerOptions().CountHotnessInCompiledCode()) {
-      UseScratchRegisterScope temps(GetVIXLAssembler());
-      Register temp1 = temps.AcquireX();
-      Register temp2 = temps.AcquireX();
-      __ Ldr(temp1, MemOperand(sp, 0));
-      __ Ldrh(temp2, MemOperand(temp1, ArtMethod::HotnessCountOffset().Int32Value()));
-      __ Add(temp2, temp2, 1);
-      // Subtract one if the counter would overflow.
-      __ Sub(temp2, temp2, Operand(temp2, LSR, 16));
-      __ Strh(temp2, MemOperand(temp1, ArtMethod::HotnessCountOffset().Int32Value()));
-    }
+    codegen_->MaybeIncrementHotness(/* is_frame_entry= */ false);
     GenerateSuspendCheck(info->GetSuspendCheck(), successor);
     return;
   }
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 253e915..6b2c805 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -787,6 +787,7 @@
   }
 
   void MaybeGenerateInlineCacheCheck(HInstruction* instruction, vixl::aarch64::Register klass);
+  void MaybeIncrementHotness(bool is_frame_entry);
 
  private:
   // Encoding of thunk type and data for link-time generated thunks for Baker read barriers.
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index 9100c6c..49a608e 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -2080,27 +2080,79 @@
   }
 }
 
+void CodeGeneratorARMVIXL::MaybeIncrementHotness(bool is_frame_entry) {
+  if (GetCompilerOptions().CountHotnessInCompiledCode()) {
+    UseScratchRegisterScope temps(GetVIXLAssembler());
+    vixl32::Register temp = temps.Acquire();
+    static_assert(ArtMethod::MaxCounter() == 0xFFFF, "asm is probably wrong");
+    if (!is_frame_entry) {
+      __ Push(vixl32::Register(kMethodRegister));
+      GetAssembler()->LoadFromOffset(kLoadWord, kMethodRegister, sp, kArmWordSize);
+    }
+    // Load with zero extend to clear the high bits for integer overflow check.
+    __ Ldrh(temp, MemOperand(kMethodRegister, ArtMethod::HotnessCountOffset().Int32Value()));
+    __ Add(temp, temp, 1);
+    // Subtract one if the counter would overflow.
+    __ Sub(temp, temp, Operand(temp, ShiftType::LSR, 16));
+    __ Strh(temp, MemOperand(kMethodRegister, ArtMethod::HotnessCountOffset().Int32Value()));
+    if (!is_frame_entry) {
+      __ Pop(vixl32::Register(kMethodRegister));
+    }
+  }
+
+  if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) {
+    ScopedObjectAccess soa(Thread::Current());
+    ProfilingInfo* info = GetGraph()->GetArtMethod()->GetProfilingInfo(kRuntimePointerSize);
+    uint32_t address = reinterpret_cast32<uint32_t>(info);
+    vixl::aarch32::Label done;
+    UseScratchRegisterScope temps(GetVIXLAssembler());
+    temps.Exclude(ip);
+    if (!is_frame_entry) {
+      __ Push(r4);  // Will be used as temporary. For frame entry, r4 is always available.
+    }
+    __ Mov(r4, address);
+    __ Ldrh(ip, MemOperand(r4, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()));
+    __ Add(ip, ip, 1);
+    __ Strh(ip, MemOperand(r4, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()));
+    if (!is_frame_entry) {
+      __ Pop(r4);
+    }
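+    // Shifting left by 16 sets the Z flag exactly when the low 16 bits of the
+    // counter are zero, i.e. when it just wrapped.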
+    __ Lsls(ip, ip, 16);
+    __ B(ne, &done);
+    uint32_t entry_point_offset =
+        GetThreadOffset<kArmPointerSize>(kQuickCompileOptimized).Int32Value();
+    if (HasEmptyFrame()) {
+      CHECK(is_frame_entry);
+      // For leaf methods, we need to spill lr and r0 (the ArtMethod, which the
+      // stub expects at the bottom of the stack). Also spill r1 and r2 for
+      // alignment.
+      uint32_t core_spill_mask =
+          (1 << lr.GetCode()) | (1 << r0.GetCode()) | (1 << r1.GetCode()) | (1 << r2.GetCode());
+      __ Push(RegisterList(core_spill_mask));
+      __ Ldr(lr, MemOperand(tr, entry_point_offset));
+      __ Blx(lr);
+      __ Pop(RegisterList(core_spill_mask));
+    } else {
+      if (!RequiresCurrentMethod()) {
+        CHECK(is_frame_entry);
+        GetAssembler()->StoreToOffset(kStoreWord, kMethodRegister, sp, 0);
+      }
+      __ Ldr(lr, MemOperand(tr, entry_point_offset));
+      __ Blx(lr);
+    }
+    __ Bind(&done);
+  }
+}
+
 void CodeGeneratorARMVIXL::GenerateFrameEntry() {
   bool skip_overflow_check =
       IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kArm);
   DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
   __ Bind(&frame_entry_label_);
 
-  if (GetCompilerOptions().CountHotnessInCompiledCode()) {
-    UseScratchRegisterScope temps(GetVIXLAssembler());
-    vixl32::Register temp = temps.Acquire();
-    static_assert(ArtMethod::MaxCounter() == 0xFFFF, "asm is probably wrong");
-    // Load with sign extend to set the high bits for integer overflow check.
-    __ Ldrh(temp, MemOperand(kMethodRegister, ArtMethod::HotnessCountOffset().Int32Value()));
-    __ Add(temp, temp, 1);
-    // Subtract one if the counter would overflow.
-    __ Sub(temp, temp, Operand(temp, ShiftType::LSR, 16));
-    __ Strh(temp, MemOperand(kMethodRegister, ArtMethod::HotnessCountOffset().Int32Value()));
-  }
-
   if (HasEmptyFrame()) {
     // Ensure that the CFI opcode list is not empty.
     GetAssembler()->cfi().Nop();
+    MaybeIncrementHotness(/* is_frame_entry= */ true);
     return;
   }
 
@@ -2201,6 +2253,7 @@
     GetAssembler()->StoreToOffset(kStoreWord, temp, sp, GetStackOffsetOfShouldDeoptimizeFlag());
   }
 
+  MaybeIncrementHotness(/* is_frame_entry= */ true);
   MaybeGenerateMarkingRegisterCheck(/* code= */ 1);
 }
 
@@ -2498,19 +2551,7 @@
   HLoopInformation* info = block->GetLoopInformation();
 
   if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
-    if (codegen_->GetCompilerOptions().CountHotnessInCompiledCode()) {
-      UseScratchRegisterScope temps(GetVIXLAssembler());
-      vixl32::Register temp = temps.Acquire();
-      __ Push(vixl32::Register(kMethodRegister));
-      GetAssembler()->LoadFromOffset(kLoadWord, kMethodRegister, sp, kArmWordSize);
-      // Load with sign extend to set the high bits for integer overflow check.
-      __ Ldrh(temp, MemOperand(kMethodRegister, ArtMethod::HotnessCountOffset().Int32Value()));
-      __ Add(temp, temp, 1);
-      // Subtract one if the counter would overflow.
-      __ Sub(temp, temp, Operand(temp, ShiftType::LSR, 16));
-      __ Strh(temp, MemOperand(kMethodRegister, ArtMethod::HotnessCountOffset().Int32Value()));
-      __ Pop(vixl32::Register(kMethodRegister));
-    }
+    codegen_->MaybeIncrementHotness(/* is_frame_entry= */ false);
     GenerateSuspendCheck(info->GetSuspendCheck(), successor);
     return;
   }
diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h
index 3d4c231..48fb082 100644
--- a/compiler/optimizing/code_generator_arm_vixl.h
+++ b/compiler/optimizing/code_generator_arm_vixl.h
@@ -757,6 +757,7 @@
   }
 
   void MaybeGenerateInlineCacheCheck(HInstruction* instruction, vixl32::Register klass);
+  void MaybeIncrementHotness(bool is_frame_entry);
 
  private:
   // Encoding of thunk type and data for link-time generated thunks for Baker read barriers.
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 54da879..3077be0 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -1072,6 +1072,72 @@
   return dwarf::Reg::X86Core(static_cast<int>(reg));
 }
 
+void CodeGeneratorX86::MaybeIncrementHotness(bool is_frame_entry) {
+  if (GetCompilerOptions().CountHotnessInCompiledCode()) {
+    Register reg = EAX;
+    if (is_frame_entry) {
+      reg = kMethodRegisterArgument;
+    } else {
+      __ pushl(EAX);
+      __ movl(EAX, Address(ESP, kX86WordSize));
+    }
+    NearLabel overflow;
+    __ cmpw(Address(reg, ArtMethod::HotnessCountOffset().Int32Value()),
+            Immediate(ArtMethod::MaxCounter()));
+    __ j(kEqual, &overflow);
+    __ addw(Address(reg, ArtMethod::HotnessCountOffset().Int32Value()),
+            Immediate(1));
+    __ Bind(&overflow);
+    if (!is_frame_entry) {
+      __ popl(EAX);
+    }
+  }
+
+  if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) {
+    ScopedObjectAccess soa(Thread::Current());
+    ProfilingInfo* info = GetGraph()->GetArtMethod()->GetProfilingInfo(kRuntimePointerSize);
+    uint32_t address = reinterpret_cast32<uint32_t>(info);
+    NearLabel done;
+    if (HasEmptyFrame()) {
+      CHECK(is_frame_entry);
+      // Alignment
+      __ subl(ESP, Immediate(8));
+      __ cfi().AdjustCFAOffset(8);
+      // We need a temporary. The stub also expects the method at the bottom of the stack.
+      __ pushl(EAX);
+      __ cfi().AdjustCFAOffset(4);
+      __ movl(EAX, Immediate(address));
+      __ addw(Address(EAX, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()), Immediate(1));
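+      // The 16-bit add sets the carry flag exactly when the counter wraps;
+      // only then fall through to the stub.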
+      __ j(kCarryClear, &done);
+      GenerateInvokeRuntime(
+          GetThreadOffset<kX86PointerSize>(kQuickCompileOptimized).Int32Value());
+      __ Bind(&done);
+      // We don't strictly need to restore EAX, but doing so makes the generated
+      // code easier to reason about.
+      __ popl(EAX);
+      __ cfi().AdjustCFAOffset(-4);
+      __ addl(ESP, Immediate(8));
+      __ cfi().AdjustCFAOffset(-8);
+    } else {
+      if (!RequiresCurrentMethod()) {
+        CHECK(is_frame_entry);
+        __ movl(Address(ESP, kCurrentMethodStackOffset), kMethodRegisterArgument);
+      }
+      // We need a temporary.
+      __ pushl(EAX);
+      __ cfi().AdjustCFAOffset(4);
+      __ movl(EAX, Immediate(address));
+      __ addw(Address(EAX, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()), Immediate(1));
+      __ popl(EAX);  // Put stack as expected before exiting or calling stub.
+      __ cfi().AdjustCFAOffset(-4);
+      __ j(kCarryClear, &done);
+      GenerateInvokeRuntime(
+          GetThreadOffset<kX86PointerSize>(kQuickCompileOptimized).Int32Value());
+      __ Bind(&done);
+    }
+  }
+}
+
 void CodeGeneratorX86::GenerateFrameEntry() {
   __ cfi().SetCurrentCFAOffset(kX86WordSize);  // return address
   __ Bind(&frame_entry_label_);
@@ -1079,51 +1145,39 @@
       IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86);
   DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
 
-  if (GetCompilerOptions().CountHotnessInCompiledCode()) {
-    NearLabel overflow;
-    __ cmpw(Address(kMethodRegisterArgument,
-                    ArtMethod::HotnessCountOffset().Int32Value()),
-            Immediate(ArtMethod::MaxCounter()));
-    __ j(kEqual, &overflow);
-    __ addw(Address(kMethodRegisterArgument,
-                    ArtMethod::HotnessCountOffset().Int32Value()),
-            Immediate(1));
-    __ Bind(&overflow);
-  }
-
   if (!skip_overflow_check) {
     size_t reserved_bytes = GetStackOverflowReservedBytes(InstructionSet::kX86);
     __ testl(EAX, Address(ESP, -static_cast<int32_t>(reserved_bytes)));
     RecordPcInfo(nullptr, 0);
   }
 
-  if (HasEmptyFrame()) {
-    return;
-  }
+  if (!HasEmptyFrame()) {
+    for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) {
+      Register reg = kCoreCalleeSaves[i];
+      if (allocated_registers_.ContainsCoreRegister(reg)) {
+        __ pushl(reg);
+        __ cfi().AdjustCFAOffset(kX86WordSize);
+        __ cfi().RelOffset(DWARFReg(reg), 0);
+      }
+    }
 
-  for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) {
-    Register reg = kCoreCalleeSaves[i];
-    if (allocated_registers_.ContainsCoreRegister(reg)) {
-      __ pushl(reg);
-      __ cfi().AdjustCFAOffset(kX86WordSize);
-      __ cfi().RelOffset(DWARFReg(reg), 0);
+    int adjust = GetFrameSize() - FrameEntrySpillSize();
+    __ subl(ESP, Immediate(adjust));
+    __ cfi().AdjustCFAOffset(adjust);
+    // Save the current method if we need it. Note that we do not
+    // do this in HCurrentMethod, as the instruction might have been removed
+    // in the SSA graph.
+    if (RequiresCurrentMethod()) {
+      __ movl(Address(ESP, kCurrentMethodStackOffset), kMethodRegisterArgument);
+    }
+
+    if (GetGraph()->HasShouldDeoptimizeFlag()) {
+      // Initialize should_deoptimize flag to 0.
+      __ movl(Address(ESP, GetStackOffsetOfShouldDeoptimizeFlag()), Immediate(0));
     }
   }
 
-  int adjust = GetFrameSize() - FrameEntrySpillSize();
-  __ subl(ESP, Immediate(adjust));
-  __ cfi().AdjustCFAOffset(adjust);
-  // Save the current method if we need it. Note that we do not
-  // do this in HCurrentMethod, as the instruction might have been removed
-  // in the SSA graph.
-  if (RequiresCurrentMethod()) {
-    __ movl(Address(ESP, kCurrentMethodStackOffset), kMethodRegisterArgument);
-  }
-
-  if (GetGraph()->HasShouldDeoptimizeFlag()) {
-    // Initialize should_deoptimize flag to 0.
-    __ movl(Address(ESP, GetStackOffsetOfShouldDeoptimizeFlag()), Immediate(0));
-  }
+  MaybeIncrementHotness(/* is_frame_entry= */ true);
 }
 
 void CodeGeneratorX86::GenerateFrameExit() {
@@ -1391,18 +1445,7 @@
 
   HLoopInformation* info = block->GetLoopInformation();
   if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
-    if (codegen_->GetCompilerOptions().CountHotnessInCompiledCode()) {
-      __ pushl(EAX);
-      __ movl(EAX, Address(ESP, kX86WordSize));
-          NearLabel overflow;
-      __ cmpw(Address(EAX, ArtMethod::HotnessCountOffset().Int32Value()),
-              Immediate(ArtMethod::MaxCounter()));
-      __ j(kEqual, &overflow);
-      __ addw(Address(EAX, ArtMethod::HotnessCountOffset().Int32Value()),
-              Immediate(1));
-      __ Bind(&overflow);
-      __ popl(EAX);
-    }
+    codegen_->MaybeIncrementHotness(/* is_frame_entry= */ false);
     GenerateSuspendCheck(info->GetSuspendCheck(), successor);
     return;
   }
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index e305b50..16446ce 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -625,6 +625,7 @@
   void GenerateExplicitNullCheck(HNullCheck* instruction) override;
 
   void MaybeGenerateInlineCacheCheck(HInstruction* instruction, Register klass);
+  void MaybeIncrementHotness(bool is_frame_entry);
 
   // When we don't know the proper offset for the value, we use kDummy32BitOffset.
   // The correct value will be inserted when processing Assembler fixups.
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 48a3d90..dd3a4f4 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -1346,6 +1346,53 @@
   return dwarf::Reg::X86_64Fp(static_cast<int>(reg));
 }
 
+void CodeGeneratorX86_64::MaybeIncrementHotness(bool is_frame_entry) {
+  if (GetCompilerOptions().CountHotnessInCompiledCode()) {
+    NearLabel overflow;
+    Register method = kMethodRegisterArgument;
+    if (!is_frame_entry) {
+      CHECK(RequiresCurrentMethod());
+      method = TMP;
+      __ movq(CpuRegister(method), Address(CpuRegister(RSP), kCurrentMethodStackOffset));
+    }
+    __ cmpw(Address(CpuRegister(method), ArtMethod::HotnessCountOffset().Int32Value()),
+            Immediate(ArtMethod::MaxCounter()));
+    __ j(kEqual, &overflow);
+    __ addw(Address(CpuRegister(method), ArtMethod::HotnessCountOffset().Int32Value()),
+            Immediate(1));
+    __ Bind(&overflow);
+  }
+
+  if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) {
+    ScopedObjectAccess soa(Thread::Current());
+    ProfilingInfo* info = GetGraph()->GetArtMethod()->GetProfilingInfo(kRuntimePointerSize);
+    uint64_t address = reinterpret_cast64<uint64_t>(info);
+    NearLabel done;
+    __ movq(CpuRegister(TMP), Immediate(address));
+    __ addw(Address(CpuRegister(TMP), ProfilingInfo::BaselineHotnessCountOffset().Int32Value()),
+            Immediate(1));
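+    // The 16-bit add sets the carry flag exactly when the counter wraps; skip
+    // the runtime call otherwise.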
+    __ j(kCarryClear, &done);
+    if (HasEmptyFrame()) {
+      CHECK(is_frame_entry);
+      // Frame alignment, and the stub expects the method on the stack.
+      __ pushq(CpuRegister(RDI));
+      __ cfi().AdjustCFAOffset(kX86_64WordSize);
+      __ cfi().RelOffset(DWARFReg(RDI), 0);
+    } else if (!RequiresCurrentMethod()) {
+      CHECK(is_frame_entry);
+      __ movq(Address(CpuRegister(RSP), kCurrentMethodStackOffset), CpuRegister(RDI));
+    }
+    GenerateInvokeRuntime(
+        GetThreadOffset<kX86_64PointerSize>(kQuickCompileOptimized).Int32Value());
+    if (HasEmptyFrame()) {
+      __ popq(CpuRegister(RDI));
+      __ cfi().AdjustCFAOffset(-static_cast<int>(kX86_64WordSize));
+      __ cfi().Restore(DWARFReg(RDI));
+    }
+    __ Bind(&done);
+  }
+}
+
 void CodeGeneratorX86_64::GenerateFrameEntry() {
   __ cfi().SetCurrentCFAOffset(kX86_64WordSize);  // return address
   __ Bind(&frame_entry_label_);
@@ -1353,17 +1400,6 @@
       && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86_64);
   DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
 
-  if (GetCompilerOptions().CountHotnessInCompiledCode()) {
-    NearLabel overflow;
-    __ cmpw(Address(CpuRegister(kMethodRegisterArgument),
-                    ArtMethod::HotnessCountOffset().Int32Value()),
-            Immediate(ArtMethod::MaxCounter()));
-    __ j(kEqual, &overflow);
-    __ addw(Address(CpuRegister(kMethodRegisterArgument),
-                    ArtMethod::HotnessCountOffset().Int32Value()),
-            Immediate(1));
-    __ Bind(&overflow);
-  }
 
   if (!skip_overflow_check) {
     size_t reserved_bytes = GetStackOverflowReservedBytes(InstructionSet::kX86_64);
@@ -1371,45 +1407,47 @@
     RecordPcInfo(nullptr, 0);
   }
 
-  if (HasEmptyFrame()) {
-    return;
-  }
+  if (!HasEmptyFrame()) {
+    for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) {
+      Register reg = kCoreCalleeSaves[i];
+      if (allocated_registers_.ContainsCoreRegister(reg)) {
+        __ pushq(CpuRegister(reg));
+        __ cfi().AdjustCFAOffset(kX86_64WordSize);
+        __ cfi().RelOffset(DWARFReg(reg), 0);
+      }
+    }
 
-  for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) {
-    Register reg = kCoreCalleeSaves[i];
-    if (allocated_registers_.ContainsCoreRegister(reg)) {
-      __ pushq(CpuRegister(reg));
-      __ cfi().AdjustCFAOffset(kX86_64WordSize);
-      __ cfi().RelOffset(DWARFReg(reg), 0);
+    int adjust = GetFrameSize() - GetCoreSpillSize();
+    __ subq(CpuRegister(RSP), Immediate(adjust));
+    __ cfi().AdjustCFAOffset(adjust);
+    uint32_t xmm_spill_location = GetFpuSpillStart();
+    size_t xmm_spill_slot_size = GetCalleePreservedFPWidth();
+
+    for (int i = arraysize(kFpuCalleeSaves) - 1; i >= 0; --i) {
+      if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) {
+        int offset = xmm_spill_location + (xmm_spill_slot_size * i);
+        __ movsd(Address(CpuRegister(RSP), offset), XmmRegister(kFpuCalleeSaves[i]));
+        __ cfi().RelOffset(DWARFReg(kFpuCalleeSaves[i]), offset);
+      }
+    }
+
+    // Save the current method if we need it. Note that we do not
+    // do this in HCurrentMethod, as the instruction might have been removed
+    // in the SSA graph.
+    if (RequiresCurrentMethod()) {
+      CHECK(!HasEmptyFrame());
+      __ movq(Address(CpuRegister(RSP), kCurrentMethodStackOffset),
+              CpuRegister(kMethodRegisterArgument));
+    }
+
+    if (GetGraph()->HasShouldDeoptimizeFlag()) {
+      CHECK(!HasEmptyFrame());
+      // Initialize should_deoptimize flag to 0.
+      __ movl(Address(CpuRegister(RSP), GetStackOffsetOfShouldDeoptimizeFlag()), Immediate(0));
     }
   }
 
-  int adjust = GetFrameSize() - GetCoreSpillSize();
-  __ subq(CpuRegister(RSP), Immediate(adjust));
-  __ cfi().AdjustCFAOffset(adjust);
-  uint32_t xmm_spill_location = GetFpuSpillStart();
-  size_t xmm_spill_slot_size = GetCalleePreservedFPWidth();
-
-  for (int i = arraysize(kFpuCalleeSaves) - 1; i >= 0; --i) {
-    if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) {
-      int offset = xmm_spill_location + (xmm_spill_slot_size * i);
-      __ movsd(Address(CpuRegister(RSP), offset), XmmRegister(kFpuCalleeSaves[i]));
-      __ cfi().RelOffset(DWARFReg(kFpuCalleeSaves[i]), offset);
-    }
-  }
-
-  // Save the current method if we need it. Note that we do not
-  // do this in HCurrentMethod, as the instruction might have been removed
-  // in the SSA graph.
-  if (RequiresCurrentMethod()) {
-    __ movq(Address(CpuRegister(RSP), kCurrentMethodStackOffset),
-            CpuRegister(kMethodRegisterArgument));
-  }
-
-  if (GetGraph()->HasShouldDeoptimizeFlag()) {
-    // Initialize should_deoptimize flag to 0.
-    __ movl(Address(CpuRegister(RSP), GetStackOffsetOfShouldDeoptimizeFlag()), Immediate(0));
-  }
+  MaybeIncrementHotness(/* is_frame_entry= */ true);
 }
 
 void CodeGeneratorX86_64::GenerateFrameExit() {
@@ -1556,16 +1594,7 @@
 
   HLoopInformation* info = block->GetLoopInformation();
   if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
-    if (codegen_->GetCompilerOptions().CountHotnessInCompiledCode()) {
-      __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), 0));
-      NearLabel overflow;
-      __ cmpw(Address(CpuRegister(TMP), ArtMethod::HotnessCountOffset().Int32Value()),
-              Immediate(ArtMethod::MaxCounter()));
-      __ j(kEqual, &overflow);
-      __ addw(Address(CpuRegister(TMP), ArtMethod::HotnessCountOffset().Int32Value()),
-              Immediate(1));
-      __ Bind(&overflow);
-    }
+    codegen_->MaybeIncrementHotness(/* is_frame_entry= */ false);
     GenerateSuspendCheck(info->GetSuspendCheck(), successor);
     return;
   }
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 5537a4a..2e8d9b3 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -603,6 +603,8 @@
   void MaybeGenerateInlineCacheCheck(HInstruction* instruction, CpuRegister cls);
 
 
+  void MaybeIncrementHotness(bool is_frame_entry);
+
   // When we don't know the proper offset for the value, we use kDummy32BitOffset.
   // We will fix this up in the linker later to have the right value.
   static constexpr int32_t kDummy32BitOffset = 256;
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index ff7ed34..ecaedc7 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -1813,6 +1813,7 @@
       graph_->IsDebuggable(),
       /* osr= */ false,
       /* is_shared_jit_code= */ graph_->IsCompilingForSharedJitCode(),
+      /* baseline= */ graph_->IsCompilingBaseline(),
       /* start_instruction_id= */ caller_instruction_counter);
   callee_graph->SetArtMethod(resolved_method);
 
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 09ae6fa..0433384 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -322,6 +322,7 @@
          bool debuggable = false,
          bool osr = false,
          bool is_shared_jit_code = false,
+         bool baseline = false,
          int start_instruction_id = 0)
       : allocator_(allocator),
         arena_stack_(arena_stack),
@@ -358,6 +359,7 @@
         art_method_(nullptr),
         inexact_object_rti_(ReferenceTypeInfo::CreateInvalid()),
         osr_(osr),
+        baseline_(baseline),
         cha_single_implementation_list_(allocator->Adapter(kArenaAllocCHA)),
         is_shared_jit_code_(is_shared_jit_code) {
     blocks_.reserve(kDefaultNumberOfBlocks);
@@ -589,6 +591,8 @@
 
   bool IsCompilingOsr() const { return osr_; }
 
+  bool IsCompilingBaseline() const { return baseline_; }
+
   bool IsCompilingForSharedJitCode() const {
     return is_shared_jit_code_;
   }
@@ -786,6 +790,10 @@
   // compiled code entries which the interpreter can directly jump to.
   const bool osr_;
 
+  // Whether we are compiling in baseline mode, i.e. not running optimizations.
+  // This affects the code being generated.
+  const bool baseline_;
+
   // List of methods that are assumed to have single implementation.
   ArenaSet<ArtMethod*> cha_single_implementation_list_;
 
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 3f11170..f8eae2f 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -851,7 +851,8 @@
       dead_reference_safe,
       compiler_options.GetDebuggable(),
       /* osr= */ osr,
-      /* is_shared_jit_code= */ is_shared_jit_code);
+      /* is_shared_jit_code= */ is_shared_jit_code,
+      /* baseline= */ baseline);
 
   if (method != nullptr) {
     graph->SetArtMethod(method);
@@ -1174,7 +1175,8 @@
       jni_compiled_method.GetFrameSize(),
       jni_compiled_method.GetCoreSpillMask(),
       jni_compiled_method.GetFpSpillMask(),
-      /* num_dex_registers= */ 0);
+      /* num_dex_registers= */ 0,
+      /* baseline= */ false);
   stack_map_stream->EndMethod();
   return stack_map_stream->Encode();
 }
diff --git a/compiler/optimizing/stack_map_stream.cc b/compiler/optimizing/stack_map_stream.cc
index 3f6010d..dd6d1a2 100644
--- a/compiler/optimizing/stack_map_stream.cc
+++ b/compiler/optimizing/stack_map_stream.cc
@@ -42,7 +42,8 @@
 void StackMapStream::BeginMethod(size_t frame_size_in_bytes,
                                  size_t core_spill_mask,
                                  size_t fp_spill_mask,
-                                 uint32_t num_dex_registers) {
+                                 uint32_t num_dex_registers,
+                                 bool baseline) {
   DCHECK(!in_method_) << "Mismatched Begin/End calls";
   in_method_ = true;
   DCHECK_EQ(packed_frame_size_, 0u) << "BeginMethod was already called";
@@ -52,6 +53,7 @@
   core_spill_mask_ = core_spill_mask;
   fp_spill_mask_ = fp_spill_mask;
   num_dex_registers_ = num_dex_registers;
+  baseline_ = baseline;
 
   if (kVerifyStackMaps) {
     dchecks_.emplace_back([=](const CodeInfo& code_info) {
@@ -299,6 +301,7 @@
   DCHECK(in_inline_info_ == false) << "Mismatched Begin/End calls";
 
   uint32_t flags = (inline_infos_.size() > 0) ? CodeInfo::kHasInlineInfo : 0;
+  flags |= baseline_ ? CodeInfo::kIsBaseline : 0;
   uint32_t bit_table_flags = 0;
   ForEachBitTable([&bit_table_flags](size_t i, auto bit_table) {
     if (bit_table->size() != 0) {  // Record which bit-tables are stored.
diff --git a/compiler/optimizing/stack_map_stream.h b/compiler/optimizing/stack_map_stream.h
index f45e3d7..67f716c 100644
--- a/compiler/optimizing/stack_map_stream.h
+++ b/compiler/optimizing/stack_map_stream.h
@@ -61,7 +61,8 @@
   void BeginMethod(size_t frame_size_in_bytes,
                    size_t core_spill_mask,
                    size_t fp_spill_mask,
-                   uint32_t num_dex_registers);
+                   uint32_t num_dex_registers,
+                   bool baseline = false);
   void EndMethod();
 
   void BeginStackMapEntry(uint32_t dex_pc,
@@ -119,6 +120,7 @@
   uint32_t core_spill_mask_ = 0;
   uint32_t fp_spill_mask_ = 0;
   uint32_t num_dex_registers_ = 0;
+  bool baseline_;
   BitTableBuilder<StackMap> stack_maps_;
   BitTableBuilder<RegisterMask> register_masks_;
   BitmapTableBuilder stack_masks_;
diff --git a/dex2oat/linker/oat_writer_test.cc b/dex2oat/linker/oat_writer_test.cc
index 319e359..0a43bfc 100644
--- a/dex2oat/linker/oat_writer_test.cc
+++ b/dex2oat/linker/oat_writer_test.cc
@@ -471,7 +471,7 @@
   EXPECT_EQ(56U, sizeof(OatHeader));
   EXPECT_EQ(4U, sizeof(OatMethodOffsets));
   EXPECT_EQ(8U, sizeof(OatQuickMethodHeader));
-  EXPECT_EQ(168 * static_cast<size_t>(GetInstructionSetPointerSize(kRuntimeISA)),
+  EXPECT_EQ(169 * static_cast<size_t>(GetInstructionSetPointerSize(kRuntimeISA)),
             sizeof(QuickEntryPoints));
 }
 
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index bad37bc..8b1fc9e 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -2803,3 +2803,15 @@
 .Ldone:
     blx lr
 END art_quick_update_inline_cache
+
+// On entry, method is at the bottom of the stack.
+ENTRY art_quick_compile_optimized
+    SETUP_SAVE_EVERYTHING_FRAME r0
+    ldr r0, [sp, FRAME_SIZE_SAVE_EVERYTHING] @ pass ArtMethod
+    mov r1, rSELF                            @ pass Thread::Current
+    bl     artCompileOptimized               @ (ArtMethod*, Thread*)
+    RESTORE_SAVE_EVERYTHING_FRAME
+    // We don't need to restore the marking register here, as
+    // artCompileOptimized doesn't allow thread suspension.
+    blx lr
+END art_quick_compile_optimized
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index 6e9b533..e0094e6 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -2927,3 +2927,15 @@
 .Ldone:
     ret
 END art_quick_update_inline_cache
+
+// On entry, method is at the bottom of the stack.
+ENTRY art_quick_compile_optimized
+    SETUP_SAVE_EVERYTHING_FRAME
+    ldr x0, [sp, #FRAME_SIZE_SAVE_EVERYTHING] // pass ArtMethod
+    mov x1, xSELF                             // pass Thread::Current
+    bl     artCompileOptimized                // (ArtMethod*, Thread*)
+    RESTORE_SAVE_EVERYTHING_FRAME
+    // We don't need to restore the marking register here, as
+    // artCompileOptimized doesn't allow thread suspension.
+    ret
+END art_quick_compile_optimized
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 2bf82d0..794ee89 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -2570,3 +2570,20 @@
 
     // TODO: implement these!
 UNIMPLEMENTED art_quick_memcmp16
+
+// On entry, the method is at the bottom of the stack.
+DEFINE_FUNCTION art_quick_compile_optimized
+    SETUP_SAVE_EVERYTHING_FRAME ebx, ebx
+    mov FRAME_SIZE_SAVE_EVERYTHING(%esp), %eax // Fetch ArtMethod
+    sub LITERAL(8), %esp                       // Alignment padding
+    CFI_ADJUST_CFA_OFFSET(8)
+    pushl %fs:THREAD_SELF_OFFSET               // pass Thread::Current()
+    CFI_ADJUST_CFA_OFFSET(4)
+    pushl %eax
+    CFI_ADJUST_CFA_OFFSET(4)
+    call SYMBOL(artCompileOptimized)           // (ArtMethod*, Thread*)
+    addl LITERAL(16), %esp                     // Pop arguments.
+    CFI_ADJUST_CFA_OFFSET(-16)
+    RESTORE_SAVE_EVERYTHING_FRAME
+    ret
+END_FUNCTION art_quick_compile_optimized
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 22d0ce4..3b30c37 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -2511,3 +2511,13 @@
 .Ldone:
     ret
 END_FUNCTION art_quick_update_inline_cache
+
+// On entry, method is at the bottom of the stack.
+DEFINE_FUNCTION art_quick_compile_optimized
+    SETUP_SAVE_EVERYTHING_FRAME
+    movq FRAME_SIZE_SAVE_EVERYTHING(%rsp), %rdi // pass ArtMethod
+    movq %gs:THREAD_SELF_OFFSET, %rsi           // pass Thread::Current()
+    call SYMBOL(artCompileOptimized)            // (ArtMethod*, Thread*)
+    RESTORE_SAVE_EVERYTHING_FRAME               // restore frame up to return address
+    ret
+END_FUNCTION art_quick_compile_optimized
diff --git a/runtime/entrypoints/quick/quick_default_init_entrypoints.h b/runtime/entrypoints/quick/quick_default_init_entrypoints.h
index d41f9a0..048deb4 100644
--- a/runtime/entrypoints/quick/quick_default_init_entrypoints.h
+++ b/runtime/entrypoints/quick/quick_default_init_entrypoints.h
@@ -125,8 +125,9 @@
   // StringBuilder append
   qpoints->pStringBuilderAppend = art_quick_string_builder_append;
 
-  // InlineCache update
+  // Tiered JIT support
   qpoints->pUpdateInlineCache = art_quick_update_inline_cache;
+  qpoints->pCompileOptimized = art_quick_compile_optimized;
 }
 
 }  // namespace art
diff --git a/runtime/entrypoints/quick/quick_entrypoints_list.h b/runtime/entrypoints/quick/quick_entrypoints_list.h
index efab7c2..e031b21 100644
--- a/runtime/entrypoints/quick/quick_entrypoints_list.h
+++ b/runtime/entrypoints/quick/quick_entrypoints_list.h
@@ -172,6 +172,7 @@
   V(StringBuilderAppend, void*, uint32_t) \
 \
   V(UpdateInlineCache, void, void) \
+  V(CompileOptimized, void, ArtMethod*, Thread*) \
 \
   V(ReadBarrierJni, void, mirror::CompressedReference<mirror::Object>*, Thread*) \
   V(ReadBarrierMarkReg00, mirror::Object*, mirror::Object*) \
diff --git a/runtime/entrypoints/quick/quick_thread_entrypoints.cc b/runtime/entrypoints/quick/quick_thread_entrypoints.cc
index 0838059..64be926 100644
--- a/runtime/entrypoints/quick/quick_thread_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_thread_entrypoints.cc
@@ -15,6 +15,8 @@
  */
 
 #include "callee_save_frame.h"
+#include "jit/jit.h"
+#include "runtime.h"
 #include "thread-inl.h"
 
 namespace art {
@@ -25,4 +27,11 @@
   self->CheckSuspend();
 }
 
+extern "C" void artCompileOptimized(ArtMethod* method, Thread* self)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  ScopedQuickEntrypointChecks sqec(self);
+  ScopedAssertNoThreadSuspension sants("Enqueuing optimized compilation");
+  Runtime::Current()->GetJit()->EnqueueOptimizedCompilation(method, self);
+}
+
 }  // namespace art
diff --git a/runtime/entrypoints/runtime_asm_entrypoints.h b/runtime/entrypoints/runtime_asm_entrypoints.h
index 3f4e91e..740629a 100644
--- a/runtime/entrypoints/runtime_asm_entrypoints.h
+++ b/runtime/entrypoints/runtime_asm_entrypoints.h
@@ -88,6 +88,7 @@
 }
 
 extern "C" void* art_quick_string_builder_append(uint32_t format);
+extern "C" void art_quick_compile_optimized(ArtMethod*, Thread*);
 
 }  // namespace art
 
diff --git a/runtime/entrypoints_order_test.cc b/runtime/entrypoints_order_test.cc
index 210d851..36f5b39 100644
--- a/runtime/entrypoints_order_test.cc
+++ b/runtime/entrypoints_order_test.cc
@@ -338,7 +338,9 @@
                          sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pStringBuilderAppend, pUpdateInlineCache,
                          sizeof(void*));
-    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pUpdateInlineCache, pReadBarrierJni,
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pUpdateInlineCache, pCompileOptimized,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pCompileOptimized, pReadBarrierJni,
                          sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierJni, pReadBarrierMarkReg00,
                          sizeof(void*));
diff --git a/runtime/jit/jit.cc b/runtime/jit/jit.cc
index 8c7d657..ffcee4b 100644
--- a/runtime/jit/jit.cc
+++ b/runtime/jit/jit.cc
@@ -89,6 +89,8 @@
 JitOptions* JitOptions::CreateFromRuntimeArguments(const RuntimeArgumentMap& options) {
   auto* jit_options = new JitOptions;
   jit_options->use_jit_compilation_ = options.GetOrDefault(RuntimeArgumentMap::UseJitCompilation);
+  jit_options->use_tiered_jit_compilation_ =
+      options.GetOrDefault(RuntimeArgumentMap::UseTieredJitCompilation);
 
   jit_options->code_cache_initial_capacity_ =
       options.GetOrDefault(RuntimeArgumentMap::JITCodeCacheInitialCapacity);
@@ -318,13 +320,14 @@
   // If we get a request to compile a proxy method, we pass the actual Java method
   // of that proxy method, as the compiler does not expect a proxy method.
   ArtMethod* method_to_compile = method->GetInterfaceMethodIfProxy(kRuntimePointerSize);
-  if (!code_cache_->NotifyCompilationOf(method_to_compile, self, osr, prejit, region)) {
+  if (!code_cache_->NotifyCompilationOf(method_to_compile, self, osr, prejit, baseline, region)) {
     return false;
   }
 
   VLOG(jit) << "Compiling method "
             << ArtMethod::PrettyMethod(method_to_compile)
-            << " osr=" << std::boolalpha << osr;
+            << " osr=" << std::boolalpha << osr
+            << " baseline=" << std::boolalpha << baseline;
   bool success = jit_compiler_->CompileMethod(self, region, method_to_compile, baseline, osr);
   code_cache_->DoneCompiling(method_to_compile, self, osr);
   if (!success) {
@@ -1449,7 +1452,10 @@
     if (old_count < HotMethodThreshold() && new_count >= HotMethodThreshold()) {
       if (!code_cache_->ContainsPc(method->GetEntryPointFromQuickCompiledCode())) {
         DCHECK(thread_pool_ != nullptr);
-        thread_pool_->AddTask(self, new JitCompileTask(method, JitCompileTask::TaskKind::kCompile));
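+        // With tiered JIT enabled, the hotness threshold triggers a baseline
+        // compilation first; the generated baseline code later requests an
+        // optimized compilation through its own hotness counter.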
+        JitCompileTask::TaskKind kind = options_->UseTieredJitCompilation()
+            ? JitCompileTask::TaskKind::kCompileBaseline
+            : JitCompileTask::TaskKind::kCompile;
+        thread_pool_->AddTask(self, new JitCompileTask(method, kind));
       }
     }
     if (old_count < OSRMethodThreshold() && new_count >= OSRMethodThreshold()) {
@@ -1467,6 +1473,11 @@
   return true;
 }
 
+void Jit::EnqueueOptimizedCompilation(ArtMethod* method, Thread* self) {
+  thread_pool_->AddTask(
+      self, new JitCompileTask(method, JitCompileTask::TaskKind::kCompile));
+}
+
 class ScopedSetRuntimeThread {
  public:
   explicit ScopedSetRuntimeThread(Thread* self)
diff --git a/runtime/jit/jit.h b/runtime/jit/jit.h
index e5b77c2..42adf6b 100644
--- a/runtime/jit/jit.h
+++ b/runtime/jit/jit.h
@@ -114,6 +114,10 @@
     return use_jit_compilation_;
   }
 
+  bool UseTieredJitCompilation() const {
+    return use_tiered_jit_compilation_;
+  }
+
   void SetUseJitCompilation(bool b) {
     use_jit_compilation_ = b;
   }
@@ -137,6 +141,7 @@
   static uint32_t RoundUpThreshold(uint32_t threshold);
 
   bool use_jit_compilation_;
+  bool use_tiered_jit_compilation_;
   size_t code_cache_initial_capacity_;
   size_t code_cache_max_capacity_;
   uint32_t compile_threshold_;
@@ -383,6 +388,8 @@
   // class path methods.
   void NotifyZygoteCompilationDone();
 
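+  // Adds a task to compile the method with the optimizing compiler. Called
+  // through the pCompileOptimized entrypoint when baseline code gets hot.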
+  void EnqueueOptimizedCompilation(ArtMethod* method, Thread* self);
+
  private:
   Jit(JitCodeCache* code_cache, JitOptions* options);
 
diff --git a/runtime/jit/jit_code_cache.cc b/runtime/jit/jit_code_cache.cc
index c02a699..519655d 100644
--- a/runtime/jit/jit_code_cache.cc
+++ b/runtime/jit/jit_code_cache.cc
@@ -1581,9 +1581,19 @@
                                        Thread* self,
                                        bool osr,
                                        bool prejit,
+                                       bool baseline,
                                        JitMemoryRegion* region) {
-  if (!osr && ContainsPc(method->GetEntryPointFromQuickCompiledCode())) {
-    return false;
+  const void* existing_entry_point = method->GetEntryPointFromQuickCompiledCode();
+  if (!osr && ContainsPc(existing_entry_point)) {
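+    // Deny the request only if the existing code is of the same tier: baseline
+    // code may still be replaced by optimized code.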
+    OatQuickMethodHeader* method_header =
+        OatQuickMethodHeader::FromEntryPoint(existing_entry_point);
+    if (CodeInfo::IsBaseline(method_header->GetOptimizedCodeInfoPtr()) == baseline) {
+      VLOG(jit) << "Not compiling "
+                << method->PrettyMethod()
+                << " because it has already been compiled"
+                << " baseline=" << std::boolalpha << baseline;
+      return false;
+    }
   }
 
   if (NeedsClinitCheckBeforeCall(method) && !prejit) {
diff --git a/runtime/jit/jit_code_cache.h b/runtime/jit/jit_code_cache.h
index 58cf0e3..61fee34 100644
--- a/runtime/jit/jit_code_cache.h
+++ b/runtime/jit/jit_code_cache.h
@@ -184,6 +184,7 @@
                            Thread* self,
                            bool osr,
                            bool prejit,
+                           bool baseline,
                            JitMemoryRegion* region)
       REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!Locks::jit_lock_);
diff --git a/runtime/jit/profiling_info.cc b/runtime/jit/profiling_info.cc
index 2cb569c..8c88760 100644
--- a/runtime/jit/profiling_info.cc
+++ b/runtime/jit/profiling_info.cc
@@ -26,7 +26,8 @@
 namespace art {
 
 ProfilingInfo::ProfilingInfo(ArtMethod* method, const std::vector<uint32_t>& entries)
-      : method_(method),
+      : baseline_hotness_count_(0),
+        method_(method),
         saved_entry_point_(nullptr),
         number_of_inline_caches_(entries.size()),
         current_inline_uses_(0),
diff --git a/runtime/jit/profiling_info.h b/runtime/jit/profiling_info.h
index d4dc498..ada1036 100644
--- a/runtime/jit/profiling_info.h
+++ b/runtime/jit/profiling_info.h
@@ -126,9 +126,18 @@
         (current_inline_uses_ > 0);
   }
 
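+  // Offset of the counter, used by code generators to increment it directly
+  // from compiled code.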
+  static constexpr MemberOffset BaselineHotnessCountOffset() {
+    return MemberOffset(OFFSETOF_MEMBER(ProfilingInfo, baseline_hotness_count_));
+  }
+
  private:
   ProfilingInfo(ArtMethod* method, const std::vector<uint32_t>& entries);
 
+  // Hotness count for methods compiled with the JIT baseline compiler. Once
+  // a threshold is hit (currently the maximum value of uint16_t), we will
+  // JIT compile the method with the optimizing compiler.
+  uint16_t baseline_hotness_count_;
+
   // Method this profiling info is for.
   // Not 'const' as JVMTI introduces obsolete methods that we implement by creating new ArtMethods.
   // See JitCodeCache::MoveObsoleteMethod.
diff --git a/runtime/oat.h b/runtime/oat.h
index 3b20ea1..6c739b2 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -32,8 +32,8 @@
 class PACKED(4) OatHeader {
  public:
   static constexpr std::array<uint8_t, 4> kOatMagic { { 'o', 'a', 't', '\n' } };
-  // Last oat version changed reason: pUpdateInlineCache entrypoint.
-  static constexpr std::array<uint8_t, 4> kOatVersion { { '1', '7', '7', '\0' } };
+  // Last oat version changed reason: pCompileOptimized entrypoint.
+  static constexpr std::array<uint8_t, 4> kOatVersion { { '1', '7', '8', '\0' } };
 
   static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";
   static constexpr const char* kDebuggableKey = "debuggable";
diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc
index bfedfa9..7ef1e6d 100644
--- a/runtime/parsed_options.cc
+++ b/runtime/parsed_options.cc
@@ -201,6 +201,10 @@
           .WithType<bool>()
           .WithValueMap({{"false", false}, {"true", true}})
           .IntoKey(M::UseJitCompilation)
+      .Define("-Xusetieredjit:_")
+          .WithType<bool>()
+          .WithValueMap({{"false", false}, {"true", true}})
+          .IntoKey(M::UseTieredJitCompilation)
       .Define("-Xjitinitialsize:_")
           .WithType<MemoryKiB>()
           .IntoKey(M::JITCodeCacheInitialCapacity)
diff --git a/runtime/runtime_options.def b/runtime/runtime_options.def
index 037167e..5db5a90 100644
--- a/runtime/runtime_options.def
+++ b/runtime/runtime_options.def
@@ -75,6 +75,7 @@
 RUNTIME_OPTIONS_KEY (bool,                UseTLAB,                        (kUseTlab || kUseReadBarrier))
 RUNTIME_OPTIONS_KEY (bool,                EnableHSpaceCompactForOOM,      true)
 RUNTIME_OPTIONS_KEY (bool,                UseJitCompilation,              true)
+RUNTIME_OPTIONS_KEY (bool,                UseTieredJitCompilation,        false)
 RUNTIME_OPTIONS_KEY (bool,                DumpNativeStackOnSigQuit,       true)
 RUNTIME_OPTIONS_KEY (bool,                MadviseRandomAccess,            false)
 RUNTIME_OPTIONS_KEY (JniIdType,           OpaqueJniIds,                   JniIdType::kDefault)  // -Xopaque-jni-ids:{true, false}
diff --git a/runtime/stack_map.h b/runtime/stack_map.h
index 598f3e4..2065a79 100644
--- a/runtime/stack_map.h
+++ b/runtime/stack_map.h
@@ -429,6 +429,10 @@
     return (*code_info_data & kHasInlineInfo) != 0;
   }
 
+  ALWAYS_INLINE static bool IsBaseline(const uint8_t* code_info_data) {
+    return (*code_info_data & kIsBaseline) != 0;
+  }
+
  private:
   // Scan backward to determine dex register locations at given stack map.
   void DecodeDexRegisterMap(uint32_t stack_map_index,
@@ -472,6 +476,7 @@
 
   enum Flags {
     kHasInlineInfo = 1 << 0,
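+    // Set in the CodeInfo of methods compiled by the baseline JIT compiler.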
+    kIsBaseline = 1 << 1,
   };
 
   // The CodeInfo starts with sequence of variable-length bit-encoded integers.