Merge changes I6407c9b4,I49d9da67

* changes:
  Revert "Allow deoptimization when returning from a runtime method."
  Revert "Fix some issues for deoptimizing runtime methods."
diff --git a/compiler/linker/mips/relative_patcher_mips.cc b/compiler/linker/mips/relative_patcher_mips.cc
index 3bec30f..6c974c3 100644
--- a/compiler/linker/mips/relative_patcher_mips.cc
+++ b/compiler/linker/mips/relative_patcher_mips.cc
@@ -61,10 +61,6 @@
       // lui reg, offset_high
       DCHECK_EQ(((*code)[literal_offset + 2] & 0xE0), 0x00);
       DCHECK_EQ((*code)[literal_offset + 3], 0x3C);
-      // addu reg, reg, reg2
-      DCHECK_EQ((*code)[literal_offset + 4], 0x21);
-      DCHECK_EQ(((*code)[literal_offset + 5] & 0x07), 0x00);
-      DCHECK_EQ(((*code)[literal_offset + 7] & 0xFC), 0x00);
     }
   } else {
     // instr reg(s), offset_low
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index 2e78af5..51f5b96 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -267,13 +267,10 @@
       DCHECK(bss_info_high_);
       CodeGeneratorMIPS::PcRelativePatchInfo* info_low =
           mips_codegen->NewTypeBssEntryPatch(cls_->GetDexFile(), type_index, bss_info_high_);
-      bool reordering = __ SetReorder(false);
-      __ Bind(&info_low->label);
-      __ StoreToOffset(kStoreWord,
-                       calling_convention.GetRegisterAt(0),
-                       entry_address,
-                       /* placeholder */ 0x5678);
-      __ SetReorder(reordering);
+      __ Sw(calling_convention.GetRegisterAt(0),
+            entry_address,
+            /* placeholder */ 0x5678,
+            &info_low->label);
     }
 
     // Move the class to the desired location.
@@ -296,10 +293,8 @@
           mips_codegen->NewTypeBssEntryPatch(cls_->GetDexFile(), type_index);
       CodeGeneratorMIPS::PcRelativePatchInfo* info_low =
           mips_codegen->NewTypeBssEntryPatch(cls_->GetDexFile(), type_index, info_high);
-      bool reordering = __ SetReorder(false);
-      mips_codegen->EmitPcRelativeAddressPlaceholderHigh(info_high, TMP, base, info_low);
-      __ StoreToOffset(kStoreWord, out.AsRegister<Register>(), TMP, /* placeholder */ 0x5678);
-      __ SetReorder(reordering);
+      mips_codegen->EmitPcRelativeAddressPlaceholderHigh(info_high, TMP, base);
+      __ Sw(out.AsRegister<Register>(), TMP, /* placeholder */ 0x5678, &info_low->label);
     }
     __ B(GetExitLabel());
   }
@@ -366,13 +361,10 @@
       DCHECK(bss_info_high_);
       CodeGeneratorMIPS::PcRelativePatchInfo* info_low =
           mips_codegen->NewPcRelativeStringPatch(load->GetDexFile(), string_index, bss_info_high_);
-      bool reordering = __ SetReorder(false);
-      __ Bind(&info_low->label);
-      __ StoreToOffset(kStoreWord,
-                       calling_convention.GetRegisterAt(0),
-                       entry_address,
-                       /* placeholder */ 0x5678);
-      __ SetReorder(reordering);
+      __ Sw(calling_convention.GetRegisterAt(0),
+            entry_address,
+            /* placeholder */ 0x5678,
+            &info_low->label);
     }
 
     Primitive::Type type = instruction_->GetType();
@@ -391,10 +383,8 @@
           mips_codegen->NewPcRelativeStringPatch(load->GetDexFile(), string_index);
       CodeGeneratorMIPS::PcRelativePatchInfo* info_low =
           mips_codegen->NewPcRelativeStringPatch(load->GetDexFile(), string_index, info_high);
-      bool reordering = __ SetReorder(false);
-      mips_codegen->EmitPcRelativeAddressPlaceholderHigh(info_high, TMP, base, info_low);
-      __ StoreToOffset(kStoreWord, out, TMP, /* placeholder */ 0x5678);
-      __ SetReorder(reordering);
+      mips_codegen->EmitPcRelativeAddressPlaceholderHigh(info_high, TMP, base);
+      __ Sw(out, TMP, /* placeholder */ 0x5678, &info_low->label);
     }
     __ B(GetExitLabel());
   }
@@ -1743,16 +1733,17 @@
 
 void CodeGeneratorMIPS::EmitPcRelativeAddressPlaceholderHigh(PcRelativePatchInfo* info_high,
                                                              Register out,
-                                                             Register base,
-                                                             PcRelativePatchInfo* info_low) {
+                                                             Register base) {
   DCHECK(!info_high->patch_info_high);
   DCHECK_NE(out, base);
+  bool reordering = __ SetReorder(false);
   if (GetInstructionSetFeatures().IsR6()) {
     DCHECK_EQ(base, ZERO);
     __ Bind(&info_high->label);
     __ Bind(&info_high->pc_rel_label);
     // Add the high half of a 32-bit offset to PC.
     __ Auipc(out, /* placeholder */ 0x1234);
+    __ SetReorder(reordering);
   } else {
     // If base is ZERO, emit NAL to obtain the actual base.
     if (base == ZERO) {
@@ -1766,15 +1757,12 @@
     if (base == ZERO) {
       __ Bind(&info_high->pc_rel_label);
     }
+    __ SetReorder(reordering);
     // Add the high half of a 32-bit offset to PC.
     __ Addu(out, out, (base == ZERO) ? RA : base);
   }
   // A following instruction will add the sign-extended low half of the 32-bit
   // offset to `out` (e.g. lw, jialc, addiu).
-  if (info_low != nullptr) {
-    DCHECK_EQ(info_low->patch_info_high, info_high);
-    __ Bind(&info_low->label);
-  }
 }
 
 CodeGeneratorMIPS::JitPatchInfo* CodeGeneratorMIPS::NewJitRootStringPatch(
@@ -7515,11 +7503,9 @@
       PcRelativePatchInfo* info_high = NewPcRelativeMethodPatch(invoke->GetTargetMethod());
       PcRelativePatchInfo* info_low =
           NewPcRelativeMethodPatch(invoke->GetTargetMethod(), info_high);
-      bool reordering = __ SetReorder(false);
       Register temp_reg = temp.AsRegister<Register>();
-      EmitPcRelativeAddressPlaceholderHigh(info_high, TMP, base_reg, info_low);
-      __ Addiu(temp_reg, TMP, /* placeholder */ 0x5678);
-      __ SetReorder(reordering);
+      EmitPcRelativeAddressPlaceholderHigh(info_high, TMP, base_reg);
+      __ Addiu(temp_reg, TMP, /* placeholder */ 0x5678, &info_low->label);
       break;
     }
     case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
@@ -7531,10 +7517,8 @@
       PcRelativePatchInfo* info_low = NewMethodBssEntryPatch(
           MethodReference(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex()), info_high);
       Register temp_reg = temp.AsRegister<Register>();
-      bool reordering = __ SetReorder(false);
-      EmitPcRelativeAddressPlaceholderHigh(info_high, TMP, base_reg, info_low);
-      __ Lw(temp_reg, TMP, /* placeholder */ 0x5678);
-      __ SetReorder(reordering);
+      EmitPcRelativeAddressPlaceholderHigh(info_high, TMP, base_reg);
+      __ Lw(temp_reg, TMP, /* placeholder */ 0x5678, &info_low->label);
       break;
     }
     case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: {
@@ -7729,13 +7713,10 @@
           codegen_->NewPcRelativeTypePatch(cls->GetDexFile(), cls->GetTypeIndex());
       CodeGeneratorMIPS::PcRelativePatchInfo* info_low =
           codegen_->NewPcRelativeTypePatch(cls->GetDexFile(), cls->GetTypeIndex(), info_high);
-      bool reordering = __ SetReorder(false);
       codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high,
                                                      out,
-                                                     base_or_current_method_reg,
-                                                     info_low);
-      __ Addiu(out, out, /* placeholder */ 0x5678);
-      __ SetReorder(reordering);
+                                                     base_or_current_method_reg);
+      __ Addiu(out, out, /* placeholder */ 0x5678, &info_low->label);
       break;
     }
     case HLoadClass::LoadKind::kBootImageAddress: {
@@ -7754,11 +7735,9 @@
           codegen_->NewTypeBssEntryPatch(cls->GetDexFile(), cls->GetTypeIndex(), bss_info_high);
       constexpr bool non_baker_read_barrier = kUseReadBarrier && !kUseBakerReadBarrier;
       Register temp = non_baker_read_barrier ? out : locations->GetTemp(0).AsRegister<Register>();
-      bool reordering = __ SetReorder(false);
       codegen_->EmitPcRelativeAddressPlaceholderHigh(bss_info_high,
                                                      temp,
                                                      base_or_current_method_reg);
-      __ SetReorder(reordering);
       GenerateGcRootFieldLoad(cls,
                               out_loc,
                               temp,
@@ -7899,13 +7878,10 @@
           codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex());
       CodeGeneratorMIPS::PcRelativePatchInfo* info_low =
           codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex(), info_high);
-      bool reordering = __ SetReorder(false);
       codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high,
                                                      out,
-                                                     base_or_current_method_reg,
-                                                     info_low);
-      __ Addiu(out, out, /* placeholder */ 0x5678);
-      __ SetReorder(reordering);
+                                                     base_or_current_method_reg);
+      __ Addiu(out, out, /* placeholder */ 0x5678, &info_low->label);
       return;  // No dex cache slow path.
     }
     case HLoadString::LoadKind::kBootImageAddress: {
@@ -7925,11 +7901,9 @@
           codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex(), info_high);
       constexpr bool non_baker_read_barrier = kUseReadBarrier && !kUseBakerReadBarrier;
       Register temp = non_baker_read_barrier ? out : locations->GetTemp(0).AsRegister<Register>();
-      bool reordering = __ SetReorder(false);
       codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high,
                                                      temp,
                                                      base_or_current_method_reg);
-      __ SetReorder(reordering);
       GenerateGcRootFieldLoad(load,
                               out_loc,
                               temp,
diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h
index 7195b9d..c0e1ec0 100644
--- a/compiler/optimizing/code_generator_mips.h
+++ b/compiler/optimizing/code_generator_mips.h
@@ -637,8 +637,7 @@
 
   void EmitPcRelativeAddressPlaceholderHigh(PcRelativePatchInfo* info_high,
                                             Register out,
-                                            Register base,
-                                            PcRelativePatchInfo* info_low = nullptr);
+                                            Register base);
 
   // The JitPatchInfo is used for JIT string and class loads.
   struct JitPatchInfo {
diff --git a/compiler/utils/label.h b/compiler/utils/label.h
index 85710d0..d835c63 100644
--- a/compiler/utils/label.h
+++ b/compiler/utils/label.h
@@ -31,9 +31,11 @@
 }  // namespace arm64
 namespace mips {
   class MipsAssembler;
+  class MipsLabel;
 }  // namespace mips
 namespace mips64 {
   class Mips64Assembler;
+  class Mips64Label;
 }  // namespace mips64
 namespace x86 {
   class X86Assembler;
@@ -114,7 +116,9 @@
 
   friend class arm64::Arm64Assembler;
   friend class mips::MipsAssembler;
+  friend class mips::MipsLabel;
   friend class mips64::Mips64Assembler;
+  friend class mips64::Mips64Label;
   friend class x86::X86Assembler;
   friend class x86::NearLabel;
   friend class x86_64::X86_64Assembler;
diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc
index 18099d8..b300cc5 100644
--- a/compiler/utils/mips/assembler_mips.cc
+++ b/compiler/utils/mips/assembler_mips.cc
@@ -47,7 +47,8 @@
       fpr_outs_mask_(0),
       fpr_ins_mask_(0),
       cc_outs_mask_(0),
-      cc_ins_mask_(0) {}
+      cc_ins_mask_(0),
+      patcher_label_(nullptr) {}
 
 void MipsAssembler::DsFsmInstr(uint32_t instruction,
                                uint32_t gpr_outs_mask,
@@ -55,7 +56,8 @@
                                uint32_t fpr_outs_mask,
                                uint32_t fpr_ins_mask,
                                uint32_t cc_outs_mask,
-                               uint32_t cc_ins_mask) {
+                               uint32_t cc_ins_mask,
+                               MipsLabel* patcher_label) {
   if (!reordering_) {
     CHECK_EQ(ds_fsm_state_, kExpectingLabel);
     CHECK_EQ(delay_slot_.instruction_, 0u);
@@ -96,6 +98,7 @@
   delay_slot_.fpr_ins_mask_ = fpr_ins_mask;
   delay_slot_.cc_outs_mask_ = cc_outs_mask;
   delay_slot_.cc_ins_mask_ = cc_ins_mask;
+  delay_slot_.patcher_label_ = patcher_label;
 }
 
 void MipsAssembler::DsFsmLabel() {
@@ -167,8 +170,12 @@
   DsFsmInstr(0, 0, 0, 0, 0, 0, 0);
 }
 
-void MipsAssembler::DsFsmInstrRrr(uint32_t instruction, Register out, Register in1, Register in2) {
-  DsFsmInstr(instruction, (1u << out), (1u << in1) | (1u << in2), 0, 0, 0, 0);
+void MipsAssembler::DsFsmInstrRrr(uint32_t instruction,
+                                  Register out,
+                                  Register in1,
+                                  Register in2,
+                                  MipsLabel* patcher_label) {
+  DsFsmInstr(instruction, (1u << out), (1u << in1) | (1u << in2), 0, 0, 0, 0, patcher_label);
 }
 
 void MipsAssembler::DsFsmInstrRrrr(uint32_t instruction,
@@ -310,8 +317,8 @@
   // Switch from appending instructions at the end of the buffer to overwriting
   // existing instructions (branch placeholders) in the buffer.
   overwriting_ = true;
-  for (auto& branch : branches_) {
-    EmitBranch(&branch);
+  for (size_t id = 0; id < branches_.size(); id++) {
+    EmitBranch(id);
   }
   overwriting_ = false;
 }
@@ -531,8 +538,15 @@
   DsFsmInstrRrr(EmitR(0, rs, rt, rd, 0, 0x21), rd, rs, rt);
 }
 
+void MipsAssembler::Addiu(Register rt, Register rs, uint16_t imm16, MipsLabel* patcher_label) {
+  if (patcher_label != nullptr) {
+    Bind(patcher_label);
+  }
+  DsFsmInstrRrr(EmitI(0x9, rs, rt, imm16), rt, rs, rs, patcher_label);
+}
+
 void MipsAssembler::Addiu(Register rt, Register rs, uint16_t imm16) {
-  DsFsmInstrRrr(EmitI(0x9, rs, rt, imm16), rt, rs, rs);
+  Addiu(rt, rs, imm16, /* patcher_label */ nullptr);
 }
 
 void MipsAssembler::Subu(Register rd, Register rs, Register rt) {
@@ -791,8 +805,15 @@
   DsFsmInstrRrr(EmitI(0x21, rs, rt, imm16), rt, rs, rs);
 }
 
+void MipsAssembler::Lw(Register rt, Register rs, uint16_t imm16, MipsLabel* patcher_label) {
+  if (patcher_label != nullptr) {
+    Bind(patcher_label);
+  }
+  DsFsmInstrRrr(EmitI(0x23, rs, rt, imm16), rt, rs, rs, patcher_label);
+}
+
 void MipsAssembler::Lw(Register rt, Register rs, uint16_t imm16) {
-  DsFsmInstrRrr(EmitI(0x23, rs, rt, imm16), rt, rs, rs);
+  Lw(rt, rs, imm16, /* patcher_label */ nullptr);
 }
 
 void MipsAssembler::Lwl(Register rt, Register rs, uint16_t imm16) {
@@ -866,8 +887,15 @@
   DsFsmInstrRrr(EmitI(0x29, rs, rt, imm16), ZERO, rt, rs);
 }
 
+void MipsAssembler::Sw(Register rt, Register rs, uint16_t imm16, MipsLabel* patcher_label) {
+  if (patcher_label != nullptr) {
+    Bind(patcher_label);
+  }
+  DsFsmInstrRrr(EmitI(0x2b, rs, rt, imm16), ZERO, rt, rs, patcher_label);
+}
+
 void MipsAssembler::Sw(Register rt, Register rs, uint16_t imm16) {
-  DsFsmInstrRrr(EmitI(0x2b, rs, rt, imm16), ZERO, rt, rs);
+  Sw(rt, rs, imm16, /* patcher_label */ nullptr);
 }
 
 void MipsAssembler::Swl(Register rt, Register rs, uint16_t imm16) {
@@ -991,6 +1019,7 @@
 
 void MipsAssembler::Jalr(Register rd, Register rs) {
   uint32_t last_instruction = delay_slot_.instruction_;
+  MipsLabel* patcher_label = delay_slot_.patcher_label_;
   bool exchange = (last_instruction != 0 &&
       (delay_slot_.gpr_outs_mask_ & (1u << rs)) == 0 &&
       ((delay_slot_.gpr_ins_mask_ | delay_slot_.gpr_outs_mask_) & (1u << rd)) == 0);
@@ -1011,6 +1040,10 @@
     CHECK_EQ(instr1, last_instruction);
     buffer_.Store<uint32_t>(pos1, instr2);
     buffer_.Store<uint32_t>(pos2, instr1);
+    // Move the patcher label along with the patched instruction.
+    if (patcher_label != nullptr) {
+      patcher_label->AdjustBoundPosition(sizeof(uint32_t));
+    }
   } else if (reordering_) {
     Nop();
   }
@@ -3237,7 +3270,8 @@
       lhs_reg_(0),
       rhs_reg_(0),
       condition_(kUncond),
-      delayed_instruction_(kUnfilledDelaySlot) {
+      delayed_instruction_(kUnfilledDelaySlot),
+      patcher_label_(nullptr) {
   InitializeType(
       (is_call ? (is_bare ? kBareCall : kCall) : (is_bare ? kBareCondBranch : kCondBranch)),
       is_r6);
@@ -3256,7 +3290,8 @@
       lhs_reg_(lhs_reg),
       rhs_reg_(rhs_reg),
       condition_(condition),
-      delayed_instruction_(kUnfilledDelaySlot) {
+      delayed_instruction_(kUnfilledDelaySlot),
+      patcher_label_(nullptr) {
   CHECK_NE(condition, kUncond);
   switch (condition) {
     case kCondLT:
@@ -3313,7 +3348,8 @@
       lhs_reg_(dest_reg),
       rhs_reg_(base_reg),
       condition_(kUncond),
-      delayed_instruction_(kUnfilledDelaySlot) {
+      delayed_instruction_(kUnfilledDelaySlot),
+      patcher_label_(nullptr) {
   CHECK_NE(dest_reg, ZERO);
   if (is_r6) {
     CHECK_EQ(base_reg, ZERO);
@@ -3690,6 +3726,17 @@
   return &branches_[branch_id];
 }
 
+void MipsAssembler::BindRelativeToPrecedingBranch(MipsLabel* label,
+                                                  uint32_t prev_branch_id_plus_one,
+                                                  uint32_t position) {
+  if (prev_branch_id_plus_one != 0) {
+    const Branch* branch = GetBranch(prev_branch_id_plus_one - 1);
+    position -= branch->GetEndLocation();
+  }
+  label->prev_branch_id_plus_one_ = prev_branch_id_plus_one;
+  label->BindTo(position);
+}
+
 void MipsAssembler::Bind(MipsLabel* label) {
   CHECK(!label->IsBound());
   uint32_t bound_pc = buffer_.Size();
@@ -3715,22 +3762,15 @@
 
   // Now make the label object contain its own location (relative to the end of the preceding
   // branch, if any; it will be used by the branches referring to and following this label).
-  label->prev_branch_id_plus_one_ = branches_.size();
-  if (label->prev_branch_id_plus_one_) {
-    uint32_t branch_id = label->prev_branch_id_plus_one_ - 1;
-    const Branch* branch = GetBranch(branch_id);
-    bound_pc -= branch->GetEndLocation();
-  }
-  label->BindTo(bound_pc);
+  BindRelativeToPrecedingBranch(label, branches_.size(), bound_pc);
 }
 
 uint32_t MipsAssembler::GetLabelLocation(const MipsLabel* label) const {
   CHECK(label->IsBound());
   uint32_t target = label->Position();
-  if (label->prev_branch_id_plus_one_) {
+  if (label->prev_branch_id_plus_one_ != 0) {
     // Get label location based on the branch preceding it.
-    uint32_t branch_id = label->prev_branch_id_plus_one_ - 1;
-    const Branch* branch = GetBranch(branch_id);
+    const Branch* branch = GetBranch(label->prev_branch_id_plus_one_ - 1);
     target += branch->GetEndLocation();
   }
   return target;
@@ -3872,10 +3912,15 @@
   return delayed_instruction_;
 }
 
-void MipsAssembler::Branch::SetDelayedInstruction(uint32_t instruction) {
+MipsLabel* MipsAssembler::Branch::GetPatcherLabel() const {
+  return patcher_label_;
+}
+
+void MipsAssembler::Branch::SetDelayedInstruction(uint32_t instruction, MipsLabel* patcher_label) {
   CHECK_NE(instruction, kUnfilledDelaySlot);
   CHECK_EQ(delayed_instruction_, kUnfilledDelaySlot);
   delayed_instruction_ = instruction;
+  patcher_label_ = patcher_label;
 }
 
 void MipsAssembler::Branch::DecrementLocations() {
@@ -3916,7 +3961,7 @@
     buffer_.Resize(size);
     // Attach it to the branch and adjust the branch locations.
     branch.DecrementLocations();
-    branch.SetDelayedInstruction(delay_slot_.instruction_);
+    branch.SetDelayedInstruction(delay_slot_.instruction_, delay_slot_.patcher_label_);
   } else if (!reordering_ && branch.GetType() == Branch::kUncondBranch) {
     // If reordefing is disabled, prevent absorption of the target instruction.
     branch.SetDelayedInstruction(Branch::kUnfillableDelaySlot);
@@ -4140,15 +4185,49 @@
   {  2, 0, 0, MipsAssembler::Branch::kOffset32, 0 },  // kR6FarLiteral
 };
 
+static inline bool IsAbsorbableInstruction(uint32_t instruction) {
+  // The relative patcher patches addiu, lw and sw with an immediate operand of 0x5678.
+  // We want to make sure that these instructions do not get absorbed into delay slots
+  // of unconditional branches on R2. Absorption would otherwise make copies of
+  // unpatched instructions.
+  if ((instruction & 0xFFFF) != 0x5678) {
+    return true;
+  }
+  switch (instruction >> kOpcodeShift) {
+    case 0x09:  // Addiu.
+    case 0x23:  // Lw.
+    case 0x2B:  // Sw.
+      return false;
+    default:
+      return true;
+  }
+}
+
 // Note: make sure branch_info_[] and EmitBranch() are kept synchronized.
-void MipsAssembler::EmitBranch(MipsAssembler::Branch* branch) {
+void MipsAssembler::EmitBranch(uint32_t branch_id) {
   CHECK_EQ(overwriting_, true);
+  Branch* branch = GetBranch(branch_id);
   overwrite_location_ = branch->GetLocation();
   uint32_t offset = branch->GetOffset(GetBranchOrPcRelBaseForEncoding(branch));
   BranchCondition condition = branch->GetCondition();
   Register lhs = branch->GetLeftRegister();
   Register rhs = branch->GetRightRegister();
   uint32_t delayed_instruction = branch->GetDelayedInstruction();
+  MipsLabel* patcher_label = branch->GetPatcherLabel();
+  if (patcher_label != nullptr) {
+    // Update the patcher label location to account for branch promotion and
+    // delay slot filling.
+    CHECK(patcher_label->IsBound());
+    uint32_t bound_pc = branch->GetLocation();
+    if (!branch->IsLong()) {
+      // Short branches precede delay slots.
+      // Long branches follow "delay slots".
+      bound_pc += sizeof(uint32_t);
+    }
+    // Rebind the label.
+    patcher_label->Reinitialize();
+    BindRelativeToPrecedingBranch(patcher_label, branch_id, bound_pc);
+  }
   switch (branch->GetType()) {
     // R2 short branches.
     case Branch::kUncondBranch:
@@ -4164,8 +4243,11 @@
         if (offset != 0x7FFF) {
           uint32_t target = branch->GetTarget();
           if (std::binary_search(ds_fsm_target_pcs_.begin(), ds_fsm_target_pcs_.end(), target)) {
-            delayed_instruction = buffer_.Load<uint32_t>(target);
-            offset++;
+            uint32_t target_instruction = buffer_.Load<uint32_t>(target);
+            if (IsAbsorbableInstruction(target_instruction)) {
+              delayed_instruction = target_instruction;
+              offset++;
+            }
           }
         }
       }
@@ -4406,6 +4488,11 @@
   }
   CHECK_EQ(overwrite_location_, branch->GetEndLocation());
   CHECK_LT(branch->GetSize(), static_cast<uint32_t>(Branch::kMaxBranchSize));
+  if (patcher_label != nullptr) {
+    // The instruction at the patcher label should still look like a patchable one.
+    uint32_t patched_instruction = buffer_.Load<uint32_t>(GetLabelLocation(patcher_label));
+    CHECK(!IsAbsorbableInstruction(patched_instruction));
+  }
 }
 
 void MipsAssembler::B(MipsLabel* label, bool is_bare) {
diff --git a/compiler/utils/mips/assembler_mips.h b/compiler/utils/mips/assembler_mips.h
index 7f9d576..0f163ac 100644
--- a/compiler/utils/mips/assembler_mips.h
+++ b/compiler/utils/mips/assembler_mips.h
@@ -80,6 +80,12 @@
   MipsLabel(MipsLabel&& src)
       : Label(std::move(src)), prev_branch_id_plus_one_(src.prev_branch_id_plus_one_) {}
 
+  void AdjustBoundPosition(int delta) {
+    CHECK(IsBound());
+    // Bound label's position is negative, hence decrementing it.
+    position_ -= delta;
+  }
+
  private:
   uint32_t prev_branch_id_plus_one_;  // To get distance from preceding branch, if any.
 
@@ -215,6 +221,7 @@
 
   // Emit Machine Instructions.
   void Addu(Register rd, Register rs, Register rt);
+  void Addiu(Register rt, Register rs, uint16_t imm16, MipsLabel* patcher_label);
   void Addiu(Register rt, Register rs, uint16_t imm16);
   void Subu(Register rd, Register rs, Register rt);
 
@@ -272,6 +279,7 @@
 
   void Lb(Register rt, Register rs, uint16_t imm16);
   void Lh(Register rt, Register rs, uint16_t imm16);
+  void Lw(Register rt, Register rs, uint16_t imm16, MipsLabel* patcher_label);
   void Lw(Register rt, Register rs, uint16_t imm16);
   void Lwl(Register rt, Register rs, uint16_t imm16);
   void Lwr(Register rt, Register rs, uint16_t imm16);
@@ -287,6 +295,7 @@
 
   void Sb(Register rt, Register rs, uint16_t imm16);
   void Sh(Register rt, Register rs, uint16_t imm16);
+  void Sw(Register rt, Register rs, uint16_t imm16, MipsLabel* patcher_label);
   void Sw(Register rt, Register rs, uint16_t imm16);
   void Swl(Register rt, Register rs, uint16_t imm16);
   void Swr(Register rt, Register rs, uint16_t imm16);
@@ -1288,6 +1297,9 @@
     uint32_t cc_ins_mask_;
     // Branches never operate on the LO and HI registers, hence there's
     // no mask for LO and HI.
+
+    // Label for patchable instructions to allow moving them into delay slots.
+    MipsLabel* patcher_label_;
   };
 
   // Delay slot finite state machine's (DS FSM's) state. The FSM state is updated
@@ -1440,8 +1452,9 @@
 
     // Various helpers for branch delay slot management.
     bool CanHaveDelayedInstruction(const DelaySlot& delay_slot) const;
-    void SetDelayedInstruction(uint32_t instruction);
+    void SetDelayedInstruction(uint32_t instruction, MipsLabel* patcher_label = nullptr);
     uint32_t GetDelayedInstruction() const;
+    MipsLabel* GetPatcherLabel() const;
     void DecrementLocations();
 
     // Returns the bit size of the signed offset that the branch instruction can handle.
@@ -1526,6 +1539,8 @@
                                     // kUnfillableDelaySlot if none and unfillable
                                     // (the latter is only used for unconditional R2
                                     // branches).
+
+    MipsLabel* patcher_label_;      // Patcher label for the instruction in the delay slot.
   };
   friend std::ostream& operator<<(std::ostream& os, const Branch::Type& rhs);
   friend std::ostream& operator<<(std::ostream& os, const Branch::OffsetBits& rhs);
@@ -1580,9 +1595,14 @@
                   uint32_t fpr_outs_mask,
                   uint32_t fpr_ins_mask,
                   uint32_t cc_outs_mask,
-                  uint32_t cc_ins_mask);
+                  uint32_t cc_ins_mask,
+                  MipsLabel* patcher_label = nullptr);
   void DsFsmInstrNop(uint32_t instruction);
-  void DsFsmInstrRrr(uint32_t instruction, Register out, Register in1, Register in2);
+  void DsFsmInstrRrr(uint32_t instruction,
+                     Register out,
+                     Register in1,
+                     Register in2,
+                     MipsLabel* patcher_label = nullptr);
   void DsFsmInstrRrrr(uint32_t instruction, Register in1_out, Register in2, Register in3);
   void DsFsmInstrFff(uint32_t instruction, FRegister out, FRegister in1, FRegister in2);
   void DsFsmInstrFfff(uint32_t instruction, FRegister in1_out, FRegister in2, FRegister in3);
@@ -1605,12 +1625,15 @@
   const Branch* GetBranch(uint32_t branch_id) const;
   uint32_t GetBranchLocationOrPcRelBase(const MipsAssembler::Branch* branch) const;
   uint32_t GetBranchOrPcRelBaseForEncoding(const MipsAssembler::Branch* branch) const;
+  void BindRelativeToPrecedingBranch(MipsLabel* label,
+                                     uint32_t prev_branch_id_plus_one,
+                                     uint32_t position);
 
   void EmitLiterals();
   void ReserveJumpTableSpace();
   void EmitJumpTables();
   void PromoteBranches();
-  void EmitBranch(Branch* branch);
+  void EmitBranch(uint32_t branch_id);
   void EmitBranches();
   void PatchCFI(size_t number_of_delayed_adjust_pcs);
 
diff --git a/compiler/utils/mips/assembler_mips32r6_test.cc b/compiler/utils/mips/assembler_mips32r6_test.cc
index 6e52b17..a5cd5a7 100644
--- a/compiler/utils/mips/assembler_mips32r6_test.cc
+++ b/compiler/utils/mips/assembler_mips32r6_test.cc
@@ -1529,10 +1529,62 @@
   DriverStr(expected, "SetReorder");
 }
 
-TEST_F(AssemblerMIPS32r6Test, LongBranchReorder) {
-  mips::MipsLabel label;
+TEST_F(AssemblerMIPS32r6Test, ReorderPatchedInstruction) {
   __ SetReorder(true);
-  __ Subu(mips::T0, mips::T1, mips::T2);
+  mips::MipsLabel label1, label2;
+  mips::MipsLabel patcher_label1, patcher_label2, patcher_label3, patcher_label4, patcher_label5;
+  __ Lw(mips::V0, mips::A0, 0x5678, &patcher_label1);
+  __ Bc1eqz(mips::F0, &label1);
+  constexpr uint32_t kAdduCount1 = 63;
+  for (size_t i = 0; i != kAdduCount1; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+  __ Bind(&label1);
+  __ Sw(mips::V0, mips::A0, 0x5678, &patcher_label2);
+  __ Bc1nez(mips::F2, &label2);
+  constexpr uint32_t kAdduCount2 = 64;
+  for (size_t i = 0; i != kAdduCount2; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+  __ Bind(&label2);
+  __ Addiu(mips::V0, mips::A0, 0x5678, &patcher_label3);
+  __ Bc1eqz(mips::F4, &label1);
+  __ Lw(mips::V0, mips::A0, 0x5678, &patcher_label4);
+  __ Jalr(mips::T9);
+  __ Sw(mips::V0, mips::A0, 0x5678, &patcher_label5);
+  __ Bltc(mips::V0, mips::V1, &label2);
+  __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+
+  std::string expected =
+      ".set noreorder\n"
+      "bc1eqz $f0, 1f\n"
+      "lw $v0, 0x5678($a0)\n" +
+      RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
+      "1:\n"
+      "bc1nez $f2, 2f\n"
+      "sw $v0, 0x5678($a0)\n" +
+      RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
+      "2:\n"
+      "bc1eqz $f4, 1b\n"
+      "addiu $v0, $a0, 0x5678\n"
+      "jalr $t9\n"
+      "lw $v0, 0x5678($a0)\n"
+      "sw $v0, 0x5678($a0)\n"
+      "bltc $v0, $v1, 2b\n"
+      "nop\n"
+      "addu $zero, $zero, $zero\n";
+  DriverStr(expected, "ReorderPatchedInstruction");
+  EXPECT_EQ(__ GetLabelLocation(&patcher_label1), 1 * 4u);
+  EXPECT_EQ(__ GetLabelLocation(&patcher_label2), (kAdduCount1 + 3) * 4u);
+  EXPECT_EQ(__ GetLabelLocation(&patcher_label3), (kAdduCount1 + kAdduCount2 + 5) * 4u);
+  EXPECT_EQ(__ GetLabelLocation(&patcher_label4), (kAdduCount1 + kAdduCount2 + 7) * 4u);
+  EXPECT_EQ(__ GetLabelLocation(&patcher_label5), (kAdduCount1 + kAdduCount2 + 8) * 4u);
+}
+
+TEST_F(AssemblerMIPS32r6Test, LongBranchReorder) {
+  mips::MipsLabel label, patcher_label1, patcher_label2;
+  __ SetReorder(true);
+  __ Addiu(mips::T0, mips::T1, 0x5678, &patcher_label1);
   __ Bc1nez(mips::F0, &label);
   constexpr uint32_t kAdduCount1 = (1u << 15) + 1;
   for (uint32_t i = 0; i != kAdduCount1; ++i) {
@@ -1543,7 +1595,7 @@
   for (uint32_t i = 0; i != kAdduCount2; ++i) {
     __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
   }
-  __ Subu(mips::T0, mips::T1, mips::T2);
+  __ Addiu(mips::T0, mips::T1, 0x5678, &patcher_label2);
   __ Bc1eqz(mips::F0, &label);
 
   uint32_t offset_forward = 2 + kAdduCount1;  // 2: account for auipc and jic.
@@ -1557,7 +1609,7 @@
   std::ostringstream oss;
   oss <<
       ".set noreorder\n"
-      "subu $t0, $t1, $t2\n"
+      "addiu $t0, $t1, 0x5678\n"
       "bc1eqz $f0, 1f\n"
       "auipc $at, 0x" << std::hex << High16Bits(offset_forward) << "\n"
       "jic $at, 0x" << std::hex << Low16Bits(offset_forward) << "\n"
@@ -1565,13 +1617,15 @@
       RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") <<
       "2:\n" <<
       RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") <<
-      "subu $t0, $t1, $t2\n"
+      "addiu $t0, $t1, 0x5678\n"
       "bc1nez $f0, 3f\n"
       "auipc $at, 0x" << std::hex << High16Bits(offset_back) << "\n"
       "jic $at, 0x" << std::hex << Low16Bits(offset_back) << "\n"
       "3:\n";
   std::string expected = oss.str();
   DriverStr(expected, "LongBranchReorder");
+  EXPECT_EQ(__ GetLabelLocation(&patcher_label1), 0 * 4u);
+  EXPECT_EQ(__ GetLabelLocation(&patcher_label2), (kAdduCount1 + kAdduCount2 + 4) * 4u);
 }
 
 ///////////////////////
diff --git a/compiler/utils/mips/assembler_mips_test.cc b/compiler/utils/mips/assembler_mips_test.cc
index d9bf0b8..680c347 100644
--- a/compiler/utils/mips/assembler_mips_test.cc
+++ b/compiler/utils/mips/assembler_mips_test.cc
@@ -2506,6 +2506,7 @@
 
 TEST_F(AssemblerMIPSTest, AbsorbTargetInstruction) {
   mips::MipsLabel label1, label2, label3, label4, label5, label6;
+  mips::MipsLabel label7, label8, label9, label10, label11, label12, label13;
   __ SetReorder(true);
 
   __ B(&label1);
@@ -2529,6 +2530,41 @@
   __ Bind(&label6);
   __ CodePosition();  // Even across Bind(), CodePosition() prevents absorbing the ADDU above.
 
+  __ Nop();
+  __ B(&label7);
+  __ Bind(&label7);
+  __ Lw(mips::V0, mips::A0, 0x5678);  // Possibly patchable instruction, not absorbed.
+
+  __ Nop();
+  __ B(&label8);
+  __ Bind(&label8);
+  __ Sw(mips::V0, mips::A0, 0x5678);  // Possibly patchable instruction, not absorbed.
+
+  __ Nop();
+  __ B(&label9);
+  __ Bind(&label9);
+  __ Addiu(mips::V0, mips::A0, 0x5678);  // Possibly patchable instruction, not absorbed.
+
+  __ Nop();
+  __ B(&label10);
+  __ Bind(&label10);
+  __ Lw(mips::V0, mips::A0, 0x5680);  // Immediate isn't 0x5678, absorbed.
+
+  __ Nop();
+  __ B(&label11);
+  __ Bind(&label11);
+  __ Sw(mips::V0, mips::A0, 0x5680);  // Immediate isn't 0x5678, absorbed.
+
+  __ Nop();
+  __ B(&label12);
+  __ Bind(&label12);
+  __ Addiu(mips::V0, mips::A0, 0x5680);  // Immediate isn't 0x5678, absorbed.
+
+  __ Nop();
+  __ B(&label13);
+  __ Bind(&label13);
+  __ Andi(mips::V0, mips::A0, 0x5678);  // Not one of patchable instructions, absorbed.
+
   std::string expected =
       ".set noreorder\n"
       "b 1f\n"
@@ -2550,7 +2586,49 @@
       "b 5f\n"
       "nop\n"
       "5:\n"
-      "addu $t0, $t1, $t2\n";
+      "addu $t0, $t1, $t2\n"
+
+      "nop\n"
+      "b 7f\n"
+      "nop\n"
+      "7:\n"
+      "lw $v0, 0x5678($a0)\n"
+
+      "nop\n"
+      "b 8f\n"
+      "nop\n"
+      "8:\n"
+      "sw $v0, 0x5678($a0)\n"
+
+      "nop\n"
+      "b 9f\n"
+      "nop\n"
+      "9:\n"
+      "addiu $v0, $a0, 0x5678\n"
+
+      "nop\n"
+      "b 10f\n"
+      "lw $v0, 0x5680($a0)\n"
+      "lw $v0, 0x5680($a0)\n"
+      "10:\n"
+
+      "nop\n"
+      "b 11f\n"
+      "sw $v0, 0x5680($a0)\n"
+      "sw $v0, 0x5680($a0)\n"
+      "11:\n"
+
+      "nop\n"
+      "b 12f\n"
+      "addiu $v0, $a0, 0x5680\n"
+      "addiu $v0, $a0, 0x5680\n"
+      "12:\n"
+
+      "nop\n"
+      "b 13f\n"
+      "andi $v0, $a0, 0x5678\n"
+      "andi $v0, $a0, 0x5678\n"
+      "13:\n";
   DriverStr(expected, "AbsorbTargetInstruction");
 }
 
@@ -2637,10 +2715,62 @@
   DriverStr(expected, "SetReorder");
 }
 
-TEST_F(AssemblerMIPSTest, LongBranchReorder) {
-  mips::MipsLabel label;
+TEST_F(AssemblerMIPSTest, ReorderPatchedInstruction) {
   __ SetReorder(true);
-  __ Subu(mips::T0, mips::T1, mips::T2);
+  mips::MipsLabel label1, label2;
+  mips::MipsLabel patcher_label1, patcher_label2, patcher_label3, patcher_label4, patcher_label5;
+  __ Lw(mips::V0, mips::A0, 0x5678, &patcher_label1);
+  __ Beq(mips::A0, mips::A1, &label1);
+  constexpr uint32_t kAdduCount1 = 63;
+  for (size_t i = 0; i != kAdduCount1; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+  __ Bind(&label1);
+  __ Sw(mips::V0, mips::A0, 0x5678, &patcher_label2);
+  __ Bltz(mips::V1, &label2);
+  constexpr uint32_t kAdduCount2 = 64;
+  for (size_t i = 0; i != kAdduCount2; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+  __ Bind(&label2);
+  __ Addiu(mips::V0, mips::A0, 0x5678, &patcher_label3);
+  __ B(&label1);
+  __ Lw(mips::V0, mips::A0, 0x5678, &patcher_label4);
+  __ Jalr(mips::T9);
+  __ Sw(mips::V0, mips::A0, 0x5678, &patcher_label5);
+  __ Blt(mips::V0, mips::V1, &label2);
+  __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+
+  std::string expected =
+      ".set noreorder\n"
+      "beq $a0, $a1, 1f\n"
+      "lw $v0, 0x5678($a0)\n" +
+      RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
+      "1:\n"
+      "bltz $v1, 2f\n"
+      "sw $v0, 0x5678($a0)\n" +
+      RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
+      "2:\n"
+      "b 1b\n"
+      "addiu $v0, $a0, 0x5678\n"
+      "jalr $t9\n"
+      "lw $v0, 0x5678($a0)\n"
+      "slt $at, $v0, $v1\n"
+      "bnez $at, 2b\n"
+      "sw $v0, 0x5678($a0)\n"
+      "addu $zero, $zero, $zero\n";
+  DriverStr(expected, "ReorderPatchedInstruction");
+  EXPECT_EQ(__ GetLabelLocation(&patcher_label1), 1 * 4u);
+  EXPECT_EQ(__ GetLabelLocation(&patcher_label2), (kAdduCount1 + 3) * 4u);
+  EXPECT_EQ(__ GetLabelLocation(&patcher_label3), (kAdduCount1 + kAdduCount2 + 5) * 4u);
+  EXPECT_EQ(__ GetLabelLocation(&patcher_label4), (kAdduCount1 + kAdduCount2 + 7) * 4u);
+  EXPECT_EQ(__ GetLabelLocation(&patcher_label5), (kAdduCount1 + kAdduCount2 + 10) * 4u);
+}
+
+TEST_F(AssemblerMIPSTest, LongBranchReorder) {
+  mips::MipsLabel label, patcher_label1, patcher_label2;
+  __ SetReorder(true);
+  __ Addiu(mips::T0, mips::T1, 0x5678, &patcher_label1);
   __ B(&label);
   constexpr uint32_t kAdduCount1 = (1u << 15) + 1;
   for (size_t i = 0; i != kAdduCount1; ++i) {
@@ -2651,7 +2781,7 @@
   for (size_t i = 0; i != kAdduCount2; ++i) {
     __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
   }
-  __ Subu(mips::T0, mips::T1, mips::T2);
+  __ Addiu(mips::T0, mips::T1, 0x5678, &patcher_label2);
   __ B(&label);
 
   // Account for 5 extra instructions: ori, addu, lw, jalr, addiu.
@@ -2662,7 +2792,7 @@
   std::ostringstream oss;
   oss <<
       ".set noreorder\n"
-      "subu $t0, $t1, $t2\n"
+      "addiu $t0, $t1, 0x5678\n"
       "addiu $sp, $sp, -4\n"
       "sw $ra, 0($sp)\n"
       "bltzal $zero, .+4\n"
@@ -2674,7 +2804,7 @@
       "addiu $sp, $sp, 4\n" <<
       RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") <<
       RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") <<
-      "subu $t0, $t1, $t2\n"
+      "addiu $t0, $t1, 0x5678\n"
       "addiu $sp, $sp, -4\n"
       "sw $ra, 0($sp)\n"
       "bltzal $zero, .+4\n"
@@ -2686,6 +2816,8 @@
       "addiu $sp, $sp, 4\n";
   std::string expected = oss.str();
   DriverStr(expected, "LongBranchReorder");
+  EXPECT_EQ(__ GetLabelLocation(&patcher_label1), 0 * 4u);
+  EXPECT_EQ(__ GetLabelLocation(&patcher_label2), (kAdduCount1 + kAdduCount2 + 10) * 4u);
 }
 
 ///////////////////////
diff --git a/openjdkjvmti/generate-operator-out.py b/openjdkjvmti/generate-operator-out.py
new file mode 120000
index 0000000..cc291d2
--- /dev/null
+++ b/openjdkjvmti/generate-operator-out.py
@@ -0,0 +1 @@
+../tools/generate-operator-out.py
\ No newline at end of file
diff --git a/openjdkjvmti/ti_phase.cc b/openjdkjvmti/ti_phase.cc
index 8893c9b..07cf31c 100644
--- a/openjdkjvmti/ti_phase.cc
+++ b/openjdkjvmti/ti_phase.cc
@@ -63,18 +63,25 @@
         break;
       case RuntimePhase::kStart:
         {
+          PhaseUtil::current_phase_ = JVMTI_PHASE_START;
           art::ScopedThreadSuspension sts(art::Thread::Current(), art::ThreadState::kNative);
           event_handler->DispatchEvent<ArtJvmtiEvent::kVmStart>(nullptr, GetJniEnv());
-          PhaseUtil::current_phase_ = JVMTI_PHASE_START;
         }
         break;
       case RuntimePhase::kInit:
         {
           ThreadUtil::CacheData();
-          ScopedLocalRef<jthread> thread(GetJniEnv(), GetCurrentJThread());
-          art::ScopedThreadSuspension sts(art::Thread::Current(), art::ThreadState::kNative);
-          event_handler->DispatchEvent<ArtJvmtiEvent::kVmInit>(nullptr, GetJniEnv(), thread.get());
           PhaseUtil::current_phase_ = JVMTI_PHASE_LIVE;
+          {
+            ScopedLocalRef<jthread> thread(GetJniEnv(), GetCurrentJThread());
+            art::ScopedThreadSuspension sts(art::Thread::Current(), art::ThreadState::kNative);
+            event_handler->DispatchEvent<ArtJvmtiEvent::kVmInit>(
+                nullptr, GetJniEnv(), thread.get());
+          }
+          // We need the VMInit and main-thread ThreadStart events to be ordered this way to match
+          // behavior expected by some real-world agents. The spec does not really require this but
+          // compatibility is a useful property to maintain.
+          ThreadUtil::VMInitEventSent();
         }
         break;
       case RuntimePhase::kDeath:
diff --git a/openjdkjvmti/ti_thread.cc b/openjdkjvmti/ti_thread.cc
index 6fa73f8..b0a1a85 100644
--- a/openjdkjvmti/ti_thread.cc
+++ b/openjdkjvmti/ti_thread.cc
@@ -57,13 +57,14 @@
 
 art::ArtField* ThreadUtil::context_class_loader_ = nullptr;
 
-struct ThreadCallback : public art::ThreadLifecycleCallback, public art::RuntimePhaseCallback {
+struct ThreadCallback : public art::ThreadLifecycleCallback {
   jthread GetThreadObject(art::Thread* self) REQUIRES_SHARED(art::Locks::mutator_lock_) {
     if (self->GetPeer() == nullptr) {
       return nullptr;
     }
     return self->GetJniEnv()->AddLocalReference<jthread>(self->GetPeer());
   }
+
   template <ArtJvmtiEvent kEvent>
   void Post(art::Thread* self) REQUIRES_SHARED(art::Locks::mutator_lock_) {
     DCHECK_EQ(self, art::Thread::Current());
@@ -96,15 +97,6 @@
     Post<ArtJvmtiEvent::kThreadEnd>(self);
   }
 
-  void NextRuntimePhase(RuntimePhase phase) OVERRIDE REQUIRES_SHARED(art::Locks::mutator_lock_) {
-    if (phase == RuntimePhase::kInit) {
-      // We moved to VMInit. Report the main thread as started (it was attached early, and must
-      // not be reported until Init.
-      started = true;
-      Post<ArtJvmtiEvent::kThreadStart>(art::Thread::Current());
-    }
-  }
-
   EventHandler* event_handler = nullptr;
   bool started = false;
 };
@@ -121,10 +113,19 @@
                                     art::ThreadState::kWaitingForDebuggerToAttach);
   art::ScopedSuspendAll ssa("Add thread callback");
   runtime->GetRuntimeCallbacks()->AddThreadLifecycleCallback(&gThreadCallback);
-  runtime->GetRuntimeCallbacks()->AddRuntimePhaseCallback(&gThreadCallback);
+}
+
+void ThreadUtil::VMInitEventSent() {
+  // We should have already started.
+  DCHECK(gThreadCallback.started);
+  // We moved to VMInit. Report the main thread as started (it was attached early, and must not be
+  // reported until Init).
+  gThreadCallback.Post<ArtJvmtiEvent::kThreadStart>(art::Thread::Current());
 }
 
 void ThreadUtil::CacheData() {
+  // We must have started, since it is now safe to cache our data.
+  gThreadCallback.started = true;
   art::ScopedObjectAccess soa(art::Thread::Current());
   art::ObjPtr<art::mirror::Class> thread_class =
       soa.Decode<art::mirror::Class>(art::WellKnownClasses::java_lang_Thread);
@@ -140,7 +141,6 @@
   art::ScopedSuspendAll ssa("Remove thread callback");
   art::Runtime* runtime = art::Runtime::Current();
   runtime->GetRuntimeCallbacks()->RemoveThreadLifecycleCallback(&gThreadCallback);
-  runtime->GetRuntimeCallbacks()->RemoveRuntimePhaseCallback(&gThreadCallback);
 }
 
 jvmtiError ThreadUtil::GetCurrentThread(jvmtiEnv* env ATTRIBUTE_UNUSED, jthread* thread_ptr) {
diff --git a/openjdkjvmti/ti_thread.h b/openjdkjvmti/ti_thread.h
index 03c49d7..a19974a 100644
--- a/openjdkjvmti/ti_thread.h
+++ b/openjdkjvmti/ti_thread.h
@@ -53,9 +53,14 @@
   static void Register(EventHandler* event_handler);
   static void Unregister();
 
-  // To be called when it is safe to cache data.
+  // To be called when it is safe to cache data. This means that we have at least entered the
+  // RuntimePhase::kInit, but we might or might not have already sent the VMInit event.
   static void CacheData();
 
+  // Called just after we have sent the VMInit callback so that ThreadUtil can do final setup. This
+  // ensures that there are no timing issues between the two callbacks.
+  static void VMInitEventSent() REQUIRES_SHARED(art::Locks::mutator_lock_);
+
   // Handle a jvmtiEnv going away.
   static void RemoveEnvironment(jvmtiEnv* env);
 
diff --git a/runtime/arch/mips/quick_entrypoints_mips.S b/runtime/arch/mips/quick_entrypoints_mips.S
index 59a2c10..bb82d58 100644
--- a/runtime/arch/mips/quick_entrypoints_mips.S
+++ b/runtime/arch/mips/quick_entrypoints_mips.S
@@ -341,7 +341,7 @@
      * Reserves FRAME_SIZE_SAVE_EVERYTHING + ARG_SLOT_SIZE bytes on the stack.
      * This macro sets up $gp; entrypoints using it should start with ENTRY_NO_GP.
      */
-.macro SETUP_SAVE_EVERYTHING_FRAME_DECREMENTED_SP
+.macro SETUP_SAVE_EVERYTHING_FRAME_DECREMENTED_SP runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET
      // Ugly compile-time check, but we only have the preprocessor.
 #if (FRAME_SIZE_SAVE_EVERYTHING != 256)
 #error "FRAME_SIZE_SAVE_EVERYTHING(MIPS) size not as expected."
@@ -433,7 +433,7 @@
 
     lw $t0, %got(_ZN3art7Runtime9instance_E)($gp)
     lw $t0, 0($t0)
-    lw $t0, RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET($t0)
+    lw $t0, \runtime_method_offset($t0)
     sw $t0, 0($sp)                                # Place Method* at bottom of stack.
     sw $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF)  # Place sp in Thread::Current()->top_quick_frame.
     addiu  $sp, $sp, -ARG_SLOT_SIZE               # reserve argument slots on the stack
@@ -450,10 +450,10 @@
      * Reserves FRAME_SIZE_SAVE_EVERYTHING + ARG_SLOT_SIZE bytes on the stack.
      * This macro sets up $gp; entrypoints using it should start with ENTRY_NO_GP.
      */
-.macro SETUP_SAVE_EVERYTHING_FRAME
+.macro SETUP_SAVE_EVERYTHING_FRAME runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET
     addiu  $sp, $sp, -(FRAME_SIZE_SAVE_EVERYTHING)
     .cfi_adjust_cfa_offset (FRAME_SIZE_SAVE_EVERYTHING)
-    SETUP_SAVE_EVERYTHING_FRAME_DECREMENTED_SP
+    SETUP_SAVE_EVERYTHING_FRAME_DECREMENTED_SP \runtime_method_offset
 .endm
 
 .macro RESTORE_SAVE_EVERYTHING_FRAME restore_a0=1
@@ -2008,10 +2008,10 @@
 
 // Macro for string and type resolution and initialization.
 // $a0 is both input and output.
-.macro ONE_ARG_SAVE_EVERYTHING_DOWNCALL name, entrypoint
+.macro ONE_ARG_SAVE_EVERYTHING_DOWNCALL name, entrypoint, runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET
     .extern \entrypoint
 ENTRY_NO_GP \name
-    SETUP_SAVE_EVERYTHING_FRAME       # Save everything in case of GC.
+    SETUP_SAVE_EVERYTHING_FRAME \runtime_method_offset  # Save everything in case of GC.
     move    $s2, $gp                  # Preserve $gp across the call for exception delivery.
     la      $t9, \entrypoint
     jalr    $t9                       # (uint32_t index, Thread*)
@@ -2027,6 +2027,10 @@
 END \name
 .endm
 
+.macro ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT name, entrypoint
+    ONE_ARG_SAVE_EVERYTHING_DOWNCALL \name, \entrypoint, RUNTIME_SAVE_EVERYTHING_FOR_CLINIT_METHOD_OFFSET
+.endm
+
     /*
      * Entry from managed code to resolve a string, this stub will allocate a String and deliver an
      * exception on error. On success the String is returned. A0 holds the string index. The fast
@@ -2039,12 +2043,12 @@
      * initializer and deliver the exception on error. On success the static storage base is
      * returned.
      */
-ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode
+ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT art_quick_initialize_static_storage, artInitializeStaticStorageFromCode
 
     /*
      * Entry from managed code when dex cache misses for a type_idx.
      */
-ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_initialize_type, artInitializeTypeFromCode
+ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT art_quick_initialize_type, artInitializeTypeFromCode
 
     /*
      * Entry from managed code when type_idx needs to be checked for access and dex cache may also
@@ -2063,7 +2067,8 @@
     jalr   $zero, $ra
     nop
 1:
-    SETUP_SAVE_EVERYTHING_FRAME                      # save everything for stack crawl
+    SETUP_SAVE_EVERYTHING_FRAME RUNTIME_SAVE_EVERYTHING_FOR_SUSPEND_CHECK_METHOD_OFFSET
+                                                     # save everything for stack crawl
     la     $t9, artTestSuspendFromCode
     jalr   $t9                                       # (Thread*)
     move   $a0, rSELF
diff --git a/runtime/arch/mips64/quick_entrypoints_mips64.S b/runtime/arch/mips64/quick_entrypoints_mips64.S
index 3b92daa..7350c85 100644
--- a/runtime/arch/mips64/quick_entrypoints_mips64.S
+++ b/runtime/arch/mips64/quick_entrypoints_mips64.S
@@ -345,7 +345,7 @@
      *              $f0-$f31; 28(GPR)+ 32(FPR) + 1x8 bytes padding + method*
      * This macro sets up $gp; entrypoints using it should start with ENTRY_NO_GP.
      */
-.macro SETUP_SAVE_EVERYTHING_FRAME_DECREMENTED_SP
+.macro SETUP_SAVE_EVERYTHING_FRAME_DECREMENTED_SP runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET
      // Ugly compile-time check, but we only have the preprocessor.
 #if (FRAME_SIZE_SAVE_EVERYTHING != 496)
 #error "FRAME_SIZE_SAVE_EVERYTHING(MIPS64) size not as expected."
@@ -452,7 +452,7 @@
     # load appropriate callee-save-method
     ld      $t1, %got(_ZN3art7Runtime9instance_E)($gp)
     ld      $t1, 0($t1)
-    ld      $t1, RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET($t1)
+    ld      $t1, \runtime_method_offset($t1)
     sd      $t1, 0($sp)                                # Place ArtMethod* at bottom of stack.
     # Place sp in Thread::Current()->top_quick_frame.
     sd      $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF)
@@ -465,10 +465,10 @@
      *              $f0-$f31; 28(GPR)+ 32(FPR) + 1x8 bytes padding + method*
      * This macro sets up $gp; entrypoints using it should start with ENTRY_NO_GP.
      */
-.macro SETUP_SAVE_EVERYTHING_FRAME
+.macro SETUP_SAVE_EVERYTHING_FRAME runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET
     daddiu $sp, $sp, -(FRAME_SIZE_SAVE_EVERYTHING)
     .cfi_adjust_cfa_offset (FRAME_SIZE_SAVE_EVERYTHING)
-    SETUP_SAVE_EVERYTHING_FRAME_DECREMENTED_SP
+    SETUP_SAVE_EVERYTHING_FRAME_DECREMENTED_SP \runtime_method_offset
 .endm
 
 .macro RESTORE_SAVE_EVERYTHING_FRAME restore_a0=1
@@ -1925,10 +1925,10 @@
 
 // Macro for string and type resolution and initialization.
 // $a0 is both input and output.
-.macro ONE_ARG_SAVE_EVERYTHING_DOWNCALL name, entrypoint
+.macro ONE_ARG_SAVE_EVERYTHING_DOWNCALL name, entrypoint, runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET
     .extern \entrypoint
 ENTRY_NO_GP \name
-    SETUP_SAVE_EVERYTHING_FRAME       # Save everything in case of GC.
+    SETUP_SAVE_EVERYTHING_FRAME \runtime_method_offset  # Save everything in case of GC.
     dla     $t9, \entrypoint
     jalr    $t9                       # (uint32_t index, Thread*)
     move    $a1, rSELF                # Pass Thread::Current (in delay slot).
@@ -1941,6 +1941,10 @@
 END \name
 .endm
 
+.macro ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT name, entrypoint
+    ONE_ARG_SAVE_EVERYTHING_DOWNCALL \name, \entrypoint, RUNTIME_SAVE_EVERYTHING_FOR_CLINIT_METHOD_OFFSET
+.endm
+
     /*
      * Entry from managed code to resolve a string, this stub will allocate a String and deliver an
      * exception on error. On success the String is returned. A0 holds the string index. The fast
@@ -1953,12 +1957,12 @@
      * initializer and deliver the exception on error. On success the static storage base is
      * returned.
      */
-ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode
+ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT art_quick_initialize_static_storage, artInitializeStaticStorageFromCode
 
     /*
      * Entry from managed code when dex cache misses for a type_idx.
      */
-ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_initialize_type, artInitializeTypeFromCode
+ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT art_quick_initialize_type, artInitializeTypeFromCode
 
     /*
      * Entry from managed code when type_idx needs to be checked for access and dex cache may also
@@ -1977,7 +1981,8 @@
     jalr   $zero, $ra
     nop
 1:
-    SETUP_SAVE_EVERYTHING_FRAME               # save everything for stack crawl
+    SETUP_SAVE_EVERYTHING_FRAME RUNTIME_SAVE_EVERYTHING_FOR_SUSPEND_CHECK_METHOD_OFFSET
+                                              # save everything for stack crawl
     jal    artTestSuspendFromCode             # (Thread*)
     move   $a0, rSELF
     RESTORE_SAVE_EVERYTHING_FRAME
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index 0efc004..051c0c2 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -1021,6 +1021,9 @@
                            std::make_move_iterator(dex_files.begin()),
                            std::make_move_iterator(dex_files.end()));
   }
+  for (const std::unique_ptr<const DexFile>& dex_file : boot_dex_files_) {
+    OatDexFile::MadviseDexFile(*dex_file, MadviseState::kMadviseStateAtLoad);
+  }
   FinishInit(self);
 
   VLOG(startup) << __FUNCTION__ << " exiting";
diff --git a/runtime/oat_file.cc b/runtime/oat_file.cc
index 0af0622..c79b5c9 100644
--- a/runtime/oat_file.cc
+++ b/runtime/oat_file.cc
@@ -74,6 +74,9 @@
 // For debugging, Open will print DlOpen error message if set to true.
 static constexpr bool kPrintDlOpenErrorMessage = false;
 
+// If true, we advise the kernel about dex file mem map accesses.
+static constexpr bool kMadviseDexFileAccesses = false;
+
 // Note for OatFileBase and descendents:
 //
 // These are used in OatFile::Open to try all our loaders.
@@ -1495,6 +1498,9 @@
 
 // Madvise the dex file based on the state we are moving to.
 void OatDexFile::MadviseDexFile(const DexFile& dex_file, MadviseState state) {
+  if (!kMadviseDexFileAccesses) {
+    return;
+  }
   if (state == MadviseState::kMadviseStateAtLoad) {
     // Default every dex file to MADV_RANDOM when its loaded by default.
     MadviseLargestPageAlignedRegion(dex_file.Begin(),
diff --git a/runtime/oat_file_manager.cc b/runtime/oat_file_manager.cc
index de8f7ed..499f356 100644
--- a/runtime/oat_file_manager.cc
+++ b/runtime/oat_file_manager.cc
@@ -51,9 +51,6 @@
 // If true, we attempt to load the application image if it exists.
 static constexpr bool kEnableAppImage = true;
 
-// If true, we advise the kernel about dex file mem map accesses.
-static constexpr bool kMadviseDexFileAccesses = false;
-
 const OatFile* OatFileManager::RegisterOatFile(std::unique_ptr<const OatFile> oat_file) {
   WriterMutexLock mu(Thread::Current(), *Locks::oat_file_manager_lock_);
   DCHECK(oat_file != nullptr);
@@ -572,7 +569,7 @@
     }
     if (dex_files.empty()) {
       error_msgs->push_back("Failed to open dex files from " + source_oat_file->GetLocation());
-    } else if (kMadviseDexFileAccesses) {
+    } else {
       // Opened dex files from an oat file, madvise them to their loaded state.
        for (const std::unique_ptr<const DexFile>& dex_file : dex_files) {
          OatDexFile::MadviseDexFile(*dex_file, MadviseState::kMadviseStateAtLoad);
diff --git a/runtime/obj_ptr.h b/runtime/obj_ptr.h
index 92cf4eb..4162873 100644
--- a/runtime/obj_ptr.h
+++ b/runtime/obj_ptr.h
@@ -29,7 +29,6 @@
 constexpr bool kObjPtrPoisoning = kIsDebugBuild;
 
 // Value type representing a pointer to a mirror::Object of type MirrorType
-// Pass kPoison as a template boolean for testing in non-debug builds.
 // Since the cookie is thread based, it is not safe to share an ObjPtr between threads.
 template<class MirrorType>
 class ObjPtr {
diff --git a/test/063-process-manager/src/Main.java b/test/063-process-manager/src/Main.java
index 311c4e7..e31a0df 100644
--- a/test/063-process-manager/src/Main.java
+++ b/test/063-process-manager/src/Main.java
@@ -16,7 +16,7 @@
         System.out.println("spawning child");
         ProcessBuilder pb = new ProcessBuilder("sleep", "5");
         Process proc = pb.start();
-        Thread.sleep(2000);
+        Thread.sleep(250);
         checkManager();
         proc.waitFor();
         System.out.println("child died");
diff --git a/test/067-preemptive-unpark/src/Main.java b/test/067-preemptive-unpark/src/Main.java
index beb3262..b674690 100644
--- a/test/067-preemptive-unpark/src/Main.java
+++ b/test/067-preemptive-unpark/src/Main.java
@@ -1,3 +1,19 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
 import sun.misc.Unsafe;
 
 import java.lang.reflect.Field;
@@ -25,9 +41,11 @@
         test.parkNow = true;
 
         try {
-            Thread.sleep(1500);
+            // Give some time to the ParkTester thread to honor the park command.
+            Thread.sleep(3000);
         } catch (InterruptedException ex) {
-            // Ignore it.
+            System.out.println("Main thread interrupted!");
+            System.exit(1);
         }
 
         if (test.success) {
diff --git a/test/1919-vminit-thread-start-timing/expected.txt b/test/1919-vminit-thread-start-timing/expected.txt
new file mode 100644
index 0000000..6361451
--- /dev/null
+++ b/test/1919-vminit-thread-start-timing/expected.txt
@@ -0,0 +1,4 @@
+VMInit: main
+ThreadStart: JVMTI_THREAD-Test1919
+Test1919AgentThread: JVMTI_THREAD-Test1919
+ThreadStart: main
diff --git a/test/1919-vminit-thread-start-timing/info.txt b/test/1919-vminit-thread-start-timing/info.txt
new file mode 100644
index 0000000..995f0a1
--- /dev/null
+++ b/test/1919-vminit-thread-start-timing/info.txt
@@ -0,0 +1,3 @@
+Tests basic functions in the jvmti plugin.
+
+Test the interaction of VMInit events and thread starts.
diff --git a/test/1919-vminit-thread-start-timing/run b/test/1919-vminit-thread-start-timing/run
new file mode 100755
index 0000000..c6e62ae
--- /dev/null
+++ b/test/1919-vminit-thread-start-timing/run
@@ -0,0 +1,17 @@
+#!/bin/bash
+#
+# Copyright 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+./default-run "$@" --jvmti
diff --git a/test/1919-vminit-thread-start-timing/src/Main.java b/test/1919-vminit-thread-start-timing/src/Main.java
new file mode 100644
index 0000000..65781b8
--- /dev/null
+++ b/test/1919-vminit-thread-start-timing/src/Main.java
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+  public static void main(String[] args) {
+    art.Test1919.run();
+  }
+}
diff --git a/test/1919-vminit-thread-start-timing/src/art/Main.java b/test/1919-vminit-thread-start-timing/src/art/Main.java
new file mode 100644
index 0000000..8b01920
--- /dev/null
+++ b/test/1919-vminit-thread-start-timing/src/art/Main.java
@@ -0,0 +1,28 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package art;
+
+// Binder class so the agent's C code has something that can be bound and exposed to tests.
+// In a package to separate cleanly and work around CTS reference issues (though this class
+// should be replaced in the CTS version).
+public class Main {
+  // Load the given class with the given classloader, and bind all native methods to corresponding
+  // C methods in the agent. Will abort if any of the steps fail.
+  public static native void bindAgentJNI(String className, ClassLoader classLoader);
+  // Same as above, giving the class directly.
+  public static native void bindAgentJNIForClass(Class<?> klass);
+}
diff --git a/test/1919-vminit-thread-start-timing/src/art/Test1919.java b/test/1919-vminit-thread-start-timing/src/art/Test1919.java
new file mode 100644
index 0000000..3d5c079
--- /dev/null
+++ b/test/1919-vminit-thread-start-timing/src/art/Test1919.java
@@ -0,0 +1,53 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package art;
+
+public class Test1919 {
+  public static final boolean PRINT_ALL_THREADS = false;
+
+  public static void run() {
+    for (Event e : getEvents()) {
+      if (PRINT_ALL_THREADS ||
+          e.thr.equals(Thread.currentThread()) ||
+          e.thr.getName().equals("JVMTI_THREAD-Test1919")) {
+        System.out.println(e.name + ": " + e.thr.getName());
+      }
+    }
+  }
+
+  static class Event {
+    public final String name;
+    public final Thread thr;
+    public Event(String name, Thread thr) {
+      this.name = name;
+      this.thr = thr;
+    }
+  }
+
+  public static Event[] getEvents() {
+    String[] ns = getEventNames();
+    Thread[] ts = getEventThreads();
+    Event[] es = new Event[Math.min(ns.length, ts.length)];
+    for (int i = 0; i < es.length; i++) {
+      es[i] = new Event(ns[i], ts[i]);
+    }
+    return es;
+  }
+
+  public static native String[] getEventNames();
+  public static native Thread[] getEventThreads();
+}
diff --git a/test/1919-vminit-thread-start-timing/vminit.cc b/test/1919-vminit-thread-start-timing/vminit.cc
new file mode 100644
index 0000000..109c61f
--- /dev/null
+++ b/test/1919-vminit-thread-start-timing/vminit.cc
@@ -0,0 +1,192 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "1919-vminit-thread-start-timing/vminit.h"
+
+#include <mutex>
+#include <thread>
+#include <vector>
+
+#include <jni.h>
+#include <stdio.h>
+#include <string.h>
+#include "android-base/macros.h"
+#include "jvmti.h"
+
+// Test infrastructure
+#include "scoped_local_ref.h"
+#include "jvmti_helper.h"
+#include "jni_helper.h"
+#include "test_env.h"
+
+namespace art {
+namespace Test1919VMInitThreadStart {
+
+struct EventData {
+  std::string event;  // Event label, e.g. "VMInit", "ThreadStart" or "Test1919AgentThread".
+  jobject data;       // JNI global reference to the thread the event was recorded for.
+};
+
+struct EventList {
+  jrawMonitorID events_mutex;     // Raw monitor held around every access to |events|.
+  std::vector<EventData> events;  // Recorded events, in the order they were appended.
+};
+
+
+static void EnableEvent(jvmtiEnv* env, jvmtiEvent evt) {  // Enables |evt|; only logs on failure.
+  jvmtiError error = env->SetEventNotificationMode(JVMTI_ENABLE, evt, nullptr);
+  if (error != JVMTI_ERROR_NONE) {
+    printf("Failed to enable event %d: %d\n", static_cast<int>(evt), static_cast<int>(error));
+  }
+}
+
+static void JNICALL ThreadStartCallback(jvmtiEnv *jvmti, JNIEnv* env, jthread thread) {
+  EventList* list = nullptr;  // ThreadStart handler: appends |thread| to the env-local EventList.
+  CheckJvmtiError(jvmti, jvmti->GetEnvironmentLocalStorage(reinterpret_cast<void**>(&list)));
+  CheckJvmtiError(jvmti, jvmti->RawMonitorEnter(list->events_mutex));
+  list->events.push_back({ "ThreadStart", env->NewGlobalRef(thread) });  // Read by getEventThreads.
+  CheckJvmtiError(jvmti, jvmti->RawMonitorExit(list->events_mutex));
+}
+
+static void JNICALL Test1919AgentThread(jvmtiEnv* jvmti,
+                                        JNIEnv* env,
+                                        void* arg ATTRIBUTE_UNUSED) {
+  EventList* list = nullptr;  // Agent thread body: records itself, then notifies the monitor.
+  CheckJvmtiError(jvmti, jvmti->GetEnvironmentLocalStorage(reinterpret_cast<void**>(&list)));
+  CheckJvmtiError(jvmti, jvmti->RawMonitorEnter(list->events_mutex));
+  jthread cur;  // Local reference to the running thread, dropped once the global ref exists.
+  CheckJvmtiError(jvmti, jvmti->GetCurrentThread(&cur));
+  list->events.push_back({ "Test1919AgentThread", env->NewGlobalRef(cur) });
+  env->DeleteLocalRef(cur);
+  // Wake up VMInit, which waits on this monitor in VMInitCallback.
+  CheckJvmtiError(jvmti, jvmti->RawMonitorNotify(list->events_mutex));
+  CheckJvmtiError(jvmti, jvmti->RawMonitorExit(list->events_mutex));
+}
+
+static void CreateAgentThread(jvmtiEnv* jvmti, JNIEnv* env) {
+  // Create a java.lang.Thread object named "JVMTI_THREAD-Test1919" (allocate, then call <init>).
+  ScopedLocalRef<jobject> thread_name(env, env->NewStringUTF("JVMTI_THREAD-Test1919"));
+  CHECK(thread_name.get() != nullptr);
+
+  ScopedLocalRef<jclass> thread_klass(env, env->FindClass("java/lang/Thread"));
+  CHECK(thread_klass.get() != nullptr);
+
+  ScopedLocalRef<jobject> thread(env, env->AllocObject(thread_klass.get()));
+  CHECK(thread.get() != nullptr);
+
+  jmethodID initID = env->GetMethodID(thread_klass.get(), "<init>", "(Ljava/lang/String;)V");
+  CHECK(initID != nullptr);
+
+  env->CallNonvirtualVoidMethod(thread.get(), thread_klass.get(), initID, thread_name.get());
+  CHECK(!env->ExceptionCheck());
+
+  // Run agent thread: Test1919AgentThread is the entry point, with a null argument.
+  CheckJvmtiError(jvmti, jvmti->RunAgentThread(thread.get(),
+                                               Test1919AgentThread,
+                                               nullptr,
+                                               JVMTI_THREAD_NORM_PRIORITY));
+}
+
+static void JNICALL VMInitCallback(jvmtiEnv *jvmti, JNIEnv* env, jthread thread) {
+  EventList* list = nullptr;  // VMInit handler: records the event, then starts the agent thread.
+  CheckJvmtiError(jvmti, jvmti->GetEnvironmentLocalStorage(reinterpret_cast<void**>(&list)));
+  CheckJvmtiError(jvmti, jvmti->RawMonitorEnter(list->events_mutex));
+  list->events.push_back({ "VMInit", env->NewGlobalRef(thread) });
+  // Create a new thread while still holding the monitor.
+  CreateAgentThread(jvmti, env);
+  // Wait for new thread to run. NOTE(review): single wait, no predicate re-check after wakeup.
+  CheckJvmtiError(jvmti, jvmti->RawMonitorWait(list->events_mutex, 0));
+  CheckJvmtiError(jvmti, jvmti->RawMonitorExit(list->events_mutex));
+}
+
+static void InstallVMEvents(jvmtiEnv* env) {  // Registers and enables VMInit and ThreadStart.
+  jvmtiEventCallbacks callbacks;
+  memset(&callbacks, 0, sizeof(jvmtiEventCallbacks));  // Leave every other callback slot null.
+  callbacks.VMInit = VMInitCallback;
+  callbacks.ThreadStart = ThreadStartCallback;
+  jvmtiError ret = env->SetEventCallbacks(&callbacks, sizeof(callbacks));
+  if (ret != JVMTI_ERROR_NONE) {
+    printf("Failed to install callbacks: %d\n", static_cast<int>(ret));
+  }
+
+  EnableEvent(env, JVMTI_EVENT_VM_INIT);
+  EnableEvent(env, JVMTI_EVENT_THREAD_START);
+}
+
+static void InstallEventList(jvmtiEnv* env) {  // Allocates the EventList as env-local storage.
+  EventList* list = nullptr;
+  CheckJvmtiError(env, env->Allocate(sizeof(EventList), reinterpret_cast<unsigned char**>(&list)));
+  memset(list, 0, sizeof(EventList));  // NOTE(review): zero-fills; the vector is never constructed.
+  CheckJvmtiError(env, env->CreateRawMonitor("Test1919 Monitor", &list->events_mutex));
+  CheckJvmtiError(env, env->SetEnvironmentLocalStorage(list));
+}
+
+jint OnLoad(JavaVM* vm,
+            char* options ATTRIBUTE_UNUSED,
+            void* reserved ATTRIBUTE_UNUSED) {
+  if (vm->GetEnv(reinterpret_cast<void**>(&jvmti_env), JVMTI_VERSION_1_0) != 0) {
+    printf("Unable to get jvmti env!\n");
+    return 1;  // Non-zero tells the caller that agent setup failed.
+  }
+  InstallEventList(jvmti_env);  // First: the callbacks dereference the list unconditionally.
+  InstallVMEvents(jvmti_env);
+  return 0;
+}
+
+extern "C" JNIEXPORT jobjectArray JNICALL Java_art_Test1919_getEventNames(JNIEnv* env, jclass) {
+  EventList* list = nullptr;  // Native side of Test1919.getEventNames(): labels as a String[].
+  if (JvmtiErrorToException(env,
+                            jvmti_env,
+                            jvmti_env->GetEnvironmentLocalStorage(
+                                reinterpret_cast<void**>(&list)))) {
+    return nullptr;
+  }
+  if (JvmtiErrorToException(env, jvmti_env, jvmti_env->RawMonitorEnter(list->events_mutex))) {
+    return nullptr;
+  }
+  jobjectArray ret = CreateObjectArray(env, list->events.size(), "java/lang/String",
+                                       [&](jint i) {
+                                         return env->NewStringUTF(list->events[i].event.c_str());
+                                       });
+  if (JvmtiErrorToException(env, jvmti_env, jvmti_env->RawMonitorExit(list->events_mutex))) {
+    return nullptr;  // The array built above is discarded when the monitor exit fails.
+  }
+  return ret;
+}
+
+extern "C" JNIEXPORT jobjectArray JNICALL Java_art_Test1919_getEventThreads(JNIEnv* env, jclass) {
+  EventList* list = nullptr;  // Native side of Test1919.getEventThreads(): threads as a Thread[].
+  if (JvmtiErrorToException(env,
+                            jvmti_env,
+                            jvmti_env->GetEnvironmentLocalStorage(
+                                reinterpret_cast<void**>(&list)))) {
+    return nullptr;
+  }
+  if (JvmtiErrorToException(env, jvmti_env, jvmti_env->RawMonitorEnter(list->events_mutex))) {
+    return nullptr;
+  }
+  jobjectArray ret = CreateObjectArray(env, list->events.size(), "java/lang/Thread",
+                                       [&](jint i) {
+                                         return env->NewLocalRef(list->events[i].data);
+                                       });
+  if (JvmtiErrorToException(env, jvmti_env, jvmti_env->RawMonitorExit(list->events_mutex))) {
+    return nullptr;  // The array built above is discarded when the monitor exit fails.
+  }
+  return ret;
+}
+
+}  // namespace Test1919VMInitThreadStart
+}  // namespace art
diff --git a/test/1919-vminit-thread-start-timing/vminit.h b/test/1919-vminit-thread-start-timing/vminit.h
new file mode 100644
index 0000000..c4a5ea8
--- /dev/null
+++ b/test/1919-vminit-thread-start-timing/vminit.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_TEST_1919_VMINIT_THREAD_START_TIMING_VMINIT_H_
+#define ART_TEST_1919_VMINIT_THREAD_START_TIMING_VMINIT_H_
+
+#include <jni.h>
+
+namespace art {
+namespace Test1919VMInitThreadStart {
+
+jint OnLoad(JavaVM* vm, char* options, void* reserved);  // Agent load hook; 0 means success.
+
+}  // namespace Test1919VMInitThreadStart
+}  // namespace art
+
+#endif  // ART_TEST_1919_VMINIT_THREAD_START_TIMING_VMINIT_H_
diff --git a/test/901-hello-ti-agent/basics.cc b/test/901-hello-ti-agent/basics.cc
index 2edd91e..472f2b7 100644
--- a/test/901-hello-ti-agent/basics.cc
+++ b/test/901-hello-ti-agent/basics.cc
@@ -38,20 +38,27 @@
   }
 }
 
-static void JNICALL VMStartCallback(jvmtiEnv *jenv ATTRIBUTE_UNUSED,
-                                     JNIEnv* jni_env ATTRIBUTE_UNUSED) {
-  printf("VMStart\n");
+static jvmtiPhase getPhase(jvmtiEnv* jenv) {
+  jvmtiPhase out = static_cast<jvmtiPhase>(-1);
+  jenv->GetPhase(&out);
+  return out;
 }
 
-static void JNICALL VMInitCallback(jvmtiEnv *jvmti_env ATTRIBUTE_UNUSED,
+static void JNICALL VMStartCallback(jvmtiEnv *jenv, JNIEnv* jni_env ATTRIBUTE_UNUSED) {
+  printf("VMStart (phase %d)\n", getPhase(jenv));
+  fsync(1);
+}
+
+static void JNICALL VMInitCallback(jvmtiEnv *jvmti_env,
                                    JNIEnv* jni_env ATTRIBUTE_UNUSED,
                                    jthread thread ATTRIBUTE_UNUSED) {
-  printf("VMInit\n");
+  printf("VMInit (phase %d)\n", getPhase(jvmti_env));
+  fsync(1);
 }
 
-static void JNICALL VMDeatchCallback(jvmtiEnv *jenv ATTRIBUTE_UNUSED,
-                                     JNIEnv* jni_env ATTRIBUTE_UNUSED) {
-  printf("VMDeath\n");
+static void JNICALL VMDeatchCallback(jvmtiEnv *jenv, JNIEnv* jni_env ATTRIBUTE_UNUSED) {
+  printf("VMDeath (phase %d)\n", getPhase(jenv));
+  fsync(1);
 }
 
 
diff --git a/test/901-hello-ti-agent/expected.txt b/test/901-hello-ti-agent/expected.txt
index 4177ffc..73e389c 100644
--- a/test/901-hello-ti-agent/expected.txt
+++ b/test/901-hello-ti-agent/expected.txt
@@ -1,6 +1,6 @@
 Loaded Agent for test 901-hello-ti-agent
-VMStart
-VMInit
+VMStart (phase 6)
+VMInit (phase 4)
 Hello, world!
 Agent in live phase.
 Received expected error for unattached JVMTI calls
@@ -73,4 +73,4 @@
 115 = JVMTI_ERROR_UNATTACHED_THREAD
 116 = JVMTI_ERROR_INVALID_ENVIRONMENT
 1 times JVMTI_ERROR_ILLEGAL_ARGUMENT
-VMDeath
+VMDeath (phase 4)
diff --git a/test/Android.bp b/test/Android.bp
index 7413ee5..fcb8788 100644
--- a/test/Android.bp
+++ b/test/Android.bp
@@ -295,6 +295,7 @@
         "1908-suspend-native-resume-self/native_suspend_resume.cc",
         "1909-per-agent-tls/agent_tls.cc",
         "1914-get-local-instance/local_instance.cc",
+        "1919-vminit-thread-start-timing/vminit.cc",
     ],
     shared_libs: [
         "libbase",
diff --git a/test/etc/run-test-jar b/test/etc/run-test-jar
index e989e39..90e2600 100755
--- a/test/etc/run-test-jar
+++ b/test/etc/run-test-jar
@@ -658,6 +658,8 @@
     vdex_cmdline="${dex2oat_cmdline} ${VDEX_FILTER} --input-vdex=$DEX_LOCATION/oat/$ISA/$TEST_NAME.vdex --output-vdex=$DEX_LOCATION/oat/$ISA/$TEST_NAME.vdex"
   elif [ "$TEST_VDEX" = "y" ]; then
     vdex_cmdline="${dex2oat_cmdline} ${VDEX_FILTER} --input-vdex=$DEX_LOCATION/oat/$ISA/$TEST_NAME.vdex"
+  elif [ "$PROFILE" = "y" ] || [ "$RANDOM_PROFILE" = "y" ]; then
+    vdex_cmdline="${dex2oat_cmdline} --input-vdex=$DEX_LOCATION/oat/$ISA/$TEST_NAME.vdex --output-vdex=$DEX_LOCATION/oat/$ISA/$TEST_NAME.vdex"
   fi
 fi
 
diff --git a/test/ti-agent/common_load.cc b/test/ti-agent/common_load.cc
index 1d13c62..d85f33a 100644
--- a/test/ti-agent/common_load.cc
+++ b/test/ti-agent/common_load.cc
@@ -28,6 +28,7 @@
 #include "909-attach-agent/attach.h"
 #include "936-search-onload/search_onload.h"
 #include "983-source-transform-verify/source_transform.h"
+#include "1919-vminit-thread-start-timing/vminit.h"
 
 namespace art {
 
@@ -82,6 +83,7 @@
   { "941-recursive-obsolete-jit", common_redefine::OnLoad, nullptr },
   { "943-private-recursive-jit", common_redefine::OnLoad, nullptr },
   { "983-source-transform-verify", Test983SourceTransformVerify::OnLoad, nullptr },
+  { "1919-vminit-thread-start-timing", Test1919VMInitThreadStart::OnLoad, nullptr },
 };
 
 static AgentLib* FindAgent(char* name) {