Merge "Fix issue with RawMonitorWait."
diff --git a/Android.mk b/Android.mk
index 8735d7c..7081f7b 100644
--- a/Android.mk
+++ b/Android.mk
@@ -457,7 +457,7 @@
 build-art-target: $(TARGET_OUT_EXECUTABLES)/art $(ART_TARGET_DEPENDENCIES) $(TARGET_CORE_IMG_OUTS)
 
 ########################################################################
-# Phony target for only building what go/lem requires on target.
+# Phony target for only building what go/lem requires for pushing ART on /data.
 .PHONY: build-art-target-golem
 # Also include libartbenchmark, we always include it when running golem.
 # libstdc++ is needed when building for ART_TARGET_LINUX.
@@ -482,6 +482,11 @@
                       $(ART_HOST_SHARED_LIBRARY_BENCHMARK)
 
 ########################################################################
+# Phony target for building what go/lem requires for syncing /system to target.
+.PHONY: build-art-unbundled-golem
+build-art-unbundled-golem: art-runtime linker oatdump $(TARGET_CORE_JARS)
+
+########################################################################
 # Rules for building all dependencies for tests.
 
 .PHONY: build-art-host-tests
diff --git a/compiler/common_compiler_test.cc b/compiler/common_compiler_test.cc
index a9a718f..0d38620 100644
--- a/compiler/common_compiler_test.cc
+++ b/compiler/common_compiler_test.cc
@@ -95,7 +95,7 @@
     const void* method_code = CompiledMethod::CodePointer(code_ptr,
                                                           compiled_method->GetInstructionSet());
     LOG(INFO) << "MakeExecutable " << method->PrettyMethod() << " code=" << method_code;
-    class_linker_->SetEntryPointsToCompiledCode(method, method_code);
+    method->SetEntryPointFromQuickCompiledCode(method_code);
   } else {
     // No code? You must mean to go into the interpreter.
     // Or the generic JNI...
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index d7e3a28..0b2d7f4 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -62,6 +62,9 @@
 
 namespace {  // anonymous namespace
 
+// Whether to write dex layout info into the oat file.
+static constexpr bool kWriteDexLayoutInfo = true;
+
 typedef DexFile::Header __attribute__((aligned(1))) UnalignedDexFileHeader;
 
 const UnalignedDexFileHeader* AsUnalignedDexFileHeader(const uint8_t* raw_data) {
@@ -288,10 +291,14 @@
   uint32_t class_offsets_offset_;
   uint32_t lookup_table_offset_;
   uint32_t method_bss_mapping_offset_;
+  uint32_t dex_sections_layout_offset_;
 
   // Data to write to a separate section.
   dchecked_vector<uint32_t> class_offsets_;
 
+  // Dex section layout info to serialize.
+  DexLayoutSections dex_sections_layout_;
+
  private:
   DISALLOW_COPY_AND_ASSIGN(OatDexFile);
 };
@@ -362,6 +369,9 @@
     size_oat_dex_file_offset_(0),
     size_oat_dex_file_class_offsets_offset_(0),
     size_oat_dex_file_lookup_table_offset_(0),
+    size_oat_dex_file_dex_layout_sections_offset_(0),
+    size_oat_dex_file_dex_layout_sections_(0),
+    size_oat_dex_file_dex_layout_sections_alignment_(0),
     size_oat_dex_file_method_bss_mapping_offset_(0),
     size_oat_lookup_table_alignment_(0),
     size_oat_lookup_table_(0),
@@ -571,11 +581,16 @@
     }
   }
 
-  // Write TypeLookupTables into OAT.
+  // Write type lookup tables into the oat file.
   if (!WriteTypeLookupTables(&checksum_updating_rodata, dex_files)) {
     return false;
   }
 
+  // Write dex layout sections into the oat file.
+  if (!WriteDexLayoutSections(&checksum_updating_rodata, dex_files)) {
+    return false;
+  }
+
   *opened_dex_files_map = std::move(dex_files_map);
   *opened_dex_files = std::move(dex_files);
   write_state_ = WriteState::kPrepareLayout;
@@ -2320,6 +2335,9 @@
     DO_STAT(size_oat_dex_file_offset_);
     DO_STAT(size_oat_dex_file_class_offsets_offset_);
     DO_STAT(size_oat_dex_file_lookup_table_offset_);
+    DO_STAT(size_oat_dex_file_dex_layout_sections_offset_);
+    DO_STAT(size_oat_dex_file_dex_layout_sections_);
+    DO_STAT(size_oat_dex_file_dex_layout_sections_alignment_);
     DO_STAT(size_oat_dex_file_method_bss_mapping_offset_);
     DO_STAT(size_oat_lookup_table_alignment_);
     DO_STAT(size_oat_lookup_table_);
@@ -2808,6 +2826,7 @@
   if (!WriteDexFile(out, oat_dex_file, mem_map->Begin(), /* update_input_vdex */ false)) {
     return false;
   }
+  oat_dex_file->dex_sections_layout_ = dex_layout.GetSections();
   // Set the checksum of the new oat dex file to be the original file's checksum.
   oat_dex_file->dex_file_location_checksum_ = dex_file->GetLocationChecksum();
   return true;
@@ -3153,6 +3172,70 @@
   return true;
 }
 
+// Writes each OatDexFile's DexLayoutSections (aligned, zero-padded) to `oat_rodata` and records
+// its offset in dex_sections_layout_offset_. Returns false if a seek, write or flush fails.
+bool OatWriter::WriteDexLayoutSections(
+    OutputStream* oat_rodata,
+    const std::vector<std::unique_ptr<const DexFile>>& opened_dex_files) {
+  TimingLogger::ScopedTiming split(__FUNCTION__, timings_);
+
+  if (!kWriteDexLayoutInfo) {
+    return true;  // Layout info disabled; nothing to write.
+  }
+
+  uint32_t expected_offset = oat_data_offset_ + oat_size_;
+  off_t actual_offset = oat_rodata->Seek(expected_offset, kSeekSet);
+  if (static_cast<uint32_t>(actual_offset) != expected_offset) {
+    PLOG(ERROR) << "Failed to seek to dex layout sections offset. Actual: " << actual_offset
+                << " Expected: " << expected_offset << " File: " << oat_rodata->GetLocation();
+    return false;
+  }
+
+  DCHECK_EQ(opened_dex_files.size(), oat_dex_files_.size());
+  size_t rodata_offset = oat_size_;
+  for (size_t i = 0, size = opened_dex_files.size(); i != size; ++i) {
+    OatDexFile* oat_dex_file = &oat_dex_files_[i];
+    DCHECK_EQ(oat_dex_file->dex_sections_layout_offset_, 0u);
+
+    // Write dex layout section alignment bytes.
+    const size_t padding_size = RoundUp(rodata_offset, alignof(DexLayoutSections)) - rodata_offset;
+    if (padding_size != 0u) {
+      std::vector<uint8_t> buffer(padding_size, 0u);
+      if (!oat_rodata->WriteFully(buffer.data(), padding_size)) {
+        PLOG(ERROR) << "Failed to write dex layout sections alignment padding."
+                    << " File: " << oat_dex_file->GetLocation()
+                    << " Output: " << oat_rodata->GetLocation();
+        return false;
+      }
+      size_oat_dex_file_dex_layout_sections_alignment_ += padding_size;
+      rodata_offset += padding_size;
+    }
+
+    DCHECK_ALIGNED(rodata_offset, alignof(DexLayoutSections));
+    DCHECK_EQ(oat_data_offset_ + rodata_offset,
+              static_cast<size_t>(oat_rodata->Seek(0u, kSeekCurrent)));
+    if (!oat_rodata->WriteFully(&oat_dex_file->dex_sections_layout_,
+                                sizeof(oat_dex_file->dex_sections_layout_))) {
+      PLOG(ERROR) << "Failed to write dex layout sections."
+                  << " File: " << oat_dex_file->GetLocation()
+                  << " Output: " << oat_rodata->GetLocation();
+      return false;
+    }
+    oat_dex_file->dex_sections_layout_offset_ = rodata_offset;
+    size_oat_dex_file_dex_layout_sections_ += sizeof(oat_dex_file->dex_sections_layout_);
+    rodata_offset += sizeof(oat_dex_file->dex_sections_layout_);
+  }
+  oat_size_ = rodata_offset;
+
+  if (!oat_rodata->Flush()) {
+    PLOG(ERROR) << "Failed to flush stream after writing dex layout sections."
+                << " File: " << oat_rodata->GetLocation();
+    return false;
+  }
+
+  return true;
+}
+
 bool OatWriter::WriteChecksumsAndVdexHeader(OutputStream* vdex_out) {
   if (!kIsVdexEnabled) {
     return true;
@@ -3252,6 +3335,7 @@
       class_offsets_offset_(0u),
       lookup_table_offset_(0u),
       method_bss_mapping_offset_(0u),
+      dex_sections_layout_offset_(0u),
       class_offsets_() {
 }
 
@@ -3262,7 +3346,8 @@
           + sizeof(dex_file_offset_)
           + sizeof(class_offsets_offset_)
           + sizeof(lookup_table_offset_)
-          + sizeof(method_bss_mapping_offset_);
+          + sizeof(method_bss_mapping_offset_)
+          + sizeof(dex_sections_layout_offset_);
 }
 
 bool OatWriter::OatDexFile::Write(OatWriter* oat_writer, OutputStream* out) const {
@@ -3305,6 +3390,12 @@
   }
   oat_writer->size_oat_dex_file_lookup_table_offset_ += sizeof(lookup_table_offset_);
 
+  if (!out->WriteFully(&dex_sections_layout_offset_, sizeof(dex_sections_layout_offset_))) {
+    PLOG(ERROR) << "Failed to write dex section layout info to " << out->GetLocation();
+    return false;
+  }
+  oat_writer->size_oat_dex_file_dex_layout_sections_offset_ += sizeof(dex_sections_layout_offset_);
+
   if (!out->WriteFully(&method_bss_mapping_offset_, sizeof(method_bss_mapping_offset_))) {
     PLOG(ERROR) << "Failed to write method bss mapping offset to " << out->GetLocation();
     return false;
diff --git a/compiler/oat_writer.h b/compiler/oat_writer.h
index 470d69e..8db00f7 100644
--- a/compiler/oat_writer.h
+++ b/compiler/oat_writer.h
@@ -324,6 +324,8 @@
   bool ValidateDexFileHeader(const uint8_t* raw_header, const char* location);
   bool WriteTypeLookupTables(OutputStream* oat_rodata,
                              const std::vector<std::unique_ptr<const DexFile>>& opened_dex_files);
+  bool WriteDexLayoutSections(OutputStream* oat_rodata,
+                              const std::vector<std::unique_ptr<const DexFile>>& opened_dex_files);
   bool WriteCodeAlignment(OutputStream* out, uint32_t aligned_code_delta);
   bool WriteUpTo16BytesAlignment(OutputStream* out, uint32_t size, uint32_t* stat);
   void SetMultiOatRelativePatcherAdjustment();
@@ -455,6 +457,9 @@
   uint32_t size_oat_dex_file_offset_;
   uint32_t size_oat_dex_file_class_offsets_offset_;
   uint32_t size_oat_dex_file_lookup_table_offset_;
+  uint32_t size_oat_dex_file_dex_layout_sections_offset_;
+  uint32_t size_oat_dex_file_dex_layout_sections_;
+  uint32_t size_oat_dex_file_dex_layout_sections_alignment_;
   uint32_t size_oat_dex_file_method_bss_mapping_offset_;
   uint32_t size_oat_lookup_table_alignment_;
   uint32_t size_oat_lookup_table_;
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index 2927e1f..0d9d3d4 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -31,12 +31,6 @@
 
 namespace art {
 
-void HGraphBuilder::MaybeRecordStat(MethodCompilationStat compilation_stat) {
-  if (compilation_stats_ != nullptr) {
-    compilation_stats_->RecordStat(compilation_stat);
-  }
-}
-
 bool HGraphBuilder::SkipCompilation(size_t number_of_branches) {
   if (compiler_driver_ == nullptr) {
     // Note that the compiler driver is null when unit testing.
@@ -53,7 +47,8 @@
     VLOG(compiler) << "Skip compilation of huge method "
                    << dex_file_->PrettyMethod(dex_compilation_unit_->GetDexMethodIndex())
                    << ": " << code_item_.insns_size_in_code_units_ << " code units";
-    MaybeRecordStat(MethodCompilationStat::kNotCompiledHugeMethod);
+    MaybeRecordStat(compilation_stats_,
+                    MethodCompilationStat::kNotCompiledHugeMethod);
     return true;
   }
 
@@ -63,7 +58,8 @@
     VLOG(compiler) << "Skip compilation of large method with no branch "
                    << dex_file_->PrettyMethod(dex_compilation_unit_->GetDexMethodIndex())
                    << ": " << code_item_.insns_size_in_code_units_ << " code units";
-    MaybeRecordStat(MethodCompilationStat::kNotCompiledLargeMethodNoBranches);
+    MaybeRecordStat(compilation_stats_,
+                    MethodCompilationStat::kNotCompiledLargeMethodNoBranches);
     return true;
   }
 
diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h
index 43429cf..2c9a9ef 100644
--- a/compiler/optimizing/builder.h
+++ b/compiler/optimizing/builder.h
@@ -109,7 +109,6 @@
   static constexpr const char* kBuilderPassName = "builder";
 
  private:
-  void MaybeRecordStat(MethodCompilationStat compilation_stat);
   bool SkipCompilation(size_t number_of_branches);
 
   HGraph* const graph_;
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index d7d0fff..1e5f1ec 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -610,12 +610,6 @@
   }
 }
 
-void CodeGenerator::MaybeRecordStat(MethodCompilationStat compilation_stat, size_t count) const {
-  if (stats_ != nullptr) {
-    stats_->RecordStat(compilation_stat, count);
-  }
-}
-
 std::unique_ptr<CodeGenerator> CodeGenerator::Create(HGraph* graph,
                                                      InstructionSet instruction_set,
                                                      const InstructionSetFeatures& isa_features,
@@ -1212,10 +1206,10 @@
 
 void CodeGenerator::GenerateNullCheck(HNullCheck* instruction) {
   if (compiler_options_.GetImplicitNullChecks()) {
-    MaybeRecordStat(kImplicitNullCheckGenerated);
+    MaybeRecordStat(stats_, kImplicitNullCheckGenerated);
     GenerateImplicitNullCheck(instruction);
   } else {
-    MaybeRecordStat(kExplicitNullCheckGenerated);
+    MaybeRecordStat(stats_, kExplicitNullCheckGenerated);
     GenerateExplicitNullCheck(instruction);
   }
 }
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 51a0bae..30c2b52 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -254,8 +254,6 @@
 
   const CompilerOptions& GetCompilerOptions() const { return compiler_options_; }
 
-  void MaybeRecordStat(MethodCompilationStat compilation_stat, size_t count = 1) const;
-
   // Saves the register in the stack. Returns the size taken on stack.
   virtual size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) = 0;
   // Restores the register from the stack. Returns the size taken on stack.
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index b6eb5c1..2e78af5 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -6573,7 +6573,8 @@
           DCHECK(!label_low);
           __ AddUpper(base, obj, offset_high);
         }
-        __ Beqz(T9, (isR6 ? 2 : 4));  // Skip jialc / addiu+jalr+nop.
+        MipsLabel skip_call;
+        __ Beqz(T9, &skip_call, /* is_bare */ true);
         if (label_low != nullptr) {
           DCHECK(short_offset);
           __ Bind(label_low);
@@ -6588,6 +6589,7 @@
           __ Jalr(T9);
           __ Nop();
         }
+        __ Bind(&skip_call);
         __ SetReorder(reordering);
       } else {
         // Note that we do not actually check the value of `GetIsGcMarking()`
@@ -6724,27 +6726,31 @@
     __ LoadFromOffset(kLoadWord, T9, TR, entry_point_offset);
     Register ref_reg = ref.AsRegister<Register>();
     Register base = short_offset ? obj : TMP;
+    MipsLabel skip_call;
     if (short_offset) {
       if (isR6) {
-        __ Beqzc(T9, 2);  // Skip jialc.
+        __ Beqzc(T9, &skip_call, /* is_bare */ true);
         __ Nop();  // In forbidden slot.
         __ Jialc(T9, thunk_disp);
       } else {
-        __ Beqz(T9, 3);  // Skip jalr+nop.
+        __ Beqz(T9, &skip_call, /* is_bare */ true);
         __ Addiu(T9, T9, thunk_disp);  // In delay slot.
         __ Jalr(T9);
         __ Nop();  // In delay slot.
       }
+      __ Bind(&skip_call);
     } else {
       if (isR6) {
-        __ Beqz(T9, 2);  // Skip jialc.
+        __ Beqz(T9, &skip_call, /* is_bare */ true);
         __ Aui(base, obj, offset_high);  // In delay slot.
         __ Jialc(T9, thunk_disp);
+        __ Bind(&skip_call);
       } else {
         __ Lui(base, offset_high);
-        __ Beqz(T9, 2);  // Skip jalr.
+        __ Beqz(T9, &skip_call, /* is_bare */ true);
         __ Addiu(T9, T9, thunk_disp);  // In delay slot.
         __ Jalr(T9);
+        __ Bind(&skip_call);
         __ Addu(base, base, obj);  // In delay slot.
       }
     }
@@ -6826,15 +6832,18 @@
     Register index_reg = index.IsRegisterPair()
         ? index.AsRegisterPairLow<Register>()
         : index.AsRegister<Register>();
+    MipsLabel skip_call;
     if (GetInstructionSetFeatures().IsR6()) {
-      __ Beqz(T9, 2);  // Skip jialc.
+      __ Beqz(T9, &skip_call, /* is_bare */ true);
       __ Lsa(TMP, index_reg, obj, scale_factor);  // In delay slot.
       __ Jialc(T9, thunk_disp);
+      __ Bind(&skip_call);
     } else {
       __ Sll(TMP, index_reg, scale_factor);
-      __ Beqz(T9, 2);  // Skip jalr.
+      __ Beqz(T9, &skip_call, /* is_bare */ true);
       __ Addiu(T9, T9, thunk_disp);  // In delay slot.
       __ Jalr(T9);
+      __ Bind(&skip_call);
       __ Addu(TMP, TMP, obj);  // In delay slot.
     }
     // /* HeapReference<Object> */ ref = *(obj + data_offset + (index << scale_factor))
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 3e79f47..1d59694 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -4490,7 +4490,8 @@
           DCHECK(!label_low);
           __ Daui(base, obj, offset_high);
         }
-        __ Beqz(T9, 2);  // Skip jialc.
+        Mips64Label skip_call;
+        __ Beqz(T9, &skip_call, /* is_bare */ true);
         if (label_low != nullptr) {
           DCHECK(short_offset);
           __ Bind(label_low);
@@ -4499,6 +4500,7 @@
         __ LoadFromOffset(kLoadUnsignedWord, root_reg, base, offset_low);  // Single instruction
                                                                            // in delay slot.
         __ Jialc(T9, thunk_disp);
+        __ Bind(&skip_call);
       } else {
         // Note that we do not actually check the value of `GetIsGcMarking()`
         // to decide whether to mark the loaded GC root or not.  Instead, we
@@ -4617,18 +4619,21 @@
     // threads are suspended or running a checkpoint.
     __ LoadFromOffset(kLoadDoubleword, T9, TR, entry_point_offset);
     GpuRegister ref_reg = ref.AsRegister<GpuRegister>();
+    Mips64Label skip_call;
     if (short_offset) {
-      __ Beqzc(T9, 2);  // Skip jialc.
+      __ Beqzc(T9, &skip_call, /* is_bare */ true);
       __ Nop();  // In forbidden slot.
       __ Jialc(T9, thunk_disp);
+      __ Bind(&skip_call);
       // /* HeapReference<Object> */ ref = *(obj + offset)
       __ LoadFromOffset(kLoadUnsignedWord, ref_reg, obj, offset);  // Single instruction.
     } else {
       int16_t offset_low = Low16Bits(offset);
       int16_t offset_high = High16Bits(offset - offset_low);  // Accounts for sign extension in lwu.
-      __ Beqz(T9, 2);  // Skip jialc.
+      __ Beqz(T9, &skip_call, /* is_bare */ true);
       __ Daui(TMP, obj, offset_high);  // In delay slot.
       __ Jialc(T9, thunk_disp);
+      __ Bind(&skip_call);
       // /* HeapReference<Object> */ ref = *(obj + offset)
       __ LoadFromOffset(kLoadUnsignedWord, ref_reg, TMP, offset_low);  // Single instruction.
     }
@@ -4702,11 +4707,13 @@
     // Loading the entrypoint does not require a load acquire since it is only changed when
     // threads are suspended or running a checkpoint.
     __ LoadFromOffset(kLoadDoubleword, T9, TR, entry_point_offset);
-    __ Beqz(T9, 2);  // Skip jialc.
+    Mips64Label skip_call;
+    __ Beqz(T9, &skip_call, /* is_bare */ true);
     GpuRegister ref_reg = ref.AsRegister<GpuRegister>();
     GpuRegister index_reg = index.AsRegister<GpuRegister>();
     __ Dlsa(TMP, index_reg, obj, scale_factor);  // In delay slot.
     __ Jialc(T9, thunk_disp);
+    __ Bind(&skip_call);
     // /* HeapReference<Object> */ ref = *(obj + data_offset + (index << scale_factor))
     DCHECK(IsInt<16>(static_cast<int32_t>(data_offset))) << data_offset;
     __ LoadFromOffset(kLoadUnsignedWord, ref_reg, TMP, data_offset);  // Single instruction.
diff --git a/compiler/optimizing/code_generator_vector_mips.cc b/compiler/optimizing/code_generator_vector_mips.cc
index ea36e90..6bf28ab 100644
--- a/compiler/optimizing/code_generator_vector_mips.cc
+++ b/compiler/optimizing/code_generator_vector_mips.cc
@@ -819,11 +819,74 @@
 }
 
 void LocationsBuilderMIPS::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
-  LOG(FATAL) << "No SIMD for " << instr->GetId();
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr);
+  switch (instr->GetPackedType()) {  // All supported packed types share one location setup.
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong:
+      locations->SetInAt(
+          HVecMultiplyAccumulate::kInputAccumulatorIndex, Location::RequiresFpuRegister());
+      locations->SetInAt(
+          HVecMultiplyAccumulate::kInputMulLeftIndex, Location::RequiresFpuRegister());
+      locations->SetInAt(
+          HVecMultiplyAccumulate::kInputMulRightIndex, Location::RequiresFpuRegister());
+      DCHECK_EQ(HVecMultiplyAccumulate::kInputAccumulatorIndex, 0);
+      locations->SetOut(Location::SameAsFirstInput());  // Output reuses the accumulator.
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
 }
 
 void InstructionCodeGeneratorMIPS::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
-  LOG(FATAL) << "No SIMD for " << instr->GetId();
+  LocationSummary* locations = instr->GetLocations();
+  VectorRegister acc =
+      VectorRegisterFrom(locations->InAt(HVecMultiplyAccumulate::kInputAccumulatorIndex));
+  VectorRegister left =
+      VectorRegisterFrom(locations->InAt(HVecMultiplyAccumulate::kInputMulLeftIndex));
+  VectorRegister right =
+      VectorRegisterFrom(locations->InAt(HVecMultiplyAccumulate::kInputMulRightIndex));
+  switch (instr->GetPackedType()) {  // Element type selects the B/H/W/D instruction variant.
+    case Primitive::kPrimByte:
+      DCHECK_EQ(16u, instr->GetVectorLength());  // Expect 16 byte elements.
+      if (instr->GetOpKind() == HInstruction::kAdd) {  // kAdd: Maddv; otherwise Msubv.
+        __ MaddvB(acc, left, right);
+      } else {
+        __ MsubvB(acc, left, right);
+      }
+      break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(8u, instr->GetVectorLength());  // Expect 8 char/short elements.
+      if (instr->GetOpKind() == HInstruction::kAdd) {
+        __ MaddvH(acc, left, right);
+      } else {
+        __ MsubvH(acc, left, right);
+      }
+      break;
+    case Primitive::kPrimInt:
+      DCHECK_EQ(4u, instr->GetVectorLength());  // Expect 4 int elements.
+      if (instr->GetOpKind() == HInstruction::kAdd) {
+        __ MaddvW(acc, left, right);
+      } else {
+        __ MsubvW(acc, left, right);
+      }
+      break;
+    case Primitive::kPrimLong:
+      DCHECK_EQ(2u, instr->GetVectorLength());  // Expect 2 long elements.
+      if (instr->GetOpKind() == HInstruction::kAdd) {
+        __ MaddvD(acc, left, right);
+      } else {
+        __ MsubvD(acc, left, right);
+      }
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
 }
 
 // Helper to set up locations for vector memory operations.
diff --git a/compiler/optimizing/code_generator_vector_mips64.cc b/compiler/optimizing/code_generator_vector_mips64.cc
index 0395db1..75bf7a7 100644
--- a/compiler/optimizing/code_generator_vector_mips64.cc
+++ b/compiler/optimizing/code_generator_vector_mips64.cc
@@ -823,11 +823,74 @@
 }
 
 void LocationsBuilderMIPS64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
-  LOG(FATAL) << "No SIMD for " << instr->GetId();
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr);
+  switch (instr->GetPackedType()) {  // All supported packed types share one location setup.
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong:
+      locations->SetInAt(
+          HVecMultiplyAccumulate::kInputAccumulatorIndex, Location::RequiresFpuRegister());
+      locations->SetInAt(
+          HVecMultiplyAccumulate::kInputMulLeftIndex, Location::RequiresFpuRegister());
+      locations->SetInAt(
+          HVecMultiplyAccumulate::kInputMulRightIndex, Location::RequiresFpuRegister());
+      DCHECK_EQ(HVecMultiplyAccumulate::kInputAccumulatorIndex, 0);
+      locations->SetOut(Location::SameAsFirstInput());  // Output reuses the accumulator.
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
 }
 
 void InstructionCodeGeneratorMIPS64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
-  LOG(FATAL) << "No SIMD for " << instr->GetId();
+  LocationSummary* locations = instr->GetLocations();
+  VectorRegister acc =
+      VectorRegisterFrom(locations->InAt(HVecMultiplyAccumulate::kInputAccumulatorIndex));
+  VectorRegister left =
+      VectorRegisterFrom(locations->InAt(HVecMultiplyAccumulate::kInputMulLeftIndex));
+  VectorRegister right =
+      VectorRegisterFrom(locations->InAt(HVecMultiplyAccumulate::kInputMulRightIndex));
+  switch (instr->GetPackedType()) {  // Element type selects the B/H/W/D instruction variant.
+    case Primitive::kPrimByte:
+      DCHECK_EQ(16u, instr->GetVectorLength());  // Expect 16 byte elements.
+      if (instr->GetOpKind() == HInstruction::kAdd) {  // kAdd: Maddv; otherwise Msubv.
+        __ MaddvB(acc, left, right);
+      } else {
+        __ MsubvB(acc, left, right);
+      }
+      break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(8u, instr->GetVectorLength());  // Expect 8 char/short elements.
+      if (instr->GetOpKind() == HInstruction::kAdd) {
+        __ MaddvH(acc, left, right);
+      } else {
+        __ MsubvH(acc, left, right);
+      }
+      break;
+    case Primitive::kPrimInt:
+      DCHECK_EQ(4u, instr->GetVectorLength());  // Expect 4 int elements.
+      if (instr->GetOpKind() == HInstruction::kAdd) {
+        __ MaddvW(acc, left, right);
+      } else {
+        __ MsubvW(acc, left, right);
+      }
+      break;
+    case Primitive::kPrimLong:
+      DCHECK_EQ(2u, instr->GetVectorLength());  // Expect 2 long elements.
+      if (instr->GetOpKind() == HInstruction::kAdd) {
+        __ MaddvD(acc, left, right);
+      } else {
+        __ MsubvD(acc, left, right);
+      }
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
 }
 
 // Helper to set up locations for vector memory operations.
diff --git a/compiler/optimizing/code_sinking.cc b/compiler/optimizing/code_sinking.cc
index e598e19..6c3a9fd 100644
--- a/compiler/optimizing/code_sinking.cc
+++ b/compiler/optimizing/code_sinking.cc
@@ -414,7 +414,7 @@
     if (!post_dominated.IsBitSet(position->GetBlock()->GetBlockId())) {
       continue;
     }
-    MaybeRecordStat(MethodCompilationStat::kInstructionSunk);
+    MaybeRecordStat(stats_, MethodCompilationStat::kInstructionSunk);
     instruction->MoveBefore(position, /* ensure_safety */ false);
   }
 }
diff --git a/compiler/optimizing/dead_code_elimination.cc b/compiler/optimizing/dead_code_elimination.cc
index c31c66a..787296d 100644
--- a/compiler/optimizing/dead_code_elimination.cc
+++ b/compiler/optimizing/dead_code_elimination.cc
@@ -359,7 +359,7 @@
       DCHECK(!inst->IsControlFlow());
       if (inst->IsDeadAndRemovable()) {
         block->RemoveInstruction(inst);
-        MaybeRecordStat(MethodCompilationStat::kRemovedDeadInstruction);
+        MaybeRecordStat(stats_, MethodCompilationStat::kRemovedDeadInstruction);
       }
     }
   }
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index 0141c26..6567a3a 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -75,7 +75,7 @@
 #define LOG_TRY() LOG_INTERNAL("Try inlinining call: ")
 #define LOG_NOTE() LOG_INTERNAL("Note: ")
 #define LOG_SUCCESS() LOG_INTERNAL("Success: ")
-#define LOG_FAIL(stat) MaybeRecordStat(stat); LOG_INTERNAL("Fail: ")
+#define LOG_FAIL(stats_ptr, stat) MaybeRecordStat(stats_ptr, stat); LOG_INTERNAL("Fail: ")
 #define LOG_FAIL_NO_STAT() LOG_INTERNAL("Fail: ")
 
 std::string HInliner::DepthString(int line) const {
@@ -440,9 +440,9 @@
         // Add dependency due to devirtulization. We've assumed resolved_method
         // has single implementation.
         outermost_graph_->AddCHASingleImplementationDependency(resolved_method);
-        MaybeRecordStat(kCHAInline);
+        MaybeRecordStat(stats_, kCHAInline);
       } else {
-        MaybeRecordStat(kInlinedInvokeVirtualOrInterface);
+        MaybeRecordStat(stats_, kInlinedInvokeVirtualOrInterface);
       }
     }
     return result;
@@ -532,7 +532,7 @@
     }
 
     case kInlineCacheMonomorphic: {
-      MaybeRecordStat(kMonomorphicCall);
+      MaybeRecordStat(stats_, kMonomorphicCall);
       if (UseOnlyPolymorphicInliningWithNoDeopt()) {
         return TryInlinePolymorphicCall(invoke_instruction, resolved_method, inline_cache);
       } else {
@@ -541,7 +541,7 @@
     }
 
     case kInlineCachePolymorphic: {
-      MaybeRecordStat(kPolymorphicCall);
+      MaybeRecordStat(stats_, kPolymorphicCall);
       return TryInlinePolymorphicCall(invoke_instruction, resolved_method, inline_cache);
     }
 
@@ -550,7 +550,7 @@
           << "Interface or virtual call to "
           << caller_dex_file.PrettyMethod(invoke_instruction->GetDexMethodIndex())
           << " is megamorphic and not inlined";
-      MaybeRecordStat(kMegamorphicCall);
+      MaybeRecordStat(stats_, kMegamorphicCall);
       return false;
     }
 
@@ -754,7 +754,7 @@
   dex::TypeIndex class_index = FindClassIndexIn(
       GetMonomorphicType(classes), caller_compilation_unit_);
   if (!class_index.IsValid()) {
-    LOG_FAIL(kNotInlinedDexCache)
+    LOG_FAIL(stats_, kNotInlinedDexCache)
         << "Call to " << ArtMethod::PrettyMethod(resolved_method)
         << " from inline cache is not inlined because its class is not"
         << " accessible to the caller";
@@ -803,7 +803,7 @@
                                      /* is_first_run */ false);
   rtp_fixup.Run();
 
-  MaybeRecordStat(kInlinedMonomorphicCall);
+  MaybeRecordStat(stats_, kInlinedMonomorphicCall);
   return true;
 }
 
@@ -993,7 +993,7 @@
     return false;
   }
 
-  MaybeRecordStat(kInlinedPolymorphicCall);
+  MaybeRecordStat(stats_, kInlinedPolymorphicCall);
 
   // Run type propagation to get the guards typed.
   ReferenceTypePropagation rtp_fixup(graph_,
@@ -1199,7 +1199,7 @@
                                      /* is_first_run */ false);
   rtp_fixup.Run();
 
-  MaybeRecordStat(kInlinedPolymorphicCall);
+  MaybeRecordStat(stats_, kInlinedPolymorphicCall);
 
   LOG_SUCCESS() << "Inlined same polymorphic target " << actual_method->PrettyMethod();
   return true;
@@ -1300,14 +1300,14 @@
                                  ReferenceTypeInfo receiver_type,
                                  HInstruction** return_replacement) {
   if (method->IsProxyMethod()) {
-    LOG_FAIL(kNotInlinedProxy)
+    LOG_FAIL(stats_, kNotInlinedProxy)
         << "Method " << method->PrettyMethod()
         << " is not inlined because of unimplemented inline support for proxy methods.";
     return false;
   }
 
   if (CountRecursiveCallsOf(method) > kMaximumNumberOfRecursiveCalls) {
-    LOG_FAIL(kNotInlinedRecursiveBudget)
+    LOG_FAIL(stats_, kNotInlinedRecursiveBudget)
         << "Method "
         << method->PrettyMethod()
         << " is not inlined because it has reached its recursive call budget.";
@@ -1321,10 +1321,10 @@
     if (TryPatternSubstitution(invoke_instruction, method, return_replacement)) {
       LOG_SUCCESS() << "Successfully replaced pattern of invoke "
                     << method->PrettyMethod();
-      MaybeRecordStat(kReplacedInvokeWithSimplePattern);
+      MaybeRecordStat(stats_, kReplacedInvokeWithSimplePattern);
       return true;
     }
-    LOG_FAIL(kNotInlinedWont)
+    LOG_FAIL(stats_, kNotInlinedWont)
         << "Won't inline " << method->PrettyMethod() << " in "
         << outer_compilation_unit_.GetDexFile()->GetLocation() << " ("
         << caller_compilation_unit_.GetDexFile()->GetLocation() << ") from "
@@ -1344,7 +1344,7 @@
 
   size_t inline_max_code_units = compiler_driver_->GetCompilerOptions().GetInlineMaxCodeUnits();
   if (code_item->insns_size_in_code_units_ > inline_max_code_units) {
-    LOG_FAIL(kNotInlinedCodeItem)
+    LOG_FAIL(stats_, kNotInlinedCodeItem)
         << "Method " << method->PrettyMethod()
         << " is not inlined because its code item is too big: "
         << code_item->insns_size_in_code_units_
@@ -1354,13 +1354,13 @@
   }
 
   if (code_item->tries_size_ != 0) {
-    LOG_FAIL(kNotInlinedTryCatch)
+    LOG_FAIL(stats_, kNotInlinedTryCatch)
         << "Method " << method->PrettyMethod() << " is not inlined because of try block";
     return false;
   }
 
   if (!method->IsCompilable()) {
-    LOG_FAIL(kNotInlinedNotVerified)
+    LOG_FAIL(stats_, kNotInlinedNotVerified)
         << "Method " << method->PrettyMethod()
         << " has soft failures un-handled by the compiler, so it cannot be inlined";
   }
@@ -1370,7 +1370,7 @@
     if (Runtime::Current()->UseJitCompilation() ||
         !compiler_driver_->IsMethodVerifiedWithoutFailures(
             method->GetDexMethodIndex(), class_def_idx, *method->GetDexFile())) {
-      LOG_FAIL(kNotInlinedNotVerified)
+      LOG_FAIL(stats_, kNotInlinedNotVerified)
           << "Method " << method->PrettyMethod()
           << " couldn't be verified, so it cannot be inlined";
       return false;
@@ -1381,7 +1381,7 @@
       invoke_instruction->AsInvokeStaticOrDirect()->IsStaticWithImplicitClinitCheck()) {
     // Case of a static method that cannot be inlined because it implicitly
     // requires an initialization check of its declaring class.
-    LOG_FAIL(kNotInlinedDexCache) << "Method " << method->PrettyMethod()
+    LOG_FAIL(stats_, kNotInlinedDexCache) << "Method " << method->PrettyMethod()
              << " is not inlined because it is static and requires a clinit"
              << " check that cannot be emitted due to Dex cache limitations";
     return false;
@@ -1393,7 +1393,7 @@
   }
 
   LOG_SUCCESS() << method->PrettyMethod();
-  MaybeRecordStat(kInlinedInvoke);
+  MaybeRecordStat(stats_, kInlinedInvoke);
   return true;
 }
 
@@ -1677,7 +1677,7 @@
                         handles_);
 
   if (builder.BuildGraph() != kAnalysisSuccess) {
-    LOG_FAIL(kNotInlinedCannotBuild)
+    LOG_FAIL(stats_, kNotInlinedCannotBuild)
         << "Method " << callee_dex_file.PrettyMethod(method_index)
         << " could not be built, so cannot be inlined";
     return false;
@@ -1685,7 +1685,7 @@
 
   if (!RegisterAllocator::CanAllocateRegistersFor(*callee_graph,
                                                   compiler_driver_->GetInstructionSet())) {
-    LOG_FAIL(kNotInlinedRegisterAllocator)
+    LOG_FAIL(stats_, kNotInlinedRegisterAllocator)
         << "Method " << callee_dex_file.PrettyMethod(method_index)
         << " cannot be inlined because of the register allocator";
     return false;
@@ -1738,7 +1738,7 @@
 
   HBasicBlock* exit_block = callee_graph->GetExitBlock();
   if (exit_block == nullptr) {
-    LOG_FAIL(kNotInlinedInfiniteLoop)
+    LOG_FAIL(stats_, kNotInlinedInfiniteLoop)
         << "Method " << callee_dex_file.PrettyMethod(method_index)
         << " could not be inlined because it has an infinite loop";
     return false;
@@ -1749,14 +1749,14 @@
     if (predecessor->GetLastInstruction()->IsThrow()) {
       if (invoke_instruction->GetBlock()->IsTryBlock()) {
         // TODO(ngeoffray): Support adding HTryBoundary in Hgraph::InlineInto.
-        LOG_FAIL(kNotInlinedTryCatch)
+        LOG_FAIL(stats_, kNotInlinedTryCatch)
             << "Method " << callee_dex_file.PrettyMethod(method_index)
             << " could not be inlined because one branch always throws and"
             << " caller is in a try/catch block";
         return false;
       } else if (graph_->GetExitBlock() == nullptr) {
         // TODO(ngeoffray): Support adding HExit in the caller graph.
-        LOG_FAIL(kNotInlinedInfiniteLoop)
+        LOG_FAIL(stats_, kNotInlinedInfiniteLoop)
             << "Method " << callee_dex_file.PrettyMethod(method_index)
             << " could not be inlined because one branch always throws and"
             << " caller does not have an exit block";
@@ -1775,7 +1775,7 @@
   }
 
   if (!has_one_return) {
-    LOG_FAIL(kNotInlinedAlwaysThrows)
+    LOG_FAIL(stats_, kNotInlinedAlwaysThrows)
         << "Method " << callee_dex_file.PrettyMethod(method_index)
         << " could not be inlined because it always throws";
     return false;
@@ -1788,7 +1788,7 @@
       if (block->GetLoopInformation()->IsIrreducible()) {
         // Don't inline methods with irreducible loops, they could prevent some
         // optimizations to run.
-        LOG_FAIL(kNotInlinedIrreducibleLoop)
+        LOG_FAIL(stats_, kNotInlinedIrreducibleLoop)
             << "Method " << callee_dex_file.PrettyMethod(method_index)
             << " could not be inlined because it contains an irreducible loop";
         return false;
@@ -1797,7 +1797,7 @@
         // Don't inline methods with loops without exit, since they cause the
         // loop information to be computed incorrectly when updating after
         // inlining.
-        LOG_FAIL(kNotInlinedLoopWithoutExit)
+        LOG_FAIL(stats_, kNotInlinedLoopWithoutExit)
             << "Method " << callee_dex_file.PrettyMethod(method_index)
             << " could not be inlined because it contains a loop with no exit";
         return false;
@@ -1808,7 +1808,7 @@
          !instr_it.Done();
          instr_it.Advance()) {
       if (++number_of_instructions >= inlining_budget_) {
-        LOG_FAIL(kNotInlinedInstructionBudget)
+        LOG_FAIL(stats_, kNotInlinedInstructionBudget)
             << "Method " << callee_dex_file.PrettyMethod(method_index)
             << " is not inlined because the outer method has reached"
             << " its instruction budget limit.";
@@ -1817,7 +1817,7 @@
       HInstruction* current = instr_it.Current();
       if (current->NeedsEnvironment() &&
           (total_number_of_dex_registers_ >= kMaximumNumberOfCumulatedDexRegisters)) {
-        LOG_FAIL(kNotInlinedEnvironmentBudget)
+        LOG_FAIL(stats_, kNotInlinedEnvironmentBudget)
             << "Method " << callee_dex_file.PrettyMethod(method_index)
             << " is not inlined because its caller has reached"
             << " its environment budget limit.";
@@ -1827,7 +1827,7 @@
       if (current->NeedsEnvironment() &&
           !CanEncodeInlinedMethodInStackMap(*caller_compilation_unit_.GetDexFile(),
                                             resolved_method)) {
-        LOG_FAIL(kNotInlinedStackMaps)
+        LOG_FAIL(stats_, kNotInlinedStackMaps)
             << "Method " << callee_dex_file.PrettyMethod(method_index)
             << " could not be inlined because " << current->DebugName()
             << " needs an environment, is in a different dex file"
@@ -1836,7 +1836,7 @@
       }
 
       if (!same_dex_file && current->NeedsDexCacheOfDeclaringClass()) {
-        LOG_FAIL(kNotInlinedDexCache)
+        LOG_FAIL(stats_, kNotInlinedDexCache)
             << "Method " << callee_dex_file.PrettyMethod(method_index)
             << " could not be inlined because " << current->DebugName()
             << " it is in a different dex file and requires access to the dex cache";
@@ -1848,7 +1848,7 @@
           current->IsUnresolvedStaticFieldSet() ||
           current->IsUnresolvedInstanceFieldSet()) {
         // Entrypoint for unresolved fields does not handle inlined frames.
-        LOG_FAIL(kNotInlinedUnresolvedEntrypoint)
+        LOG_FAIL(stats_, kNotInlinedUnresolvedEntrypoint)
             << "Method " << callee_dex_file.PrettyMethod(method_index)
             << " could not be inlined because it is using an unresolved"
             << " entrypoint";
diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc
index 143c77f..ca3b191 100644
--- a/compiler/optimizing/instruction_builder.cc
+++ b/compiler/optimizing/instruction_builder.cc
@@ -28,12 +28,6 @@
 
 namespace art {
 
-void HInstructionBuilder::MaybeRecordStat(MethodCompilationStat compilation_stat) {
-  if (compilation_stats_ != nullptr) {
-    compilation_stats_->RecordStat(compilation_stat);
-  }
-}
-
 HBasicBlock* HInstructionBuilder::FindBlockStartingAt(uint32_t dex_pc) const {
   return block_builder_->GetBlockAt(dex_pc);
 }
@@ -670,6 +664,9 @@
       DCHECK(fence_target != nullptr);
 
       AppendInstruction(new (arena_) HConstructorFence(fence_target, dex_pc, arena_));
+      MaybeRecordStat(
+          compilation_stats_,
+          MethodCompilationStat::kConstructorFenceGeneratedFinal);
     }
     AppendInstruction(new (arena_) HReturnVoid(dex_pc));
   } else {
@@ -816,7 +813,8 @@
   ArtMethod* resolved_method = ResolveMethod(method_idx, invoke_type);
 
   if (UNLIKELY(resolved_method == nullptr)) {
-    MaybeRecordStat(MethodCompilationStat::kUnresolvedMethod);
+    MaybeRecordStat(compilation_stats_,
+                    MethodCompilationStat::kUnresolvedMethod);
     HInvoke* invoke = new (arena_) HInvokeUnresolved(arena_,
                                                      number_of_arguments,
                                                      return_type,
@@ -1039,6 +1037,9 @@
   HConstructorFence* ctor_fence =
       new (arena_) HConstructorFence(allocation, allocation->GetDexPc(), arena_);
   AppendInstruction(ctor_fence);
+  MaybeRecordStat(
+      compilation_stats_,
+      MethodCompilationStat::kConstructorFenceGeneratedNew);
 }
 
 static bool IsSubClass(mirror::Class* to_test, mirror::Class* super_class)
@@ -1122,7 +1123,8 @@
       VLOG(compiler) << "Did not compile "
                      << dex_file_->PrettyMethod(dex_compilation_unit_->GetDexMethodIndex())
                      << " because of non-sequential dex register pair in wide argument";
-      MaybeRecordStat(MethodCompilationStat::kNotCompiledMalformedOpcode);
+      MaybeRecordStat(compilation_stats_,
+                      MethodCompilationStat::kNotCompiledMalformedOpcode);
       return false;
     }
     HInstruction* arg = LoadLocal(is_range ? register_index + i : args[i], type);
@@ -1136,7 +1138,8 @@
     VLOG(compiler) << "Did not compile "
                    << dex_file_->PrettyMethod(dex_compilation_unit_->GetDexMethodIndex())
                    << " because of wrong number of arguments in invoke instruction";
-    MaybeRecordStat(MethodCompilationStat::kNotCompiledMalformedOpcode);
+    MaybeRecordStat(compilation_stats_,
+                    MethodCompilationStat::kNotCompiledMalformedOpcode);
     return false;
   }
 
@@ -1286,7 +1289,8 @@
     HInstruction* value = LoadLocal(source_or_dest_reg, field_type);
     HInstruction* field_set = nullptr;
     if (resolved_field == nullptr) {
-      MaybeRecordStat(MethodCompilationStat::kUnresolvedField);
+      MaybeRecordStat(compilation_stats_,
+                      MethodCompilationStat::kUnresolvedField);
       field_set = new (arena_) HUnresolvedInstanceFieldSet(object,
                                                            value,
                                                            field_type,
@@ -1309,7 +1313,8 @@
   } else {
     HInstruction* field_get = nullptr;
     if (resolved_field == nullptr) {
-      MaybeRecordStat(MethodCompilationStat::kUnresolvedField);
+      MaybeRecordStat(compilation_stats_,
+                      MethodCompilationStat::kUnresolvedField);
       field_get = new (arena_) HUnresolvedInstanceFieldGet(object,
                                                            field_type,
                                                            field_index,
@@ -1444,7 +1449,8 @@
   ArtField* resolved_field = ResolveField(field_index, /* is_static */ true, is_put);
 
   if (resolved_field == nullptr) {
-    MaybeRecordStat(MethodCompilationStat::kUnresolvedField);
+    MaybeRecordStat(compilation_stats_,
+                    MethodCompilationStat::kUnresolvedField);
     Primitive::Type field_type = GetFieldAccessType(*dex_file_, field_index);
     BuildUnresolvedStaticFieldAccess(instruction, dex_pc, is_put, field_type);
     return true;
@@ -1462,7 +1468,8 @@
   if (constant == nullptr) {
     // The class cannot be referenced from this compiled code. Generate
     // an unresolved access.
-    MaybeRecordStat(MethodCompilationStat::kUnresolvedFieldNotAFastAccess);
+    MaybeRecordStat(compilation_stats_,
+                    MethodCompilationStat::kUnresolvedFieldNotAFastAccess);
     BuildUnresolvedStaticFieldAccess(instruction, dex_pc, is_put, field_type);
     return true;
   }
@@ -2823,7 +2830,8 @@
                      << dex_file_->PrettyMethod(dex_compilation_unit_->GetDexMethodIndex())
                      << " because of unhandled instruction "
                      << instruction.Name();
-      MaybeRecordStat(MethodCompilationStat::kNotCompiledUnhandledInstruction);
+      MaybeRecordStat(compilation_stats_,
+                      MethodCompilationStat::kNotCompiledUnhandledInstruction);
       return false;
   }
   return true;
diff --git a/compiler/optimizing/instruction_builder.h b/compiler/optimizing/instruction_builder.h
index 2a9b9f5..b7fa394 100644
--- a/compiler/optimizing/instruction_builder.h
+++ b/compiler/optimizing/instruction_builder.h
@@ -78,8 +78,6 @@
   bool Build();
 
  private:
-  void MaybeRecordStat(MethodCompilationStat compilation_stat);
-
   void InitializeBlockLocals();
   void PropagateLocalsToCatchBlocks();
   void SetLoopHeaderPhiInputs();
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index 02cfbbc..f2a829f 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -43,13 +43,7 @@
   void RecordSimplification() {
     simplification_occurred_ = true;
     simplifications_at_current_position_++;
-    MaybeRecordStat(kInstructionSimplifications);
-  }
-
-  void MaybeRecordStat(MethodCompilationStat stat) {
-    if (stats_ != nullptr) {
-      stats_->RecordStat(stat);
-    }
+    MaybeRecordStat(stats_, kInstructionSimplifications);
   }
 
   bool ReplaceRotateWithRor(HBinaryOperation* op, HUShr* ushr, HShl* shl);
@@ -65,6 +59,7 @@
   bool TryDeMorganNegationFactoring(HBinaryOperation* op);
   bool TryHandleAssociativeAndCommutativeOperation(HBinaryOperation* instruction);
   bool TrySubtractionChainSimplification(HBinaryOperation* instruction);
+  bool TryCombineVecMultiplyAccumulate(HVecMul* mul);
 
   void VisitShift(HBinaryOperation* shift);
 
@@ -104,6 +99,7 @@
   void VisitInstanceOf(HInstanceOf* instruction) OVERRIDE;
   void VisitInvoke(HInvoke* invoke) OVERRIDE;
   void VisitDeoptimize(HDeoptimize* deoptimize) OVERRIDE;
+  void VisitVecMul(HVecMul* instruction) OVERRIDE;
 
   bool CanEnsureNotNullAt(HInstruction* instr, HInstruction* at) const;
 
@@ -249,6 +245,84 @@
   return false;
 }
 
+bool InstructionSimplifierVisitor::TryCombineVecMultiplyAccumulate(HVecMul* mul) {
+  Primitive::Type type = mul->GetPackedType();  // Packed type of the vector multiply.
+  InstructionSet isa = codegen_->GetInstructionSet();  // Target ISA, from the code generator.
+  switch (isa) {  // Bail out unless the ISA/packed-type pair is listed below.
+    case kArm64:  // Accepts byte, char, short and int.
+      if (!(type == Primitive::kPrimByte ||
+            type == Primitive::kPrimChar ||
+            type == Primitive::kPrimShort ||
+            type == Primitive::kPrimInt)) {
+        return false;
+      }
+      break;
+    case kMips:
+    case kMips64:  // Same list as ARM64, plus long.
+      if (!(type == Primitive::kPrimByte ||
+            type == Primitive::kPrimChar ||
+            type == Primitive::kPrimShort ||
+            type == Primitive::kPrimInt ||
+            type == Primitive::kPrimLong)) {
+        return false;
+      }
+      break;
+    default:  // Every other ISA: never combine.
+      return false;
+  }
+
+  ArenaAllocator* arena = mul->GetBlock()->GetGraph()->GetArena();
+
+  if (mul->HasOnlyOneNonEnvironmentUse()) {
+    HInstruction* use = mul->GetUses().front().GetUser();  // User at the head of the use list.
+    if (use->IsVecAdd() || use->IsVecSub()) {
+      // Replace code looking like
+      //    VECMUL tmp, x, y
+      //    VECADD/SUB dst, acc, tmp
+      // with
+      //    VECMULACC dst, acc, x, y
+      // Note that we do not want to (unconditionally) perform the merge when the
+      // multiplication has multiple uses and it can be merged in all of them.
+      // Multiple uses could happen on the same control-flow path, and we would
+      // then increase the amount of work. In the future we could try to evaluate
+      // whether all uses are on different control-flow paths (using dominance and
+      // reverse-dominance information) and only perform the merge when they are.
+      HInstruction* accumulator = nullptr;
+      HVecBinaryOperation* binop = use->AsVecBinaryOperation();
+      HInstruction* binop_left = binop->GetLeft();
+      HInstruction* binop_right = binop->GetRight();
+      // This is always true since the `HVecMul` has only one use (which is checked above).
+      DCHECK_NE(binop_left, binop_right);
+      if (binop_right == mul) {  // acc +/- mul: the accumulator is the left operand.
+        accumulator = binop_left;
+      } else if (use->IsVecAdd()) {  // mul + acc: the accumulator is the right operand.
+        DCHECK_EQ(binop_left, mul);
+        accumulator = binop_right;
+      }  // Otherwise (mul - acc): accumulator stays null and nothing is combined.
+
+      HInstruction::InstructionKind kind =
+          use->IsVecAdd() ? HInstruction::kAdd : HInstruction::kSub;
+      if (accumulator != nullptr) {
+        HVecMultiplyAccumulate* mulacc =
+            new (arena) HVecMultiplyAccumulate(arena,
+                                               kind,
+                                               accumulator,
+                                               mul->GetLeft(),
+                                               mul->GetRight(),
+                                               binop->GetPackedType(),
+                                               binop->GetVectorLength());
+
+        binop->GetBlock()->ReplaceAndRemoveInstructionWith(binop, mulacc);
+        DCHECK(!mul->HasUses());  // Expected: the replaced binop was the only user.
+        mul->GetBlock()->RemoveInstruction(mul);  // Drop the now-unused multiply.
+        return true;
+      }
+    }
+  }
+
+  return false;  // Nothing was combined.
+}
+
 void InstructionSimplifierVisitor::VisitShift(HBinaryOperation* instruction) {
   DCHECK(instruction->IsShl() || instruction->IsShr() || instruction->IsUShr());
   HInstruction* shift_amount = instruction->GetRight();
@@ -517,7 +591,7 @@
 
   if (object->IsNullConstant()) {
     check_cast->GetBlock()->RemoveInstruction(check_cast);
-    MaybeRecordStat(MethodCompilationStat::kRemovedCheckedCast);
+    MaybeRecordStat(stats_, MethodCompilationStat::kRemovedCheckedCast);
     return;
   }
 
@@ -527,7 +601,7 @@
   if (TypeCheckHasKnownOutcome(load_class, object, &outcome)) {
     if (outcome) {
       check_cast->GetBlock()->RemoveInstruction(check_cast);
-      MaybeRecordStat(MethodCompilationStat::kRemovedCheckedCast);
+      MaybeRecordStat(stats_, MethodCompilationStat::kRemovedCheckedCast);
       if (!load_class->HasUses()) {
         // We cannot rely on DCE to remove the class because the `HLoadClass` thinks it can throw.
         // However, here we know that it cannot because the checkcast was successfull, hence
@@ -557,7 +631,7 @@
 
   HGraph* graph = GetGraph();
   if (object->IsNullConstant()) {
-    MaybeRecordStat(kRemovedInstanceOf);
+    MaybeRecordStat(stats_, kRemovedInstanceOf);
     instruction->ReplaceWith(graph->GetIntConstant(0));
     instruction->GetBlock()->RemoveInstruction(instruction);
     RecordSimplification();
@@ -568,7 +642,7 @@
   // the return value check with the `outcome` check, b/27651442 .
   bool outcome = false;
   if (TypeCheckHasKnownOutcome(load_class, object, &outcome)) {
-    MaybeRecordStat(kRemovedInstanceOf);
+    MaybeRecordStat(stats_, kRemovedInstanceOf);
     if (outcome && can_be_null) {
       // Type test will succeed, we just need a null test.
       HNotEqual* test = new (graph->GetArena()) HNotEqual(graph->GetNullConstant(), object);
@@ -2307,4 +2381,10 @@
   return true;
 }
 
+void InstructionSimplifierVisitor::VisitVecMul(HVecMul* instruction) {
+  if (TryCombineVecMultiplyAccumulate(instruction)) {  // True iff the mul was fused.
+    RecordSimplification();  // Flag the change and bump the simplification stat.
+  }
+}
+
 }  // namespace art
diff --git a/compiler/optimizing/instruction_simplifier_arm64.cc b/compiler/optimizing/instruction_simplifier_arm64.cc
index 311be1f..7c9bfb1 100644
--- a/compiler/optimizing/instruction_simplifier_arm64.cc
+++ b/compiler/optimizing/instruction_simplifier_arm64.cc
@@ -210,12 +210,6 @@
   }
 }
 
-void InstructionSimplifierArm64Visitor::VisitVecMul(HVecMul* instruction) {
-  if (TryCombineVecMultiplyAccumulate(instruction, kArm64)) {
-    RecordSimplification();
-  }
-}
-
 void InstructionSimplifierArm64Visitor::VisitVecLoad(HVecLoad* instruction) {
   if (!instruction->IsStringCharAt()
       && TryExtractVecArrayAccessAddress(instruction, instruction->GetIndex())) {
diff --git a/compiler/optimizing/instruction_simplifier_arm64.h b/compiler/optimizing/instruction_simplifier_arm64.h
index 8596f6a..4f16fc3 100644
--- a/compiler/optimizing/instruction_simplifier_arm64.h
+++ b/compiler/optimizing/instruction_simplifier_arm64.h
@@ -74,7 +74,6 @@
   void VisitTypeConversion(HTypeConversion* instruction) OVERRIDE;
   void VisitUShr(HUShr* instruction) OVERRIDE;
   void VisitXor(HXor* instruction) OVERRIDE;
-  void VisitVecMul(HVecMul* instruction) OVERRIDE;
   void VisitVecLoad(HVecLoad* instruction) OVERRIDE;
   void VisitVecStore(HVecStore* instruction) OVERRIDE;
 
diff --git a/compiler/optimizing/instruction_simplifier_shared.cc b/compiler/optimizing/instruction_simplifier_shared.cc
index d1bc4da..7a759b9 100644
--- a/compiler/optimizing/instruction_simplifier_shared.cc
+++ b/compiler/optimizing/instruction_simplifier_shared.cc
@@ -281,73 +281,6 @@
   return true;
 }
 
-bool TryCombineVecMultiplyAccumulate(HVecMul* mul, InstructionSet isa) {
-  Primitive::Type type = mul->GetPackedType();
-  switch (isa) {
-    case kArm64:
-      if (!(type == Primitive::kPrimByte ||
-            type == Primitive::kPrimChar ||
-            type == Primitive::kPrimShort ||
-            type == Primitive::kPrimInt)) {
-        return false;
-      }
-      break;
-    default:
-      return false;
-  }
-
-  ArenaAllocator* arena = mul->GetBlock()->GetGraph()->GetArena();
-
-  if (mul->HasOnlyOneNonEnvironmentUse()) {
-    HInstruction* use = mul->GetUses().front().GetUser();
-    if (use->IsVecAdd() || use->IsVecSub()) {
-      // Replace code looking like
-      //    VECMUL tmp, x, y
-      //    VECADD/SUB dst, acc, tmp
-      // with
-      //    VECMULACC dst, acc, x, y
-      // Note that we do not want to (unconditionally) perform the merge when the
-      // multiplication has multiple uses and it can be merged in all of them.
-      // Multiple uses could happen on the same control-flow path, and we would
-      // then increase the amount of work. In the future we could try to evaluate
-      // whether all uses are on different control-flow paths (using dominance and
-      // reverse-dominance information) and only perform the merge when they are.
-      HInstruction* accumulator = nullptr;
-      HVecBinaryOperation* binop = use->AsVecBinaryOperation();
-      HInstruction* binop_left = binop->GetLeft();
-      HInstruction* binop_right = binop->GetRight();
-      // This is always true since the `HVecMul` has only one use (which is checked above).
-      DCHECK_NE(binop_left, binop_right);
-      if (binop_right == mul) {
-        accumulator = binop_left;
-      } else if (use->IsVecAdd()) {
-        DCHECK_EQ(binop_left, mul);
-        accumulator = binop_right;
-      }
-
-      HInstruction::InstructionKind kind =
-          use->IsVecAdd() ? HInstruction::kAdd : HInstruction::kSub;
-      if (accumulator != nullptr) {
-        HVecMultiplyAccumulate* mulacc =
-            new (arena) HVecMultiplyAccumulate(arena,
-                                               kind,
-                                               accumulator,
-                                               mul->GetLeft(),
-                                               mul->GetRight(),
-                                               binop->GetPackedType(),
-                                               binop->GetVectorLength());
-
-        binop->GetBlock()->ReplaceAndRemoveInstructionWith(binop, mulacc);
-        DCHECK(!mul->HasUses());
-        mul->GetBlock()->RemoveInstruction(mul);
-        return true;
-      }
-    }
-  }
-
-  return false;
-}
-
 bool TryExtractVecArrayAccessAddress(HVecMemoryOperation* access, HInstruction* index) {
   if (index->IsConstant()) {
     // If index is constant the whole address calculation often can be done by LDR/STR themselves.
diff --git a/compiler/optimizing/instruction_simplifier_shared.h b/compiler/optimizing/instruction_simplifier_shared.h
index 371619f..31e2383 100644
--- a/compiler/optimizing/instruction_simplifier_shared.h
+++ b/compiler/optimizing/instruction_simplifier_shared.h
@@ -58,7 +58,6 @@
                                   HInstruction* index,
                                   size_t data_offset);
 
-bool TryCombineVecMultiplyAccumulate(HVecMul* mul, InstructionSet isa);
 bool TryExtractVecArrayAccessAddress(HVecMemoryOperation* access, HInstruction* index);
 
 }  // namespace art
diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc
index 7bdeef5..11725f4 100644
--- a/compiler/optimizing/intrinsics.cc
+++ b/compiler/optimizing/intrinsics.cc
@@ -154,7 +154,8 @@
                                  NeedsEnvironmentOrCache(intrinsic),
                                  GetSideEffects(intrinsic),
                                  GetExceptions(intrinsic));
-            MaybeRecordStat(MethodCompilationStat::kIntrinsicRecognized);
+            MaybeRecordStat(stats_,
+                            MethodCompilationStat::kIntrinsicRecognized);
           }
         }
       }
diff --git a/compiler/optimizing/licm.cc b/compiler/optimizing/licm.cc
index f0086fb..10524b0 100644
--- a/compiler/optimizing/licm.cc
+++ b/compiler/optimizing/licm.cc
@@ -141,7 +141,7 @@
             DCHECK(!instruction->HasEnvironment());
           }
           instruction->MoveBefore(pre_header->GetLastInstruction());
-          MaybeRecordStat(MethodCompilationStat::kLoopInvariantMoved);
+          MaybeRecordStat(stats_, MethodCompilationStat::kLoopInvariantMoved);
         } else if (instruction->CanThrow() || instruction->DoesAnyWrite()) {
           // If `instruction` can do something visible (throw or write),
           // we cannot move further instructions that can throw.
diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc
index fddda3d..98b8592 100644
--- a/compiler/optimizing/load_store_elimination.cc
+++ b/compiler/optimizing/load_store_elimination.cc
@@ -40,8 +40,9 @@
  public:
   LSEVisitor(HGraph* graph,
              const HeapLocationCollector& heap_locations_collector,
-             const SideEffectsAnalysis& side_effects)
-      : HGraphVisitor(graph),
+             const SideEffectsAnalysis& side_effects,
+             OptimizingCompilerStats* stats)
+      : HGraphVisitor(graph, stats),
         heap_location_collector_(heap_locations_collector),
         side_effects_(side_effects),
         heap_values_for_(graph->GetBlocks().size(),
@@ -100,7 +101,10 @@
     //   * - Constructor fences (they never escape this thread).
     //   * - Allocations (if they are unused).
     for (HInstruction* new_instance : singleton_new_instances_) {
-      HConstructorFence::RemoveConstructorFences(new_instance);
+      size_t removed = HConstructorFence::RemoveConstructorFences(new_instance);
+      MaybeRecordStat(stats_,
+                      MethodCompilationStat::kConstructorFenceRemovedLSE,
+                      removed);
 
       if (!new_instance->HasNonEnvironmentUses()) {
         new_instance->RemoveEnvironmentUsers();
@@ -108,7 +112,10 @@
       }
     }
     for (HInstruction* new_array : singleton_new_arrays_) {
-      HConstructorFence::RemoveConstructorFences(new_array);
+      size_t removed = HConstructorFence::RemoveConstructorFences(new_array);
+      MaybeRecordStat(stats_,
+                      MethodCompilationStat::kConstructorFenceRemovedLSE,
+                      removed);
 
       if (!new_array->HasNonEnvironmentUses()) {
         new_array->RemoveEnvironmentUsers();
@@ -663,7 +670,7 @@
     return;
   }
 
-  LSEVisitor lse_visitor(graph_, heap_location_collector, side_effects_);
+  LSEVisitor lse_visitor(graph_, heap_location_collector, side_effects_, stats_);
   for (HBasicBlock* block : graph_->GetReversePostOrder()) {
     lse_visitor.VisitBasicBlock(block);
   }
diff --git a/compiler/optimizing/load_store_elimination.h b/compiler/optimizing/load_store_elimination.h
index efe71c7..20a8a76 100644
--- a/compiler/optimizing/load_store_elimination.h
+++ b/compiler/optimizing/load_store_elimination.h
@@ -28,8 +28,9 @@
  public:
   LoadStoreElimination(HGraph* graph,
                        const SideEffectsAnalysis& side_effects,
-                       const LoadStoreAnalysis& lsa)
-      : HOptimization(graph, kLoadStoreEliminationPassName),
+                       const LoadStoreAnalysis& lsa,
+                       OptimizingCompilerStats* stats)
+      : HOptimization(graph, kLoadStoreEliminationPassName, stats),
         side_effects_(side_effects),
         lsa_(lsa) {}
 
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index ddd798b..1510eaf 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -1216,11 +1216,14 @@
   DCHECK_EQ(0u, InputCount());
 }
 
-void HConstructorFence::RemoveConstructorFences(HInstruction* instruction) {
+size_t HConstructorFence::RemoveConstructorFences(HInstruction* instruction) {
   DCHECK(instruction->GetBlock() != nullptr);
   // Removing constructor fences only makes sense for instructions with an object return type.
   DCHECK_EQ(Primitive::kPrimNot, instruction->GetType());
 
+  // Number of fences removed; returned so that callers can record statistics.
+  size_t remove_count = 0;
+
   // Efficient implementation that simultaneously (in one pass):
   // * Scans the uses list for all constructor fences.
   // * Deletes that constructor fence from the uses list of `instruction`.
@@ -1268,6 +1271,7 @@
       // is removed.
       if (ctor_fence->InputCount() == 0u) {
         ctor_fence->GetBlock()->RemoveInstruction(ctor_fence);
+        ++remove_count;
       }
     }
   }
@@ -1281,6 +1285,8 @@
     }
     CHECK(instruction->GetBlock() != nullptr);
   }
+
+  return remove_count;
 }
 
 HInstruction* HConstructorFence::GetAssociatedAllocation() {
@@ -1745,6 +1751,10 @@
   return HasOnlyOneInstruction(*this) && GetLastInstruction()->IsGoto();
 }
 
+bool HBasicBlock::IsSingleReturn() const {  // Single-instruction block ending in a return.
+  return HasOnlyOneInstruction(*this) && GetLastInstruction()->IsReturn();
+}
+
 bool HBasicBlock::IsSingleTryBoundary() const {
   return HasOnlyOneInstruction(*this) && GetLastInstruction()->IsTryBoundary();
 }
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 488d472..f60d532 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -959,6 +959,7 @@
   }
 
   bool IsSingleGoto() const;
+  bool IsSingleReturn() const;
   bool IsSingleTryBoundary() const;
 
   // Returns true if this block emits nothing but a jump.
@@ -6631,7 +6632,9 @@
   // This must *not* be called during/after prepare_for_register_allocation,
   // because that removes all the inputs to the fences but the fence is actually
   // still considered live.
-  static void RemoveConstructorFences(HInstruction* instruction);
+  //
+  // Returns how many HConstructorFence instructions were removed from the graph.
+  static size_t RemoveConstructorFences(HInstruction* instruction);
 
   // Check if this constructor fence is protecting
   // an HNewInstance or HNewArray that is also the immediate
@@ -6879,9 +6882,13 @@
 
 namespace art {
 
+class OptimizingCompilerStats;
+
 class HGraphVisitor : public ValueObject {
  public:
-  explicit HGraphVisitor(HGraph* graph) : graph_(graph) {}
+  explicit HGraphVisitor(HGraph* graph, OptimizingCompilerStats* stats = nullptr)
+      : stats_(stats),
+        graph_(graph) {}
   virtual ~HGraphVisitor() {}
 
   virtual void VisitInstruction(HInstruction* instruction ATTRIBUTE_UNUSED) {}
@@ -6903,6 +6910,9 @@
 
 #undef DECLARE_VISIT_INSTRUCTION
 
+ protected:
+  OptimizingCompilerStats* stats_;
+
  private:
   HGraph* const graph_;
 
@@ -6911,7 +6921,8 @@
 
 class HGraphDelegateVisitor : public HGraphVisitor {
  public:
-  explicit HGraphDelegateVisitor(HGraph* graph) : HGraphVisitor(graph) {}
+  explicit HGraphDelegateVisitor(HGraph* graph, OptimizingCompilerStats* stats = nullptr)
+      : HGraphVisitor(graph, stats) {}
   virtual ~HGraphDelegateVisitor() {}
 
   // Visit functions that delegate to to super class.
diff --git a/compiler/optimizing/optimization.cc b/compiler/optimizing/optimization.cc
index 3d76949..1e68ca2 100644
--- a/compiler/optimizing/optimization.cc
+++ b/compiler/optimizing/optimization.cc
@@ -17,11 +17,4 @@
 #include "optimization.h"
 
 namespace art {
-
-void HOptimization::MaybeRecordStat(MethodCompilationStat compilation_stat, size_t count) const {
-  if (stats_ != nullptr) {
-    stats_->RecordStat(compilation_stat, count);
-  }
-}
-
 }  // namespace art
diff --git a/compiler/optimizing/optimization.h b/compiler/optimizing/optimization.h
index 0819fb0..ce41a2e 100644
--- a/compiler/optimizing/optimization.h
+++ b/compiler/optimizing/optimization.h
@@ -47,8 +47,6 @@
   virtual void Run() = 0;
 
  protected:
-  void MaybeRecordStat(MethodCompilationStat compilation_stat, size_t count = 1) const;
-
   HGraph* const graph_;
   // Used to record stats about the optimization.
   OptimizingCompilerStats* const stats_;
diff --git a/compiler/optimizing/optimizing_cfi_test_expected.inc b/compiler/optimizing/optimizing_cfi_test_expected.inc
index 77a63ac..fde55cb 100644
--- a/compiler/optimizing/optimizing_cfi_test_expected.inc
+++ b/compiler/optimizing/optimizing_cfi_test_expected.inc
@@ -148,27 +148,27 @@
     0x48, 0x0A, 0x44, 0xDF, 0x44, 0xD1, 0x44, 0xD0, 0x50, 0x0E, 0x00, 0x0B,
     0x0E, 0x40,
 };
-// 0x00000000: addiu r29, r29, -64
+// 0x00000000: addiu sp, sp, -64
 // 0x00000004: .cfi_def_cfa_offset: 64
-// 0x00000004: sw r31, +60(r29)
+// 0x00000004: sw ra, +60(sp)
 // 0x00000008: .cfi_offset: r31 at cfa-4
-// 0x00000008: sw r17, +56(r29)
+// 0x00000008: sw s1, +56(sp)
 // 0x0000000c: .cfi_offset: r17 at cfa-8
-// 0x0000000c: sw r16, +52(r29)
+// 0x0000000c: sw s0, +52(sp)
 // 0x00000010: .cfi_offset: r16 at cfa-12
-// 0x00000010: sdc1 f22, +40(r29)
-// 0x00000014: sdc1 f20, +32(r29)
+// 0x00000010: sdc1 f22, +40(sp)
+// 0x00000014: sdc1 f20, +32(sp)
 // 0x00000018: .cfi_remember_state
-// 0x00000018: lw r31, +60(r29)
+// 0x00000018: lw ra, +60(sp)
 // 0x0000001c: .cfi_restore: r31
-// 0x0000001c: lw r17, +56(r29)
+// 0x0000001c: lw s1, +56(sp)
 // 0x00000020: .cfi_restore: r17
-// 0x00000020: lw r16, +52(r29)
+// 0x00000020: lw s0, +52(sp)
 // 0x00000024: .cfi_restore: r16
-// 0x00000024: ldc1 f22, +40(r29)
-// 0x00000028: ldc1 f20, +32(r29)
-// 0x0000002c: jr r31
-// 0x00000030: addiu r29, r29, 64
+// 0x00000024: ldc1 f22, +40(sp)
+// 0x00000028: ldc1 f20, +32(sp)
+// 0x0000002c: jr ra
+// 0x00000030: addiu sp, sp, 64
 // 0x00000034: .cfi_def_cfa_offset: 0
 // 0x00000034: .cfi_restore_state
 // 0x00000034: .cfi_def_cfa_offset: 64
@@ -185,32 +185,32 @@
     0x44, 0xB9, 0x08, 0x44, 0xB8, 0x0A, 0x0A, 0x44, 0xDF, 0x44, 0xD1, 0x44,
     0xD0, 0x44, 0xF9, 0x44, 0xF8, 0x44, 0x0E, 0x00, 0x44, 0x0B, 0x0E, 0x40,
 };
-// 0x00000000: daddiu r29, r29, -64
+// 0x00000000: daddiu sp, sp, -64
 // 0x00000004: .cfi_def_cfa_offset: 64
-// 0x00000004: sd r31, +56(r29)
+// 0x00000004: sd ra, +56(sp)
 // 0x00000008: .cfi_offset: r31 at cfa-8
-// 0x00000008: sd r17, +48(r29)
+// 0x00000008: sd s1, +48(sp)
 // 0x0000000c: .cfi_offset: r17 at cfa-16
-// 0x0000000c: sd r16, +40(r29)
+// 0x0000000c: sd s0, +40(sp)
 // 0x00000010: .cfi_offset: r16 at cfa-24
-// 0x00000010: sdc1 f25, +32(r29)
+// 0x00000010: sdc1 f25, +32(sp)
 // 0x00000014: .cfi_offset: r57 at cfa-32
-// 0x00000014: sdc1 f24, +24(r29)
+// 0x00000014: sdc1 f24, +24(sp)
 // 0x00000018: .cfi_offset: r56 at cfa-40
 // 0x00000018: .cfi_remember_state
-// 0x00000018: ld r31, +56(r29)
+// 0x00000018: ld ra, +56(sp)
 // 0x0000001c: .cfi_restore: r31
-// 0x0000001c: ld r17, +48(r29)
+// 0x0000001c: ld s1, +48(sp)
 // 0x00000020: .cfi_restore: r17
-// 0x00000020: ld r16, +40(r29)
+// 0x00000020: ld s0, +40(sp)
 // 0x00000024: .cfi_restore: r16
-// 0x00000024: ldc1 f25, +32(r29)
+// 0x00000024: ldc1 f25, +32(sp)
 // 0x00000028: .cfi_restore: r57
-// 0x00000028: ldc1 f24, +24(r29)
+// 0x00000028: ldc1 f24, +24(sp)
 // 0x0000002c: .cfi_restore: r56
-// 0x0000002c: daddiu r29, r29, 64
+// 0x0000002c: daddiu sp, sp, 64
 // 0x00000030: .cfi_def_cfa_offset: 0
-// 0x00000030: jic r31, 0
+// 0x00000030: jic ra, 0
 // 0x00000034: .cfi_restore_state
 // 0x00000034: .cfi_def_cfa_offset: 64
 
@@ -330,7 +330,7 @@
 static constexpr uint8_t expected_asm_kMips_adjust_head[] = {
     0xC0, 0xFF, 0xBD, 0x27, 0x3C, 0x00, 0xBF, 0xAF, 0x38, 0x00, 0xB1, 0xAF,
     0x34, 0x00, 0xB0, 0xAF, 0x28, 0x00, 0xB6, 0xF7, 0x20, 0x00, 0xB4, 0xF7,
-    0x08, 0x00, 0x04, 0x14, 0xFC, 0xFF, 0xBD, 0x27,
+    0x08, 0x00, 0x80, 0x14, 0xFC, 0xFF, 0xBD, 0x27,
     0x00, 0x00, 0xBF, 0xAF, 0x00, 0x00, 0x10, 0x04, 0x02, 0x00, 0x01, 0x3C,
     0x18, 0x00, 0x21, 0x34, 0x21, 0x08, 0x3F, 0x00, 0x00, 0x00, 0xBF, 0x8F,
     0x09, 0x00, 0x20, 0x00, 0x04, 0x00, 0xBD, 0x27,
@@ -345,42 +345,42 @@
     0x50, 0x0E, 0x44, 0x60, 0x0E, 0x40, 0x04, 0x04, 0x00, 0x02, 0x00, 0x0A,
     0x44, 0xDF, 0x44, 0xD1, 0x44, 0xD0, 0x50, 0x0E, 0x00, 0x0B, 0x0E, 0x40,
 };
-// 0x00000000: addiu r29, r29, -64
+// 0x00000000: addiu sp, sp, -64
 // 0x00000004: .cfi_def_cfa_offset: 64
-// 0x00000004: sw r31, +60(r29)
+// 0x00000004: sw ra, +60(sp)
 // 0x00000008: .cfi_offset: r31 at cfa-4
-// 0x00000008: sw r17, +56(r29)
+// 0x00000008: sw s1, +56(sp)
 // 0x0000000c: .cfi_offset: r17 at cfa-8
-// 0x0000000c: sw r16, +52(r29)
+// 0x0000000c: sw s0, +52(sp)
 // 0x00000010: .cfi_offset: r16 at cfa-12
-// 0x00000010: sdc1 f22, +40(r29)
-// 0x00000014: sdc1 f20, +32(r29)
-// 0x00000018: bne r0, r4, 0x00000040 ; +36
-// 0x0000001c: addiu r29, r29, -4
+// 0x00000010: sdc1 f22, +40(sp)
+// 0x00000014: sdc1 f20, +32(sp)
+// 0x00000018: bnez a0, 0x0000003c ; +36
+// 0x0000001c: addiu sp, sp, -4
 // 0x00000020: .cfi_def_cfa_offset: 68
-// 0x00000020: sw r31, +0(r29)
-// 0x00000024: bltzal r0, 0x0000002c ; +4
-// 0x00000028: lui r1, 0x20000
-// 0x0000002c: ori r1, r1, 24
-// 0x00000030: addu r1, r1, r31
-// 0x00000034: lw r31, +0(r29)
-// 0x00000038: jr r1
-// 0x0000003c: addiu r29, r29, 4
+// 0x00000020: sw ra, +0(sp)
+// 0x00000024: nal
+// 0x00000028: lui at, 2
+// 0x0000002c: ori at, at, 24
+// 0x00000030: addu at, at, ra
+// 0x00000034: lw ra, +0(sp)
+// 0x00000038: jr at
+// 0x0000003c: addiu sp, sp, 4
 // 0x00000040: .cfi_def_cfa_offset: 64
 // 0x00000040: nop
 //             ...
 // 0x00020040: nop
 // 0x00020044: .cfi_remember_state
-// 0x00020044: lw r31, +60(r29)
+// 0x00020044: lw ra, +60(sp)
 // 0x00020048: .cfi_restore: r31
-// 0x00020048: lw r17, +56(r29)
+// 0x00020048: lw s1, +56(sp)
 // 0x0002004c: .cfi_restore: r17
-// 0x0002004c: lw r16, +52(r29)
+// 0x0002004c: lw s0, +52(sp)
 // 0x00020050: .cfi_restore: r16
-// 0x00020050: ldc1 f22, +40(r29)
-// 0x00020054: ldc1 f20, +32(r29)
-// 0x00020058: jr r31
-// 0x0002005c: addiu r29, r29, 64
+// 0x00020050: ldc1 f22, +40(sp)
+// 0x00020054: ldc1 f20, +32(sp)
+// 0x00020058: jr ra
+// 0x0002005c: addiu sp, sp, 64
 // 0x00020060: .cfi_def_cfa_offset: 0
 // 0x00020060: .cfi_restore_state
 // 0x00020060: .cfi_def_cfa_offset: 64
@@ -401,37 +401,37 @@
     0x44, 0xDF, 0x44, 0xD1, 0x44, 0xD0, 0x44, 0xF9, 0x44, 0xF8, 0x44, 0x0E,
     0x00, 0x44, 0x0B, 0x0E, 0x40,
 };
-// 0x00000000: daddiu r29, r29, -64
+// 0x00000000: daddiu sp, sp, -64
 // 0x00000004: .cfi_def_cfa_offset: 64
-// 0x00000004: sd r31, +56(r29)
+// 0x00000004: sd ra, +56(sp)
 // 0x00000008: .cfi_offset: r31 at cfa-8
-// 0x00000008: sd r17, +48(r29)
+// 0x00000008: sd s1, +48(sp)
 // 0x0000000c: .cfi_offset: r17 at cfa-16
-// 0x0000000c: sd r16, +40(r29)
+// 0x0000000c: sd s0, +40(sp)
 // 0x00000010: .cfi_offset: r16 at cfa-24
-// 0x00000010: sdc1 f25, +32(r29)
+// 0x00000010: sdc1 f25, +32(sp)
 // 0x00000014: .cfi_offset: r57 at cfa-32
-// 0x00000014: sdc1 f24, +24(r29)
+// 0x00000014: sdc1 f24, +24(sp)
 // 0x00000018: .cfi_offset: r56 at cfa-40
-// 0x00000018: bnec r5, r6, 0x00000024 ; +12
-// 0x0000001c: auipc r1, 2
-// 0x00000020: jic r1, 12 ; bc 0x00020028 ; +131080
+// 0x00000018: bnec a1, a2, 0x00000024 ; +12
+// 0x0000001c: auipc at, 2
+// 0x00000020: jic at, 12 ; bc 0x00020028 ; +131080
 // 0x00000024: nop
 //             ...
 // 0x00020024: nop
 // 0x00020028: .cfi_remember_state
-// 0x00020028: ld r31, +56(r29)
+// 0x00020028: ld ra, +56(sp)
 // 0x0002002c: .cfi_restore: r31
-// 0x0002002c: ld r17, +48(r29)
+// 0x0002002c: ld s1, +48(sp)
 // 0x00020030: .cfi_restore: r17
-// 0x00020030: ld r16, +40(r29)
+// 0x00020030: ld s0, +40(sp)
 // 0x00020034: .cfi_restore: r16
-// 0x00020034: ldc1 f25, +32(r29)
+// 0x00020034: ldc1 f25, +32(sp)
 // 0x00020038: .cfi_restore: r57
-// 0x00020038: ldc1 f24, +24(r29)
+// 0x00020038: ldc1 f24, +24(sp)
 // 0x0002003c: .cfi_restore: r56
-// 0x0002003c: daddiu r29, r29, 64
+// 0x0002003c: daddiu sp, sp, 64
 // 0x00020040: .cfi_def_cfa_offset: 0
-// 0x00020040: jic r31, 0
+// 0x00020040: jic ra, 0
 // 0x00020044: .cfi_restore_state
 // 0x00020044: .cfi_def_cfa_offset: 64
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index a6c33b4..e98c97c 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -327,12 +327,6 @@
 
   void UnInit() const OVERRIDE;
 
-  void MaybeRecordStat(MethodCompilationStat compilation_stat) const {
-    if (compilation_stats_.get() != nullptr) {
-      compilation_stats_->RecordStat(compilation_stat);
-    }
-  }
-
   bool JitCompile(Thread* self,
                   jit::JitCodeCache* code_cache,
                   ArtMethod* method,
@@ -495,7 +489,7 @@
   } else if (opt_name == HSharpening::kSharpeningPassName) {
     return new (arena) HSharpening(graph, codegen, dex_compilation_unit, driver, handles);
   } else if (opt_name == HSelectGenerator::kSelectGeneratorPassName) {
-    return new (arena) HSelectGenerator(graph, stats);
+    return new (arena) HSelectGenerator(graph, handles, stats);
   } else if (opt_name == HInductionVarAnalysis::kInductionPassName) {
     return new (arena) HInductionVarAnalysis(graph);
   } else if (opt_name == InstructionSimplifier::kInstructionSimplifierPassName) {
@@ -510,7 +504,8 @@
   } else if (opt_name == LoadStoreElimination::kLoadStoreEliminationPassName) {
     CHECK(most_recent_side_effects != nullptr);
     CHECK(most_recent_lsa != nullptr);
-    return new (arena) LoadStoreElimination(graph, *most_recent_side_effects, *most_recent_lsa);
+    return
+        new (arena) LoadStoreElimination(graph, *most_recent_side_effects, *most_recent_lsa, stats);
   } else if (opt_name == SideEffectsAnalysis::kSideEffectsAnalysisPassName) {
     return new (arena) SideEffectsAnalysis(graph);
   } else if (opt_name == HLoopOptimization::kLoopOptimizationPassName) {
@@ -714,11 +709,12 @@
 static void AllocateRegisters(HGraph* graph,
                               CodeGenerator* codegen,
                               PassObserver* pass_observer,
-                              RegisterAllocator::Strategy strategy) {
+                              RegisterAllocator::Strategy strategy,
+                              OptimizingCompilerStats* stats) {
   {
     PassScope scope(PrepareForRegisterAllocation::kPrepareForRegisterAllocationPassName,
                     pass_observer);
-    PrepareForRegisterAllocation(graph).Run();
+    PrepareForRegisterAllocation(graph, stats).Run();
   }
   SsaLivenessAnalysis liveness(graph, codegen);
   {
@@ -762,7 +758,7 @@
   HConstantFolding* fold1 = new (arena) HConstantFolding(graph, "constant_folding");
   InstructionSimplifier* simplify1 = new (arena) InstructionSimplifier(
       graph, codegen, driver, stats);
-  HSelectGenerator* select_generator = new (arena) HSelectGenerator(graph, stats);
+  HSelectGenerator* select_generator = new (arena) HSelectGenerator(graph, handles, stats);
   HConstantFolding* fold2 = new (arena) HConstantFolding(
       graph, "constant_folding$after_inlining");
   HConstantFolding* fold3 = new (arena) HConstantFolding(graph, "constant_folding$after_bce");
@@ -776,7 +772,7 @@
   BoundsCheckElimination* bce = new (arena) BoundsCheckElimination(graph, *side_effects1, induction);
   HLoopOptimization* loop = new (arena) HLoopOptimization(graph, driver, induction);
   LoadStoreAnalysis* lsa = new (arena) LoadStoreAnalysis(graph);
-  LoadStoreElimination* lse = new (arena) LoadStoreElimination(graph, *side_effects2, *lsa);
+  LoadStoreElimination* lse = new (arena) LoadStoreElimination(graph, *side_effects2, *lsa, stats);
   HSharpening* sharpening = new (arena) HSharpening(
       graph, codegen, dex_compilation_unit, driver, handles);
   InstructionSimplifier* simplify2 = new (arena) InstructionSimplifier(
@@ -892,7 +888,8 @@
                                               ArtMethod* method,
                                               bool osr,
                                               VariableSizedHandleScope* handles) const {
-  MaybeRecordStat(MethodCompilationStat::kAttemptCompilation);
+  MaybeRecordStat(compilation_stats_.get(),
+                  MethodCompilationStat::kAttemptCompilation);
   CompilerDriver* compiler_driver = GetCompilerDriver();
   InstructionSet instruction_set = compiler_driver->GetInstructionSet();
 
@@ -902,12 +899,14 @@
 
   // Do not attempt to compile on architectures we do not support.
   if (!IsInstructionSetSupported(instruction_set)) {
-    MaybeRecordStat(MethodCompilationStat::kNotCompiledUnsupportedIsa);
+    MaybeRecordStat(compilation_stats_.get(),
+                    MethodCompilationStat::kNotCompiledUnsupportedIsa);
     return nullptr;
   }
 
   if (Compiler::IsPathologicalCase(*code_item, method_idx, dex_file)) {
-    MaybeRecordStat(MethodCompilationStat::kNotCompiledPathological);
+    MaybeRecordStat(compilation_stats_.get(),
+                    MethodCompilationStat::kNotCompiledPathological);
     return nullptr;
   }
 
@@ -917,7 +916,8 @@
   const CompilerOptions& compiler_options = compiler_driver->GetCompilerOptions();
   if ((compiler_options.GetCompilerFilter() == CompilerFilter::kSpace)
       && (code_item->insns_size_in_code_units_ > kSpaceFilterOptimizingThreshold)) {
-    MaybeRecordStat(MethodCompilationStat::kNotCompiledSpaceFilter);
+    MaybeRecordStat(compilation_stats_.get(),
+                    MethodCompilationStat::kNotCompiledSpaceFilter);
     return nullptr;
   }
 
@@ -964,7 +964,8 @@
                             compiler_driver->GetCompilerOptions(),
                             compilation_stats_.get()));
   if (codegen.get() == nullptr) {
-    MaybeRecordStat(MethodCompilationStat::kNotCompiledNoCodegen);
+    MaybeRecordStat(compilation_stats_.get(),
+                    MethodCompilationStat::kNotCompiledNoCodegen);
     return nullptr;
   }
   codegen->GetAssembler()->cfi().SetEnabled(
@@ -993,17 +994,25 @@
     GraphAnalysisResult result = builder.BuildGraph();
     if (result != kAnalysisSuccess) {
       switch (result) {
-        case kAnalysisSkipped:
-          MaybeRecordStat(MethodCompilationStat::kNotCompiledSkipped);
+        case kAnalysisSkipped: {
+          MaybeRecordStat(compilation_stats_.get(),
+                          MethodCompilationStat::kNotCompiledSkipped);
+        }
           break;
-        case kAnalysisInvalidBytecode:
-          MaybeRecordStat(MethodCompilationStat::kNotCompiledInvalidBytecode);
+        case kAnalysisInvalidBytecode: {
+          MaybeRecordStat(compilation_stats_.get(),
+                          MethodCompilationStat::kNotCompiledInvalidBytecode);
+        }
           break;
-        case kAnalysisFailThrowCatchLoop:
-          MaybeRecordStat(MethodCompilationStat::kNotCompiledThrowCatchLoop);
+        case kAnalysisFailThrowCatchLoop: {
+          MaybeRecordStat(compilation_stats_.get(),
+                          MethodCompilationStat::kNotCompiledThrowCatchLoop);
+        }
           break;
-        case kAnalysisFailAmbiguousArrayOp:
-          MaybeRecordStat(MethodCompilationStat::kNotCompiledAmbiguousArrayOp);
+        case kAnalysisFailAmbiguousArrayOp: {
+          MaybeRecordStat(compilation_stats_.get(),
+                          MethodCompilationStat::kNotCompiledAmbiguousArrayOp);
+        }
           break;
         case kAnalysisSuccess:
           UNREACHABLE();
@@ -1022,7 +1031,11 @@
 
   RegisterAllocator::Strategy regalloc_strategy =
     compiler_options.GetRegisterAllocationStrategy();
-  AllocateRegisters(graph, codegen.get(), &pass_observer, regalloc_strategy);
+  AllocateRegisters(graph,
+                    codegen.get(),
+                    &pass_observer,
+                    regalloc_strategy,
+                    compilation_stats_.get());
 
   codegen->Compile(code_allocator);
   pass_observer.DumpDisassembly();
@@ -1070,7 +1083,8 @@
                      &handles));
     }
     if (codegen.get() != nullptr) {
-      MaybeRecordStat(MethodCompilationStat::kCompiled);
+      MaybeRecordStat(compilation_stats_.get(),
+                      MethodCompilationStat::kCompiled);
       method = Emit(&arena, &code_allocator, codegen.get(), compiler_driver, code_item);
 
       if (kArenaAllocatorCountAllocations) {
@@ -1081,11 +1095,13 @@
       }
     }
   } else {
+    MethodCompilationStat method_stat;
     if (compiler_driver->GetCompilerOptions().VerifyAtRuntime()) {
-      MaybeRecordStat(MethodCompilationStat::kNotCompiledVerifyAtRuntime);
+      method_stat = MethodCompilationStat::kNotCompiledVerifyAtRuntime;
     } else {
-      MaybeRecordStat(MethodCompilationStat::kNotCompiledVerificationError);
+      method_stat = MethodCompilationStat::kNotCompiledVerificationError;
     }
+    MaybeRecordStat(compilation_stats_.get(), method_stat);
   }
 
   if (kIsDebugBuild &&
@@ -1214,7 +1230,7 @@
   if (stack_map_data == nullptr || roots_data == nullptr) {
     return false;
   }
-  MaybeRecordStat(MethodCompilationStat::kCompiled);
+  MaybeRecordStat(compilation_stats_.get(), MethodCompilationStat::kCompiled);
   codegen->BuildStackMaps(MemoryRegion(stack_map_data, stack_map_size),
                           MemoryRegion(method_info_data, method_info_size),
                           *code_item);
diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h
index a211c54..d6da73c 100644
--- a/compiler/optimizing/optimizing_compiler_stats.h
+++ b/compiler/optimizing/optimizing_compiler_stats.h
@@ -23,6 +23,7 @@
 #include <type_traits>
 
 #include "atomic.h"
+#include "globals.h"
 
 namespace art {
 
@@ -86,6 +87,10 @@
   kNotInlinedWont,
   kNotInlinedRecursiveBudget,
   kNotInlinedProxy,
+  kConstructorFenceGeneratedNew,
+  kConstructorFenceGeneratedFinal,
+  kConstructorFenceRemovedLSE,
+  kConstructorFenceRemovedPFRA,
   kLastStat
 };
 
@@ -202,6 +207,10 @@
       case kNotInlinedWont: name = "NotInlinedWont"; break;
       case kNotInlinedRecursiveBudget: name = "NotInlinedRecursiveBudget"; break;
       case kNotInlinedProxy: name = "NotInlinedProxy"; break;
+      case kConstructorFenceGeneratedNew: name = "ConstructorFenceGeneratedNew"; break;
+      case kConstructorFenceGeneratedFinal: name = "ConstructorFenceGeneratedFinal"; break;
+      case kConstructorFenceRemovedLSE: name = "ConstructorFenceRemovedLSE"; break;
+      case kConstructorFenceRemovedPFRA: name = "ConstructorFenceRemovedPFRA"; break;
 
       case kLastStat:
         LOG(FATAL) << "invalid stat "
@@ -216,6 +225,14 @@
   DISALLOW_COPY_AND_ASSIGN(OptimizingCompilerStats);
 };
 
+inline void MaybeRecordStat(OptimizingCompilerStats* compiler_stats,
+                            MethodCompilationStat stat,
+                            uint32_t count = 1) {
+  // Stats collection is optional: a null `compiler_stats` makes this a no-op.
+  if (compiler_stats == nullptr) return;
+  compiler_stats->RecordStat(stat, count);
+}
+
 }  // namespace art
 
 #endif  // ART_COMPILER_OPTIMIZING_OPTIMIZING_COMPILER_STATS_H_
diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc
index 7c6b69f..5de707a 100644
--- a/compiler/optimizing/prepare_for_register_allocation.cc
+++ b/compiler/optimizing/prepare_for_register_allocation.cc
@@ -17,6 +17,7 @@
 #include "prepare_for_register_allocation.h"
 
 #include "jni_internal.h"
+#include "optimizing_compiler_stats.h"
 #include "well_known_classes.h"
 
 namespace art {
@@ -190,8 +191,9 @@
       // TODO: GetAssociatedAllocation should not care about multiple inputs
       // if we are in prepare_for_register_allocation pass only.
       constructor_fence->GetBlock()->RemoveInstruction(constructor_fence);
+      MaybeRecordStat(stats_,
+                      MethodCompilationStat::kConstructorFenceRemovedPFRA);
       return;
-      // TODO: actually remove the dmb from the .S entrypoints (initialized variants only).
     }
 
     // HNewArray does not need this check because the art_quick_alloc_array does not itself
diff --git a/compiler/optimizing/prepare_for_register_allocation.h b/compiler/optimizing/prepare_for_register_allocation.h
index 395d4ba..2c64f01 100644
--- a/compiler/optimizing/prepare_for_register_allocation.h
+++ b/compiler/optimizing/prepare_for_register_allocation.h
@@ -21,6 +21,8 @@
 
 namespace art {
 
+class OptimizingCompilerStats;
+
 /**
  * A simplification pass over the graph before doing register allocation.
  * For example it changes uses of null checks and bounds checks to the original
@@ -28,7 +30,9 @@
  */
 class PrepareForRegisterAllocation : public HGraphDelegateVisitor {
  public:
-  explicit PrepareForRegisterAllocation(HGraph* graph) : HGraphDelegateVisitor(graph) {}
+  explicit PrepareForRegisterAllocation(HGraph* graph,
+                                        OptimizingCompilerStats* stats = nullptr)
+      : HGraphDelegateVisitor(graph, stats) {}
 
   void Run();
 
diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc
index 561c9ea..93613a5 100644
--- a/compiler/optimizing/reference_type_propagation.cc
+++ b/compiler/optimizing/reference_type_propagation.cc
@@ -754,8 +754,23 @@
   }
 }
 
+void ReferenceTypePropagation::FixUpInstructionType(HInstruction* instruction,
+                                                    VariableSizedHandleScope* handle_scope) {
+  if (!instruction->IsSelect()) {
+    LOG(FATAL) << "Invalid instruction in FixUpInstructionType";
+    return;  // Not reached: LOG(FATAL) aborts.
+  }
+  ScopedObjectAccess soa(Thread::Current());
+  HandleCache cache(handle_scope);
+  HSelect* const select = instruction->AsSelect();
+  const ReferenceTypeInfo rti_if_false = select->GetFalseValue()->GetReferenceTypeInfo();
+  const ReferenceTypeInfo rti_if_true = select->GetTrueValue()->GetReferenceTypeInfo();
+  select->SetReferenceTypeInfo(MergeTypes(rti_if_false, rti_if_true, &cache));
+}
+
 ReferenceTypeInfo ReferenceTypePropagation::MergeTypes(const ReferenceTypeInfo& a,
-                                                       const ReferenceTypeInfo& b) {
+                                                       const ReferenceTypeInfo& b,
+                                                       HandleCache* handle_cache) {
   if (!b.IsValid()) {
     return a;
   }
@@ -780,7 +795,7 @@
     is_exact = false;
   } else if (!a_is_interface && !b_is_interface) {
     result_type_handle =
-        handle_cache_.NewHandle(a_type_handle->GetCommonSuperClass(b_type_handle));
+        handle_cache->NewHandle(a_type_handle->GetCommonSuperClass(b_type_handle));
     is_exact = false;
   } else {
     // This can happen if:
@@ -790,7 +805,7 @@
     //        void foo(Interface i, boolean cond) {
     //          Object o = cond ? i : new Object();
     //        }
-    result_type_handle = handle_cache_.GetObjectClassHandle();
+    result_type_handle = handle_cache->GetObjectClassHandle();
     is_exact = false;
   }
 
@@ -916,7 +931,7 @@
     if (inputs[i]->IsNullConstant()) {
       continue;
     }
-    new_rti = MergeTypes(new_rti, inputs[i]->GetReferenceTypeInfo());
+    new_rti = MergeTypes(new_rti, inputs[i]->GetReferenceTypeInfo(), &handle_cache_);
     if (new_rti.IsValid() && new_rti.IsObjectClass()) {
       if (!new_rti.IsExact()) {
         break;
diff --git a/compiler/optimizing/reference_type_propagation.h b/compiler/optimizing/reference_type_propagation.h
index b19f473..c221282 100644
--- a/compiler/optimizing/reference_type_propagation.h
+++ b/compiler/optimizing/reference_type_propagation.h
@@ -54,6 +54,12 @@
 
   static constexpr const char* kReferenceTypePropagationPassName = "reference_type_propagation";
 
+  // Fix the reference type for an instruction whose inputs have changed.
+  // For a select instruction, the reference types of the inputs are merged
+  // and the resulting reference type is set on the select instruction.
+  static void FixUpInstructionType(HInstruction* instruction,
+                                   VariableSizedHandleScope* handle_scope);
+
  private:
   class HandleCache {
    public:
@@ -101,7 +107,9 @@
   static void UpdateArrayGet(HArrayGet* instr, HandleCache* handle_cache)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  ReferenceTypeInfo MergeTypes(const ReferenceTypeInfo& a, const ReferenceTypeInfo& b)
+  static ReferenceTypeInfo MergeTypes(const ReferenceTypeInfo& a,
+                                      const ReferenceTypeInfo& b,
+                                      HandleCache* handle_cache)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   void ValidateTypes();
diff --git a/compiler/optimizing/reference_type_propagation_test.cc b/compiler/optimizing/reference_type_propagation_test.cc
index d537459..cb2af91 100644
--- a/compiler/optimizing/reference_type_propagation_test.cc
+++ b/compiler/optimizing/reference_type_propagation_test.cc
@@ -49,7 +49,7 @@
   // Relay method to merge type in reference type propagation.
   ReferenceTypeInfo MergeTypes(const ReferenceTypeInfo& a,
                                const ReferenceTypeInfo& b) REQUIRES_SHARED(Locks::mutator_lock_) {
-    return propagation_->MergeTypes(a, b);
+    return propagation_->MergeTypes(a, b, &propagation_->handle_cache_);
   }
 
   // Helper method to construct an invalid type.
@@ -163,4 +163,3 @@
 }
 
 }  // namespace art
-
diff --git a/compiler/optimizing/select_generator.cc b/compiler/optimizing/select_generator.cc
index 46d0d0e..e220d32 100644
--- a/compiler/optimizing/select_generator.cc
+++ b/compiler/optimizing/select_generator.cc
@@ -20,9 +20,16 @@
 
 static constexpr size_t kMaxInstructionsInBranch = 1u;
 
-// Returns true if `block` has only one predecessor, ends with a Goto and
-// contains at most `kMaxInstructionsInBranch` other movable instruction with
-// no side-effects.
+HSelectGenerator::HSelectGenerator(HGraph* graph,
+                                   VariableSizedHandleScope* handles,
+                                   OptimizingCompilerStats* stats)
+    : HOptimization(graph, kSelectGeneratorPassName, stats),
+      handle_scope_(handles) {  // Passed to FixUpInstructionType() for reference-typed Selects.
+}
+
+// Returns true if `block` has only one predecessor, ends with a Goto
+// or a Return and contains at most `kMaxInstructionsInBranch` other
+// movable instruction with no side-effects.
 static bool IsSimpleBlock(HBasicBlock* block) {
   if (block->GetPredecessors().size() != 1u) {
     return false;
@@ -33,7 +40,10 @@
   for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
     HInstruction* instruction = it.Current();
     if (instruction->IsControlFlow()) {
-      return instruction->IsGoto() && num_instructions <= kMaxInstructionsInBranch;
+      if (num_instructions > kMaxInstructionsInBranch) {
+        return false;
+      }
+      return instruction->IsGoto() || instruction->IsReturn();
     } else if (instruction->CanBeMoved() && !instruction->HasSideEffects()) {
       num_instructions++;
     } else {
@@ -45,8 +55,8 @@
   UNREACHABLE();
 }
 
-// Returns true if 'block1' and 'block2' are empty, merge into the same single
-// successor and the successor can only be reached from them.
+// Returns true if 'block1' and 'block2' are empty and merge into the
+// same single successor.
 static bool BlocksMergeTogether(HBasicBlock* block1, HBasicBlock* block2) {
   return block1->GetSingleSuccessor() == block2->GetSingleSuccessor();
 }
@@ -94,53 +104,73 @@
     // If the branches are not empty, move instructions in front of the If.
     // TODO(dbrazdil): This puts an instruction between If and its condition.
     //                 Implement moving of conditions to first users if possible.
-    if (!true_block->IsSingleGoto()) {
+    if (!true_block->IsSingleGoto() && !true_block->IsSingleReturn()) {
       true_block->GetFirstInstruction()->MoveBefore(if_instruction);
     }
-    if (!false_block->IsSingleGoto()) {
+    if (!false_block->IsSingleGoto() && !false_block->IsSingleReturn()) {
       false_block->GetFirstInstruction()->MoveBefore(if_instruction);
     }
-    DCHECK(true_block->IsSingleGoto());
-    DCHECK(false_block->IsSingleGoto());
+    DCHECK(true_block->IsSingleGoto() || true_block->IsSingleReturn());
+    DCHECK(false_block->IsSingleGoto() || false_block->IsSingleReturn());
 
     // Find the resulting true/false values.
     size_t predecessor_index_true = merge_block->GetPredecessorIndexOf(true_block);
     size_t predecessor_index_false = merge_block->GetPredecessorIndexOf(false_block);
     DCHECK_NE(predecessor_index_true, predecessor_index_false);
 
+    bool both_successors_return = true_block->IsSingleReturn() && false_block->IsSingleReturn();
     HPhi* phi = GetSingleChangedPhi(merge_block, predecessor_index_true, predecessor_index_false);
-    if (phi == nullptr) {
+
+    HInstruction* true_value = nullptr;
+    HInstruction* false_value = nullptr;
+    if (both_successors_return) {
+      true_value = true_block->GetFirstInstruction()->InputAt(0);
+      false_value = false_block->GetFirstInstruction()->InputAt(0);
+    } else if (phi != nullptr) {
+      true_value = phi->InputAt(predecessor_index_true);
+      false_value = phi->InputAt(predecessor_index_false);
+    } else {
       continue;
     }
-    HInstruction* true_value = phi->InputAt(predecessor_index_true);
-    HInstruction* false_value = phi->InputAt(predecessor_index_false);
+    DCHECK(both_successors_return || phi != nullptr);
 
     // Create the Select instruction and insert it in front of the If.
     HSelect* select = new (graph_->GetArena()) HSelect(if_instruction->InputAt(0),
                                                        true_value,
                                                        false_value,
                                                        if_instruction->GetDexPc());
-    if (phi->GetType() == Primitive::kPrimNot) {
+    if (both_successors_return) {
+      if (true_value->GetType() == Primitive::kPrimNot) {
+        DCHECK(false_value->GetType() == Primitive::kPrimNot);
+        ReferenceTypePropagation::FixUpInstructionType(select, handle_scope_);
+      }
+    } else if (phi->GetType() == Primitive::kPrimNot) {
       select->SetReferenceTypeInfo(phi->GetReferenceTypeInfo());
     }
     block->InsertInstructionBefore(select, if_instruction);
 
-    // Remove the true branch which removes the corresponding Phi input.
-    // If left only with the false branch, the Phi is automatically removed.
-    phi->ReplaceInput(select, predecessor_index_false);
+    // Remove the true branch which removes the corresponding Phi
+    // input if needed. If left only with the false branch, the Phi is
+    // automatically removed.
+    if (both_successors_return) {
+      false_block->GetFirstInstruction()->ReplaceInput(select, 0);
+    } else {
+      phi->ReplaceInput(select, predecessor_index_false);
+    }
+
     bool only_two_predecessors = (merge_block->GetPredecessors().size() == 2u);
     true_block->DisconnectAndDelete();
-    DCHECK_EQ(only_two_predecessors, phi->GetBlock() == nullptr);
 
     // Merge remaining blocks which are now connected with Goto.
     DCHECK_EQ(block->GetSingleSuccessor(), false_block);
     block->MergeWith(false_block);
-    if (only_two_predecessors) {
+    if (!both_successors_return && only_two_predecessors) {
+      DCHECK_EQ(only_two_predecessors, phi->GetBlock() == nullptr);
       DCHECK_EQ(block->GetSingleSuccessor(), merge_block);
       block->MergeWith(merge_block);
     }
 
-    MaybeRecordStat(MethodCompilationStat::kSelectGenerated);
+    MaybeRecordStat(stats_, MethodCompilationStat::kSelectGenerated);
 
     // No need to update dominance information, as we are simplifying
     // a simple diamond shape, where the join block is merged with the
diff --git a/compiler/optimizing/select_generator.h b/compiler/optimizing/select_generator.h
index c6dca58..c060146 100644
--- a/compiler/optimizing/select_generator.h
+++ b/compiler/optimizing/select_generator.h
@@ -18,7 +18,7 @@
  * This optimization recognizes the common diamond selection pattern and
  * replaces it with an instance of the HSelect instruction.
  *
- * Recognized pattern:
+ * Recognized patterns:
  *
  *          If [ Condition ]
  *            /          \
@@ -26,14 +26,30 @@
  *            \          /
  *     Phi [FalseValue, TrueValue]
  *
+ * and
+ *
+ *             If [ Condition ]
+ *               /          \
+ *     false branch        true branch
+ *     return FalseValue   return TrueValue
+ *
  * The pattern will be simplified if `true_branch` and `false_branch` each
  * contain at most one instruction without any side effects.
  *
- * Blocks are merged into one and Select replaces the If and the Phi:
+ * Blocks are merged into one and Select replaces the If and (in the first pattern) the Phi.
+ *
+ * For the first pattern it simplifies to:
+ *
  *              true branch
  *              false branch
  *              Select [FalseValue, TrueValue, Condition]
  *
+ * For the second pattern it simplifies to:
+ *
+ *              true branch
+ *              false branch
+ *              return Select [FalseValue, TrueValue, Condition]
+ *
  * Note: In order to recognize no side-effect blocks, this optimization must be
  * run after the instruction simplifier has removed redundant suspend checks.
  */
@@ -42,19 +58,22 @@
 #define ART_COMPILER_OPTIMIZING_SELECT_GENERATOR_H_
 
 #include "optimization.h"
+#include "reference_type_propagation.h"
 
 namespace art {
 
 class HSelectGenerator : public HOptimization {
  public:
-  HSelectGenerator(HGraph* graph, OptimizingCompilerStats* stats)
-    : HOptimization(graph, kSelectGeneratorPassName, stats) {}
+  HSelectGenerator(HGraph* graph,
+                   VariableSizedHandleScope* handles,
+                   OptimizingCompilerStats* stats);
 
   void Run() OVERRIDE;
 
   static constexpr const char* kSelectGeneratorPassName = "select_generator";
 
  private:
+  VariableSizedHandleScope* handle_scope_;
   DISALLOW_COPY_AND_ASSIGN(HSelectGenerator);
 };
 
diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc
index 2cbabcf..18099d8 100644
--- a/compiler/utils/mips/assembler_mips.cc
+++ b/compiler/utils/mips/assembler_mips.cc
@@ -935,11 +935,11 @@
 }
 
 void MipsAssembler::Beqz(Register rt, uint16_t imm16) {
-  Beq(ZERO, rt, imm16);
+  Beq(rt, ZERO, imm16);
 }
 
 void MipsAssembler::Bnez(Register rt, uint16_t imm16) {
-  Bne(ZERO, rt, imm16);
+  Bne(rt, ZERO, imm16);
 }
 
 void MipsAssembler::Bltz(Register rt, uint16_t imm16) {
@@ -3118,7 +3118,7 @@
 }
 
 void MipsAssembler::Branch::InitializeType(Type initial_type, bool is_r6) {
-  OffsetBits offset_size = GetOffsetSizeNeeded(location_, target_);
+  OffsetBits offset_size_needed = GetOffsetSizeNeeded(location_, target_);
   if (is_r6) {
     // R6
     switch (initial_type) {
@@ -3131,23 +3131,31 @@
         type_ = kR6Literal;
         break;
       case kCall:
-        InitShortOrLong(offset_size, kR6Call, kR6LongCall);
+        InitShortOrLong(offset_size_needed, kR6Call, kR6LongCall);
         break;
       case kCondBranch:
         switch (condition_) {
           case kUncond:
-            InitShortOrLong(offset_size, kR6UncondBranch, kR6LongUncondBranch);
+            InitShortOrLong(offset_size_needed, kR6UncondBranch, kR6LongUncondBranch);
             break;
           case kCondEQZ:
           case kCondNEZ:
             // Special case for beqzc/bnezc with longer offset than in other b<cond>c instructions.
-            type_ = (offset_size <= kOffset23) ? kR6CondBranch : kR6LongCondBranch;
+            type_ = (offset_size_needed <= kOffset23) ? kR6CondBranch : kR6LongCondBranch;
             break;
           default:
-            InitShortOrLong(offset_size, kR6CondBranch, kR6LongCondBranch);
+            InitShortOrLong(offset_size_needed, kR6CondBranch, kR6LongCondBranch);
             break;
         }
         break;
+      case kBareCall:
+        type_ = kR6BareCall;
+        CHECK_LE(offset_size_needed, GetOffsetSize());
+        break;
+      case kBareCondBranch:
+        type_ = (condition_ == kUncond) ? kR6BareUncondBranch : kR6BareCondBranch;
+        CHECK_LE(offset_size_needed, GetOffsetSize());
+        break;
       default:
         LOG(FATAL) << "Unexpected branch type " << initial_type;
         UNREACHABLE();
@@ -3164,18 +3172,26 @@
         type_ = kLiteral;
         break;
       case kCall:
-        InitShortOrLong(offset_size, kCall, kLongCall);
+        InitShortOrLong(offset_size_needed, kCall, kLongCall);
         break;
       case kCondBranch:
         switch (condition_) {
           case kUncond:
-            InitShortOrLong(offset_size, kUncondBranch, kLongUncondBranch);
+            InitShortOrLong(offset_size_needed, kUncondBranch, kLongUncondBranch);
             break;
           default:
-            InitShortOrLong(offset_size, kCondBranch, kLongCondBranch);
+            InitShortOrLong(offset_size_needed, kCondBranch, kLongCondBranch);
             break;
         }
         break;
+      case kBareCall:
+        type_ = kBareCall;
+        CHECK_LE(offset_size_needed, GetOffsetSize());
+        break;
+      case kBareCondBranch:
+        type_ = (condition_ == kUncond) ? kBareUncondBranch : kBareCondBranch;
+        CHECK_LE(offset_size_needed, GetOffsetSize());
+        break;
       default:
         LOG(FATAL) << "Unexpected branch type " << initial_type;
         UNREACHABLE();
@@ -3210,7 +3226,11 @@
   }
 }
 
-MipsAssembler::Branch::Branch(bool is_r6, uint32_t location, uint32_t target, bool is_call)
+MipsAssembler::Branch::Branch(bool is_r6,
+                              uint32_t location,
+                              uint32_t target,
+                              bool is_call,
+                              bool is_bare)
     : old_location_(location),
       location_(location),
       target_(target),
@@ -3218,7 +3238,9 @@
       rhs_reg_(0),
       condition_(kUncond),
       delayed_instruction_(kUnfilledDelaySlot) {
-  InitializeType((is_call ? kCall : kCondBranch), is_r6);
+  InitializeType(
+      (is_call ? (is_bare ? kBareCall : kCall) : (is_bare ? kBareCondBranch : kCondBranch)),
+      is_r6);
 }
 
 MipsAssembler::Branch::Branch(bool is_r6,
@@ -3226,7 +3248,8 @@
                               uint32_t target,
                               MipsAssembler::BranchCondition condition,
                               Register lhs_reg,
-                              Register rhs_reg)
+                              Register rhs_reg,
+                              bool is_bare)
     : old_location_(location),
       location_(location),
       target_(target),
@@ -3276,7 +3299,7 @@
     // Branch condition is always true, make the branch unconditional.
     condition_ = kUncond;
   }
-  InitializeType(kCondBranch, is_r6);
+  InitializeType((is_bare ? kBareCondBranch : kCondBranch), is_r6);
 }
 
 MipsAssembler::Branch::Branch(bool is_r6,
@@ -3419,20 +3442,44 @@
   return GetOldLocation() + GetOldSize();
 }
 
+bool MipsAssembler::Branch::IsBare() const {
+  switch (type_) {
+    // R2 short branches (can't be promoted to long), delay slots filled manually.
+    case kBareUncondBranch:
+    case kBareCondBranch:
+    case kBareCall:
+    // R6 short branches (can't be promoted to long), forbidden/delay slots filled manually.
+    case kR6BareUncondBranch:
+    case kR6BareCondBranch:
+    case kR6BareCall:
+      return true;
+    default:
+      return false;
+  }
+}
+
 bool MipsAssembler::Branch::IsLong() const {
   switch (type_) {
-    // R2 short branches.
+    // R2 short branches (can be promoted to long).
     case kUncondBranch:
     case kCondBranch:
     case kCall:
+    // R2 short branches (can't be promoted to long), delay slots filled manually.
+    case kBareUncondBranch:
+    case kBareCondBranch:
+    case kBareCall:
     // R2 near label.
     case kLabel:
     // R2 near literal.
     case kLiteral:
-    // R6 short branches.
+    // R6 short branches (can be promoted to long).
     case kR6UncondBranch:
     case kR6CondBranch:
     case kR6Call:
+    // R6 short branches (can't be promoted to long), forbidden/delay slots filled manually.
+    case kR6BareUncondBranch:
+    case kR6BareCondBranch:
+    case kR6BareCall:
     // R6 near label.
     case kR6Label:
     // R6 near literal.
@@ -3464,8 +3511,9 @@
 }
 
 MipsAssembler::Branch::OffsetBits MipsAssembler::Branch::GetOffsetSize() const {
+  bool r6_cond_branch = (type_ == kR6CondBranch || type_ == kR6BareCondBranch);
   OffsetBits offset_size =
-      (type_ == kR6CondBranch && (condition_ == kCondEQZ || condition_ == kCondNEZ))
+      (r6_cond_branch && (condition_ == kCondEQZ || condition_ == kCondNEZ))
           ? kOffset23
           : branch_info_[type_].offset_size;
   return offset_size;
@@ -3511,8 +3559,9 @@
 }
 
 void MipsAssembler::Branch::PromoteToLong() {
+  CHECK(!IsBare());  // Bare branches do not promote.
   switch (type_) {
-    // R2 short branches.
+    // R2 short branches (can be promoted to long).
     case kUncondBranch:
       type_ = kLongUncondBranch;
       break;
@@ -3530,7 +3579,7 @@
     case kLiteral:
       type_ = kFarLiteral;
       break;
-    // R6 short branches.
+    // R6 short branches (can be promoted to long).
     case kR6UncondBranch:
       type_ = kR6LongUncondBranch;
       break;
@@ -3585,7 +3634,7 @@
   }
   // The following logic is for debugging/testing purposes.
   // Promote some short branches to long when it's not really required.
-  if (UNLIKELY(max_short_distance != std::numeric_limits<uint32_t>::max())) {
+  if (UNLIKELY(max_short_distance != std::numeric_limits<uint32_t>::max() && !IsBare())) {
     int64_t distance = static_cast<int64_t>(target_) - location;
     distance = (distance >= 0) ? distance : -distance;
     if (distance >= max_short_distance) {
@@ -3851,6 +3900,10 @@
 }
 
 void MipsAssembler::MoveInstructionToDelaySlot(Branch& branch) {
+  if (branch.IsBare()) {
+    // Delay slots are filled manually in bare branches.
+    return;
+  }
   if (branch.CanHaveDelayedInstruction(delay_slot_)) {
     // The last instruction cannot be used in a different delay slot,
     // do not commit the label before it (if any).
@@ -3870,27 +3923,32 @@
   }
 }
 
-void MipsAssembler::Buncond(MipsLabel* label) {
+void MipsAssembler::Buncond(MipsLabel* label, bool is_r6, bool is_bare) {
   uint32_t target = label->IsBound() ? GetLabelLocation(label) : Branch::kUnresolved;
-  branches_.emplace_back(IsR6(), buffer_.Size(), target, /* is_call */ false);
+  branches_.emplace_back(is_r6, buffer_.Size(), target, /* is_call */ false, is_bare);
   MoveInstructionToDelaySlot(branches_.back());
   FinalizeLabeledBranch(label);
 }
 
-void MipsAssembler::Bcond(MipsLabel* label, BranchCondition condition, Register lhs, Register rhs) {
+void MipsAssembler::Bcond(MipsLabel* label,
+                          bool is_r6,
+                          bool is_bare,
+                          BranchCondition condition,
+                          Register lhs,
+                          Register rhs) {
   // If lhs = rhs, this can be a NOP.
   if (Branch::IsNop(condition, lhs, rhs)) {
     return;
   }
   uint32_t target = label->IsBound() ? GetLabelLocation(label) : Branch::kUnresolved;
-  branches_.emplace_back(IsR6(), buffer_.Size(), target, condition, lhs, rhs);
+  branches_.emplace_back(is_r6, buffer_.Size(), target, condition, lhs, rhs, is_bare);
   MoveInstructionToDelaySlot(branches_.back());
   FinalizeLabeledBranch(label);
 }
 
-void MipsAssembler::Call(MipsLabel* label) {
+void MipsAssembler::Call(MipsLabel* label, bool is_r6, bool is_bare) {
   uint32_t target = label->IsBound() ? GetLabelLocation(label) : Branch::kUnresolved;
-  branches_.emplace_back(IsR6(), buffer_.Size(), target, /* is_call */ true);
+  branches_.emplace_back(is_r6, buffer_.Size(), target, /* is_call */ true, is_bare);
   MoveInstructionToDelaySlot(branches_.back());
   FinalizeLabeledBranch(label);
 }
@@ -4038,10 +4096,14 @@
 
 // Note: make sure branch_info_[] and EmitBranch() are kept synchronized.
 const MipsAssembler::Branch::BranchInfo MipsAssembler::Branch::branch_info_[] = {
-  // R2 short branches.
+  // R2 short branches (can be promoted to long).
   {  2, 0, 1, MipsAssembler::Branch::kOffset18, 2 },  // kUncondBranch
   {  2, 0, 1, MipsAssembler::Branch::kOffset18, 2 },  // kCondBranch
   {  2, 0, 1, MipsAssembler::Branch::kOffset18, 2 },  // kCall
+  // R2 short branches (can't be promoted to long), delay slots filled manually.
+  {  1, 0, 1, MipsAssembler::Branch::kOffset18, 2 },  // kBareUncondBranch
+  {  1, 0, 1, MipsAssembler::Branch::kOffset18, 2 },  // kBareCondBranch
+  {  1, 0, 1, MipsAssembler::Branch::kOffset18, 2 },  // kBareCall
   // R2 near label.
   {  1, 0, 0, MipsAssembler::Branch::kOffset16, 0 },  // kLabel
   // R2 near literal.
@@ -4054,11 +4116,16 @@
   {  3, 0, 0, MipsAssembler::Branch::kOffset32, 0 },  // kFarLabel
   // R2 far literal.
   {  3, 0, 0, MipsAssembler::Branch::kOffset32, 0 },  // kFarLiteral
-  // R6 short branches.
+  // R6 short branches (can be promoted to long).
   {  1, 0, 1, MipsAssembler::Branch::kOffset28, 2 },  // kR6UncondBranch
   {  2, 0, 1, MipsAssembler::Branch::kOffset18, 2 },  // kR6CondBranch
                                                       // Exception: kOffset23 for beqzc/bnezc.
   {  1, 0, 1, MipsAssembler::Branch::kOffset28, 2 },  // kR6Call
+  // R6 short branches (can't be promoted to long), forbidden/delay slots filled manually.
+  {  1, 0, 1, MipsAssembler::Branch::kOffset28, 2 },  // kR6BareUncondBranch
+  {  1, 0, 1, MipsAssembler::Branch::kOffset18, 2 },  // kR6BareCondBranch
+                                                      // Exception: kOffset23 for beqzc/bnezc.
+  {  1, 0, 1, MipsAssembler::Branch::kOffset28, 2 },  // kR6BareCall
   // R6 near label.
   {  1, 0, 0, MipsAssembler::Branch::kOffset21, 2 },  // kR6Label
   // R6 near literal.
@@ -4124,6 +4191,21 @@
       Bal(offset);
       Emit(delayed_instruction);
       break;
+    case Branch::kBareUncondBranch:
+      DCHECK_EQ(delayed_instruction, Branch::kUnfilledDelaySlot);
+      CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+      B(offset);
+      break;
+    case Branch::kBareCondBranch:
+      DCHECK_EQ(delayed_instruction, Branch::kUnfilledDelaySlot);
+      CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+      EmitBcondR2(condition, lhs, rhs, offset);
+      break;
+    case Branch::kBareCall:
+      DCHECK_EQ(delayed_instruction, Branch::kUnfilledDelaySlot);
+      CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+      Bal(offset);
+      break;
 
     // R2 near label.
     case Branch::kLabel:
@@ -4249,6 +4331,21 @@
       CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
       Balc(offset);
       break;
+    case Branch::kR6BareUncondBranch:
+      DCHECK_EQ(delayed_instruction, Branch::kUnfilledDelaySlot);
+      CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+      Bc(offset);
+      break;
+    case Branch::kR6BareCondBranch:
+      DCHECK_EQ(delayed_instruction, Branch::kUnfilledDelaySlot);
+      CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+      EmitBcondR6(condition, lhs, rhs, offset);
+      break;
+    case Branch::kR6BareCall:
+      DCHECK_EQ(delayed_instruction, Branch::kUnfilledDelaySlot);
+      CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+      Balc(offset);
+      break;
 
     // R6 near label.
     case Branch::kR6Label:
@@ -4311,44 +4408,44 @@
   CHECK_LT(branch->GetSize(), static_cast<uint32_t>(Branch::kMaxBranchSize));
 }
 
-void MipsAssembler::B(MipsLabel* label) {
-  Buncond(label);
+void MipsAssembler::B(MipsLabel* label, bool is_bare) {
+  Buncond(label, /* is_r6 */ (IsR6() && !is_bare), is_bare);
 }
 
-void MipsAssembler::Bal(MipsLabel* label) {
-  Call(label);
+void MipsAssembler::Bal(MipsLabel* label, bool is_bare) {
+  Call(label, /* is_r6 */ (IsR6() && !is_bare), is_bare);
 }
 
-void MipsAssembler::Beq(Register rs, Register rt, MipsLabel* label) {
-  Bcond(label, kCondEQ, rs, rt);
+void MipsAssembler::Beq(Register rs, Register rt, MipsLabel* label, bool is_bare) {
+  Bcond(label, /* is_r6 */ (IsR6() && !is_bare), is_bare, kCondEQ, rs, rt);
 }
 
-void MipsAssembler::Bne(Register rs, Register rt, MipsLabel* label) {
-  Bcond(label, kCondNE, rs, rt);
+void MipsAssembler::Bne(Register rs, Register rt, MipsLabel* label, bool is_bare) {
+  Bcond(label, /* is_r6 */ (IsR6() && !is_bare), is_bare, kCondNE, rs, rt);
 }
 
-void MipsAssembler::Beqz(Register rt, MipsLabel* label) {
-  Bcond(label, kCondEQZ, rt);
+void MipsAssembler::Beqz(Register rt, MipsLabel* label, bool is_bare) {
+  Bcond(label, /* is_r6 */ (IsR6() && !is_bare), is_bare, kCondEQZ, rt);
 }
 
-void MipsAssembler::Bnez(Register rt, MipsLabel* label) {
-  Bcond(label, kCondNEZ, rt);
+void MipsAssembler::Bnez(Register rt, MipsLabel* label, bool is_bare) {
+  Bcond(label, /* is_r6 */ (IsR6() && !is_bare), is_bare, kCondNEZ, rt);
 }
 
-void MipsAssembler::Bltz(Register rt, MipsLabel* label) {
-  Bcond(label, kCondLTZ, rt);
+void MipsAssembler::Bltz(Register rt, MipsLabel* label, bool is_bare) {
+  Bcond(label, /* is_r6 */ (IsR6() && !is_bare), is_bare, kCondLTZ, rt);
 }
 
-void MipsAssembler::Bgez(Register rt, MipsLabel* label) {
-  Bcond(label, kCondGEZ, rt);
+void MipsAssembler::Bgez(Register rt, MipsLabel* label, bool is_bare) {
+  Bcond(label, /* is_r6 */ (IsR6() && !is_bare), is_bare, kCondGEZ, rt);
 }
 
-void MipsAssembler::Blez(Register rt, MipsLabel* label) {
-  Bcond(label, kCondLEZ, rt);
+void MipsAssembler::Blez(Register rt, MipsLabel* label, bool is_bare) {
+  Bcond(label, /* is_r6 */ (IsR6() && !is_bare), is_bare, kCondLEZ, rt);
 }
 
-void MipsAssembler::Bgtz(Register rt, MipsLabel* label) {
-  Bcond(label, kCondGTZ, rt);
+void MipsAssembler::Bgtz(Register rt, MipsLabel* label, bool is_bare) {
+  Bcond(label, /* is_r6 */ (IsR6() && !is_bare), is_bare, kCondGTZ, rt);
 }
 
 bool MipsAssembler::CanExchangeWithSlt(Register rs, Register rt) const {
@@ -4399,74 +4496,130 @@
   }
 }
 
-void MipsAssembler::Blt(Register rs, Register rt, MipsLabel* label) {
-  if (IsR6()) {
-    Bcond(label, kCondLT, rs, rt);
+void MipsAssembler::Blt(Register rs, Register rt, MipsLabel* label, bool is_bare) {
+  if (IsR6() && !is_bare) {
+    Bcond(label, IsR6(), is_bare, kCondLT, rs, rt);
   } else if (!Branch::IsNop(kCondLT, rs, rt)) {
     // Synthesize the instruction (not available on R2).
     GenerateSltForCondBranch(/* unsigned_slt */ false, rs, rt);
-    Bnez(AT, label);
+    Bnez(AT, label, is_bare);
   }
 }
 
-void MipsAssembler::Bge(Register rs, Register rt, MipsLabel* label) {
-  if (IsR6()) {
-    Bcond(label, kCondGE, rs, rt);
+void MipsAssembler::Bge(Register rs, Register rt, MipsLabel* label, bool is_bare) {
+  if (IsR6() && !is_bare) {
+    Bcond(label, IsR6(), is_bare, kCondGE, rs, rt);
   } else if (Branch::IsUncond(kCondGE, rs, rt)) {
-    B(label);
+    B(label, is_bare);
   } else {
     // Synthesize the instruction (not available on R2).
     GenerateSltForCondBranch(/* unsigned_slt */ false, rs, rt);
-    Beqz(AT, label);
+    Beqz(AT, label, is_bare);
   }
 }
 
-void MipsAssembler::Bltu(Register rs, Register rt, MipsLabel* label) {
-  if (IsR6()) {
-    Bcond(label, kCondLTU, rs, rt);
+void MipsAssembler::Bltu(Register rs, Register rt, MipsLabel* label, bool is_bare) {
+  if (IsR6() && !is_bare) {
+    Bcond(label, IsR6(), is_bare, kCondLTU, rs, rt);
   } else if (!Branch::IsNop(kCondLTU, rs, rt)) {
     // Synthesize the instruction (not available on R2).
     GenerateSltForCondBranch(/* unsigned_slt */ true, rs, rt);
-    Bnez(AT, label);
+    Bnez(AT, label, is_bare);
   }
 }
 
-void MipsAssembler::Bgeu(Register rs, Register rt, MipsLabel* label) {
-  if (IsR6()) {
-    Bcond(label, kCondGEU, rs, rt);
+void MipsAssembler::Bgeu(Register rs, Register rt, MipsLabel* label, bool is_bare) {
+  if (IsR6() && !is_bare) {
+    Bcond(label, IsR6(), is_bare, kCondGEU, rs, rt);
   } else if (Branch::IsUncond(kCondGEU, rs, rt)) {
-    B(label);
+    B(label, is_bare);
   } else {
     // Synthesize the instruction (not available on R2).
     GenerateSltForCondBranch(/* unsigned_slt */ true, rs, rt);
-    Beqz(AT, label);
+    Beqz(AT, label, is_bare);
   }
 }
 
-void MipsAssembler::Bc1f(MipsLabel* label) {
-  Bc1f(0, label);
+void MipsAssembler::Bc1f(MipsLabel* label, bool is_bare) {
+  Bc1f(0, label, is_bare);
 }
 
-void MipsAssembler::Bc1f(int cc, MipsLabel* label) {
+void MipsAssembler::Bc1f(int cc, MipsLabel* label, bool is_bare) {
   CHECK(IsUint<3>(cc)) << cc;
-  Bcond(label, kCondF, static_cast<Register>(cc), ZERO);
+  Bcond(label, /* is_r6 */ false, is_bare, kCondF, static_cast<Register>(cc), ZERO);
 }
 
-void MipsAssembler::Bc1t(MipsLabel* label) {
-  Bc1t(0, label);
+void MipsAssembler::Bc1t(MipsLabel* label, bool is_bare) {
+  Bc1t(0, label, is_bare);
 }
 
-void MipsAssembler::Bc1t(int cc, MipsLabel* label) {
+void MipsAssembler::Bc1t(int cc, MipsLabel* label, bool is_bare) {
   CHECK(IsUint<3>(cc)) << cc;
-  Bcond(label, kCondT, static_cast<Register>(cc), ZERO);
+  Bcond(label, /* is_r6 */ false, is_bare, kCondT, static_cast<Register>(cc), ZERO);
 }
 
-void MipsAssembler::Bc1eqz(FRegister ft, MipsLabel* label) {
-  Bcond(label, kCondF, static_cast<Register>(ft), ZERO);
+void MipsAssembler::Bc(MipsLabel* label, bool is_bare) {
+  Buncond(label, /* is_r6 */ true, is_bare);
 }
 
-void MipsAssembler::Bc1nez(FRegister ft, MipsLabel* label) {
-  Bcond(label, kCondT, static_cast<Register>(ft), ZERO);
+void MipsAssembler::Balc(MipsLabel* label, bool is_bare) {
+  Call(label, /* is_r6 */ true, is_bare);
+}
+
+void MipsAssembler::Beqc(Register rs, Register rt, MipsLabel* label, bool is_bare) {
+  Bcond(label, /* is_r6 */ true, is_bare, kCondEQ, rs, rt);
+}
+
+void MipsAssembler::Bnec(Register rs, Register rt, MipsLabel* label, bool is_bare) {
+  Bcond(label, /* is_r6 */ true, is_bare, kCondNE, rs, rt);
+}
+
+void MipsAssembler::Beqzc(Register rt, MipsLabel* label, bool is_bare) {
+  Bcond(label, /* is_r6 */ true, is_bare, kCondEQZ, rt);
+}
+
+void MipsAssembler::Bnezc(Register rt, MipsLabel* label, bool is_bare) {
+  Bcond(label, /* is_r6 */ true, is_bare, kCondNEZ, rt);
+}
+
+void MipsAssembler::Bltzc(Register rt, MipsLabel* label, bool is_bare) {
+  Bcond(label, /* is_r6 */ true, is_bare, kCondLTZ, rt);
+}
+
+void MipsAssembler::Bgezc(Register rt, MipsLabel* label, bool is_bare) {
+  Bcond(label, /* is_r6 */ true, is_bare, kCondGEZ, rt);
+}
+
+void MipsAssembler::Blezc(Register rt, MipsLabel* label, bool is_bare) {
+  Bcond(label, /* is_r6 */ true, is_bare, kCondLEZ, rt);
+}
+
+void MipsAssembler::Bgtzc(Register rt, MipsLabel* label, bool is_bare) {
+  Bcond(label, /* is_r6 */ true, is_bare, kCondGTZ, rt);
+}
+
+void MipsAssembler::Bltc(Register rs, Register rt, MipsLabel* label, bool is_bare) {
+  Bcond(label, /* is_r6 */ true, is_bare, kCondLT, rs, rt);
+}
+
+void MipsAssembler::Bgec(Register rs, Register rt, MipsLabel* label, bool is_bare) {
+  Bcond(label, /* is_r6 */ true, is_bare, kCondGE, rs, rt);
+}
+
+void MipsAssembler::Bltuc(Register rs, Register rt, MipsLabel* label, bool is_bare) {
+  Bcond(label, /* is_r6 */ true, is_bare, kCondLTU, rs, rt);
+}
+
+void MipsAssembler::Bgeuc(Register rs, Register rt, MipsLabel* label, bool is_bare) {
+  Bcond(label, /* is_r6 */ true, is_bare, kCondGEU, rs, rt);
+}
+
+void MipsAssembler::Bc1eqz(FRegister ft, MipsLabel* label, bool is_bare) {
+  Bcond(label, /* is_r6 */ true, is_bare, kCondF, static_cast<Register>(ft), ZERO);
+}
+
+void MipsAssembler::Bc1nez(FRegister ft, MipsLabel* label, bool is_bare) {
+  Bcond(label, /* is_r6 */ true, is_bare, kCondT, static_cast<Register>(ft), ZERO);
 }
 
 void MipsAssembler::AdjustBaseAndOffset(Register& base,
diff --git a/compiler/utils/mips/assembler_mips.h b/compiler/utils/mips/assembler_mips.h
index a7ff931..7f9d576 100644
--- a/compiler/utils/mips/assembler_mips.h
+++ b/compiler/utils/mips/assembler_mips.h
@@ -636,29 +636,69 @@
   void LoadSConst32(FRegister r, int32_t value, Register temp);
   void Addiu32(Register rt, Register rs, int32_t value, Register rtmp = AT);
 
-  // These will generate R2 branches or R6 branches as appropriate and take care of
-  // the delay/forbidden slots.
   void Bind(MipsLabel* label);
-  void B(MipsLabel* label);
-  void Bal(MipsLabel* label);
-  void Beq(Register rs, Register rt, MipsLabel* label);
-  void Bne(Register rs, Register rt, MipsLabel* label);
-  void Beqz(Register rt, MipsLabel* label);
-  void Bnez(Register rt, MipsLabel* label);
-  void Bltz(Register rt, MipsLabel* label);
-  void Bgez(Register rt, MipsLabel* label);
-  void Blez(Register rt, MipsLabel* label);
-  void Bgtz(Register rt, MipsLabel* label);
-  void Blt(Register rs, Register rt, MipsLabel* label);
-  void Bge(Register rs, Register rt, MipsLabel* label);
-  void Bltu(Register rs, Register rt, MipsLabel* label);
-  void Bgeu(Register rs, Register rt, MipsLabel* label);
-  void Bc1f(MipsLabel* label);  // R2
-  void Bc1f(int cc, MipsLabel* label);  // R2
-  void Bc1t(MipsLabel* label);  // R2
-  void Bc1t(int cc, MipsLabel* label);  // R2
-  void Bc1eqz(FRegister ft, MipsLabel* label);  // R6
-  void Bc1nez(FRegister ft, MipsLabel* label);  // R6
+  // When `is_bare` is false, the branches will promote to long (if the range
+  // of the individual branch instruction is insufficient) and the delay/
+  // forbidden slots will be taken care of.
+  // Use `is_bare = false` when the branch target may be out of reach of the
+  // individual branch instruction. In other words, this is for general purpose use.
+  //
+  // When `is_bare` is true, just the branch instructions will be generated
+  // leaving delay/forbidden slot filling up to the caller and the branches
+  // won't promote to long if the range is insufficient (you'll get a
+  // compilation error when the range is exceeded).
+  // Use `is_bare = true` when the branch target is known to be within reach
+  // of the individual branch instruction. This is intended for small local
+  // optimizations around delay/forbidden slots.
+  // Also prefer using `is_bare = true` if the code near the branch is to be
+  // patched or analyzed at run time (e.g. introspection) to
+  // - show the intent and
+  // - fail during compilation rather than during patching/execution if the
+  //   bare branch range is insufficient but the code size and layout are
+  //   expected to remain unchanged.
+  //
+  // R2 branches with delay slots that are also available on R6.
+  // On R6 when `is_bare` is false these convert to equivalent R6 compact
+  // branches (to reduce code size). On R2 or when `is_bare` is true they
+  // remain R2 branches with delay slots.
+  void B(MipsLabel* label, bool is_bare = false);
+  void Bal(MipsLabel* label, bool is_bare = false);
+  void Beq(Register rs, Register rt, MipsLabel* label, bool is_bare = false);
+  void Bne(Register rs, Register rt, MipsLabel* label, bool is_bare = false);
+  void Beqz(Register rt, MipsLabel* label, bool is_bare = false);
+  void Bnez(Register rt, MipsLabel* label, bool is_bare = false);
+  void Bltz(Register rt, MipsLabel* label, bool is_bare = false);
+  void Bgez(Register rt, MipsLabel* label, bool is_bare = false);
+  void Blez(Register rt, MipsLabel* label, bool is_bare = false);
+  void Bgtz(Register rt, MipsLabel* label, bool is_bare = false);
+  void Blt(Register rs, Register rt, MipsLabel* label, bool is_bare = false);
+  void Bge(Register rs, Register rt, MipsLabel* label, bool is_bare = false);
+  void Bltu(Register rs, Register rt, MipsLabel* label, bool is_bare = false);
+  void Bgeu(Register rs, Register rt, MipsLabel* label, bool is_bare = false);
+  // R2-only branches with delay slots.
+  void Bc1f(MipsLabel* label, bool is_bare = false);  // R2
+  void Bc1f(int cc, MipsLabel* label, bool is_bare = false);  // R2
+  void Bc1t(MipsLabel* label, bool is_bare = false);  // R2
+  void Bc1t(int cc, MipsLabel* label, bool is_bare = false);  // R2
+  // R6-only compact branches without delay/forbidden slots.
+  void Bc(MipsLabel* label, bool is_bare = false);  // R6
+  void Balc(MipsLabel* label, bool is_bare = false);  // R6
+  // R6-only compact branches with forbidden slots.
+  void Beqc(Register rs, Register rt, MipsLabel* label, bool is_bare = false);  // R6
+  void Bnec(Register rs, Register rt, MipsLabel* label, bool is_bare = false);  // R6
+  void Beqzc(Register rt, MipsLabel* label, bool is_bare = false);  // R6
+  void Bnezc(Register rt, MipsLabel* label, bool is_bare = false);  // R6
+  void Bltzc(Register rt, MipsLabel* label, bool is_bare = false);  // R6
+  void Bgezc(Register rt, MipsLabel* label, bool is_bare = false);  // R6
+  void Blezc(Register rt, MipsLabel* label, bool is_bare = false);  // R6
+  void Bgtzc(Register rt, MipsLabel* label, bool is_bare = false);  // R6
+  void Bltc(Register rs, Register rt, MipsLabel* label, bool is_bare = false);  // R6
+  void Bgec(Register rs, Register rt, MipsLabel* label, bool is_bare = false);  // R6
+  void Bltuc(Register rs, Register rt, MipsLabel* label, bool is_bare = false);  // R6
+  void Bgeuc(Register rs, Register rt, MipsLabel* label, bool is_bare = false);  // R6
+  // R6-only branches with delay slots.
+  void Bc1eqz(FRegister ft, MipsLabel* label, bool is_bare = false);  // R6
+  void Bc1nez(FRegister ft, MipsLabel* label, bool is_bare = false);  // R6
 
   void EmitLoad(ManagedRegister m_dst, Register src_register, int32_t src_offset, size_t size);
   void AdjustBaseAndOffset(Register& base,
@@ -1268,10 +1308,14 @@
   class Branch {
    public:
     enum Type {
-      // R2 short branches.
+      // R2 short branches (can be promoted to long).
       kUncondBranch,
       kCondBranch,
       kCall,
+      // R2 short branches (can't be promoted to long), delay slots filled manually.
+      kBareUncondBranch,
+      kBareCondBranch,
+      kBareCall,
       // R2 near label.
       kLabel,
       // R2 near literal.
@@ -1284,10 +1328,14 @@
       kFarLabel,
       // R2 far literal.
       kFarLiteral,
-      // R6 short branches.
+      // R6 short branches (can be promoted to long).
       kR6UncondBranch,
       kR6CondBranch,
       kR6Call,
+      // R6 short branches (can't be promoted to long), forbidden/delay slots filled manually.
+      kR6BareUncondBranch,
+      kR6BareCondBranch,
+      kR6BareCall,
       // R6 near label.
       kR6Label,
       // R6 near literal.
@@ -1337,7 +1385,7 @@
       // instructions) from the instruction containing the offset.
       uint32_t pc_org;
       // How large (in bits) a PC-relative offset can be for a given type of branch (kR6CondBranch
-      // is an exception: use kOffset23 for beqzc/bnezc).
+      // and kR6BareCondBranch are an exception: use kOffset23 for beqzc/bnezc).
       OffsetBits offset_size;
       // Some MIPS instructions with PC-relative offsets shift the offset by 2. Encode the shift
       // count.
@@ -1346,14 +1394,15 @@
     static const BranchInfo branch_info_[/* Type */];
 
     // Unconditional branch or call.
-    Branch(bool is_r6, uint32_t location, uint32_t target, bool is_call);
+    Branch(bool is_r6, uint32_t location, uint32_t target, bool is_call, bool is_bare);
     // Conditional branch.
     Branch(bool is_r6,
            uint32_t location,
            uint32_t target,
            BranchCondition condition,
            Register lhs_reg,
-           Register rhs_reg);
+           Register rhs_reg,
+           bool is_bare);
     // Label address (in literal area) or literal.
     Branch(bool is_r6,
            uint32_t location,
@@ -1385,6 +1434,7 @@
     uint32_t GetOldSize() const;
     uint32_t GetEndLocation() const;
     uint32_t GetOldEndLocation() const;
+    bool IsBare() const;
     bool IsLong() const;
     bool IsResolved() const;
 
@@ -1513,9 +1563,14 @@
                       VectorRegister wd,
                       int minor_opcode);
 
-  void Buncond(MipsLabel* label);
-  void Bcond(MipsLabel* label, BranchCondition condition, Register lhs, Register rhs = ZERO);
-  void Call(MipsLabel* label);
+  void Buncond(MipsLabel* label, bool is_r6, bool is_bare);
+  void Bcond(MipsLabel* label,
+             bool is_r6,
+             bool is_bare,
+             BranchCondition condition,
+             Register lhs,
+             Register rhs = ZERO);
+  void Call(MipsLabel* label, bool is_r6, bool is_bare);
   void FinalizeLabeledBranch(MipsLabel* label);
 
   // Various helpers for branch delay slot management.
diff --git a/compiler/utils/mips/assembler_mips32r6_test.cc b/compiler/utils/mips/assembler_mips32r6_test.cc
index b72a14e..6e52b17 100644
--- a/compiler/utils/mips/assembler_mips32r6_test.cc
+++ b/compiler/utils/mips/assembler_mips32r6_test.cc
@@ -259,12 +259,52 @@
     return result;
   }
 
-  void BranchCondTwoRegsHelper(void (mips::MipsAssembler::*f)(mips::Register,
-                                                              mips::Register,
-                                                              mips::MipsLabel*),
-                               const std::string& instr_name) {
+  void BranchHelper(void (mips::MipsAssembler::*f)(mips::MipsLabel*,
+                                                   bool),
+                    const std::string& instr_name,
+                    bool has_slot,
+                    bool is_bare = false) {
+    __ SetReorder(false);
+    mips::MipsLabel label1, label2;
+    (Base::GetAssembler()->*f)(&label1, is_bare);
+    constexpr size_t kAdduCount1 = 63;
+    for (size_t i = 0; i != kAdduCount1; ++i) {
+      __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+    }
+    __ Bind(&label1);
+    (Base::GetAssembler()->*f)(&label2, is_bare);
+    constexpr size_t kAdduCount2 = 64;
+    for (size_t i = 0; i != kAdduCount2; ++i) {
+      __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+    }
+    __ Bind(&label2);
+    (Base::GetAssembler()->*f)(&label1, is_bare);
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+
+    std::string expected =
+        ".set noreorder\n" +
+        instr_name + " 1f\n" +
+        ((is_bare || !has_slot) ? "" : "nop\n") +
+        RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
+        "1:\n" +
+        instr_name + " 2f\n" +
+        ((is_bare || !has_slot) ? "" : "nop\n") +
+        RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
+        "2:\n" +
+        instr_name + " 1b\n" +
+        ((is_bare || !has_slot) ? "" : "nop\n") +
+        "addu $zero, $zero, $zero\n";
+    DriverStr(expected, instr_name);
+  }
+
+  void BranchCondOneRegHelper(void (mips::MipsAssembler::*f)(mips::Register,
+                                                             mips::MipsLabel*,
+                                                             bool),
+                              const std::string& instr_name,
+                              bool is_bare = false) {
+    __ SetReorder(false);
     mips::MipsLabel label;
-    (Base::GetAssembler()->*f)(mips::A0, mips::A1, &label);
+    (Base::GetAssembler()->*f)(mips::A0, &label, is_bare);
     constexpr size_t kAdduCount1 = 63;
     for (size_t i = 0; i != kAdduCount1; ++i) {
       __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
@@ -274,17 +314,86 @@
     for (size_t i = 0; i != kAdduCount2; ++i) {
       __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
     }
-    (Base::GetAssembler()->*f)(mips::A2, mips::A3, &label);
+    (Base::GetAssembler()->*f)(mips::A1, &label, is_bare);
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
 
     std::string expected =
         ".set noreorder\n" +
-        instr_name + " $a0, $a1, 1f\n"
-        "nop\n" +
+        instr_name + " $a0, 1f\n" +
+        (is_bare ? "" : "nop\n") +
         RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
         "1:\n" +
         RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
-        instr_name + " $a2, $a3, 1b\n"
-        "nop\n";
+        instr_name + " $a1, 1b\n" +
+        (is_bare ? "" : "nop\n") +
+        "addu $zero, $zero, $zero\n";
+    DriverStr(expected, instr_name);
+  }
+
+  void BranchCondTwoRegsHelper(void (mips::MipsAssembler::*f)(mips::Register,
+                                                              mips::Register,
+                                                              mips::MipsLabel*,
+                                                              bool),
+                               const std::string& instr_name,
+                               bool is_bare = false) {
+    __ SetReorder(false);
+    mips::MipsLabel label;
+    (Base::GetAssembler()->*f)(mips::A0, mips::A1, &label, is_bare);
+    constexpr size_t kAdduCount1 = 63;
+    for (size_t i = 0; i != kAdduCount1; ++i) {
+      __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+    }
+    __ Bind(&label);
+    constexpr size_t kAdduCount2 = 64;
+    for (size_t i = 0; i != kAdduCount2; ++i) {
+      __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+    }
+    (Base::GetAssembler()->*f)(mips::A2, mips::A3, &label, is_bare);
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+
+    std::string expected =
+        ".set noreorder\n" +
+        instr_name + " $a0, $a1, 1f\n" +
+        (is_bare ? "" : "nop\n") +
+        RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
+        "1:\n" +
+        RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
+        instr_name + " $a2, $a3, 1b\n" +
+        (is_bare ? "" : "nop\n") +
+        "addu $zero, $zero, $zero\n";
+    DriverStr(expected, instr_name);
+  }
+
+  void BranchFpuCondHelper(void (mips::MipsAssembler::*f)(mips::FRegister,
+                                                          mips::MipsLabel*,
+                                                          bool),
+                           const std::string& instr_name,
+                           bool is_bare = false) {
+    __ SetReorder(false);
+    mips::MipsLabel label;
+    (Base::GetAssembler()->*f)(mips::F0, &label, is_bare);
+    constexpr size_t kAdduCount1 = 63;
+    for (size_t i = 0; i != kAdduCount1; ++i) {
+      __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+    }
+    __ Bind(&label);
+    constexpr size_t kAdduCount2 = 64;
+    for (size_t i = 0; i != kAdduCount2; ++i) {
+      __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+    }
+    (Base::GetAssembler()->*f)(mips::F30, &label, is_bare);
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+
+    std::string expected =
+        ".set noreorder\n" +
+        instr_name + " $f0, 1f\n" +
+        (is_bare ? "" : "nop\n") +
+        RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
+        "1:\n" +
+        RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
+        instr_name + " $f30, 1b\n" +
+        (is_bare ? "" : "nop\n") +
+        "addu $zero, $zero, $zero\n";
     DriverStr(expected, instr_name);
   }
 
@@ -947,78 +1056,386 @@
   DriverStr(expected, "StoreQToOffset");
 }
 
-TEST_F(AssemblerMIPS32r6Test, LoadFarthestNearLabelAddress) {
-  mips::MipsLabel label;
-  __ LoadLabelAddress(mips::V0, mips::ZERO, &label);
-  constexpr size_t kAdduCount = 0x3FFDE;
-  for (size_t i = 0; i != kAdduCount; ++i) {
-    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
-  }
-  __ Bind(&label);
-
-  std::string expected =
-      "lapc $v0, 1f\n" +
-      RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") +
-      "1:\n";
-  DriverStr(expected, "LoadFarthestNearLabelAddress");
-}
-
-TEST_F(AssemblerMIPS32r6Test, LoadNearestFarLabelAddress) {
-  mips::MipsLabel label;
-  __ LoadLabelAddress(mips::V0, mips::ZERO, &label);
-  constexpr size_t kAdduCount = 0x3FFDF;
-  for (size_t i = 0; i != kAdduCount; ++i) {
-    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
-  }
-  __ Bind(&label);
-
-  std::string expected =
-      "1:\n"
-      "auipc $at, %hi(2f - 1b)\n"
-      "addiu $v0, $at, %lo(2f - 1b)\n" +
-      RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") +
-      "2:\n";
-  DriverStr(expected, "LoadNearestFarLabelAddress");
-}
-
-TEST_F(AssemblerMIPS32r6Test, LoadFarthestNearLiteral) {
-  mips::Literal* literal = __ NewLiteral<uint32_t>(0x12345678);
-  __ LoadLiteral(mips::V0, mips::ZERO, literal);
-  constexpr size_t kAdduCount = 0x3FFDE;
-  for (size_t i = 0; i != kAdduCount; ++i) {
-    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
-  }
-
-  std::string expected =
-      "lwpc $v0, 1f\n" +
-      RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") +
-      "1:\n"
-      ".word 0x12345678\n";
-  DriverStr(expected, "LoadFarthestNearLiteral");
-}
-
-TEST_F(AssemblerMIPS32r6Test, LoadNearestFarLiteral) {
-  mips::Literal* literal = __ NewLiteral<uint32_t>(0x12345678);
-  __ LoadLiteral(mips::V0, mips::ZERO, literal);
-  constexpr size_t kAdduCount = 0x3FFDF;
-  for (size_t i = 0; i != kAdduCount; ++i) {
-    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
-  }
-
-  std::string expected =
-      "1:\n"
-      "auipc $at, %hi(2f - 1b)\n"
-      "lw $v0, %lo(2f - 1b)($at)\n" +
-      RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") +
-      "2:\n"
-      ".word 0x12345678\n";
-  DriverStr(expected, "LoadNearestFarLiteral");
-}
-
 //////////////
 // BRANCHES //
 //////////////
 
+TEST_F(AssemblerMIPS32r6Test, Bc) {
+  BranchHelper(&mips::MipsAssembler::Bc, "Bc", /* has_slot */ false);
+}
+
+TEST_F(AssemblerMIPS32r6Test, Balc) {
+  BranchHelper(&mips::MipsAssembler::Balc, "Balc", /* has_slot */ false);
+}
+
+TEST_F(AssemblerMIPS32r6Test, Beqc) {
+  BranchCondTwoRegsHelper(&mips::MipsAssembler::Beqc, "Beqc");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Bnec) {
+  BranchCondTwoRegsHelper(&mips::MipsAssembler::Bnec, "Bnec");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Beqzc) {
+  BranchCondOneRegHelper(&mips::MipsAssembler::Beqzc, "Beqzc");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Bnezc) {
+  BranchCondOneRegHelper(&mips::MipsAssembler::Bnezc, "Bnezc");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Bltzc) {
+  BranchCondOneRegHelper(&mips::MipsAssembler::Bltzc, "Bltzc");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Bgezc) {
+  BranchCondOneRegHelper(&mips::MipsAssembler::Bgezc, "Bgezc");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Blezc) {
+  BranchCondOneRegHelper(&mips::MipsAssembler::Blezc, "Blezc");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Bgtzc) {
+  BranchCondOneRegHelper(&mips::MipsAssembler::Bgtzc, "Bgtzc");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Bltc) {
+  BranchCondTwoRegsHelper(&mips::MipsAssembler::Bltc, "Bltc");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Bgec) {
+  BranchCondTwoRegsHelper(&mips::MipsAssembler::Bgec, "Bgec");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Bltuc) {
+  BranchCondTwoRegsHelper(&mips::MipsAssembler::Bltuc, "Bltuc");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Bgeuc) {
+  BranchCondTwoRegsHelper(&mips::MipsAssembler::Bgeuc, "Bgeuc");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Bc1eqz) {
+  BranchFpuCondHelper(&mips::MipsAssembler::Bc1eqz, "Bc1eqz");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Bc1nez) {
+  BranchFpuCondHelper(&mips::MipsAssembler::Bc1nez, "Bc1nez");
+}
+
+TEST_F(AssemblerMIPS32r6Test, B) {
+  BranchHelper(&mips::MipsAssembler::B, "Bc", /* has_slot */ false);
+}
+
+TEST_F(AssemblerMIPS32r6Test, Bal) {
+  BranchHelper(&mips::MipsAssembler::Bal, "Balc", /* has_slot */ false);
+}
+
+TEST_F(AssemblerMIPS32r6Test, Beq) {
+  BranchCondTwoRegsHelper(&mips::MipsAssembler::Beq, "Beqc");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Bne) {
+  BranchCondTwoRegsHelper(&mips::MipsAssembler::Bne, "Bnec");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Beqz) {
+  BranchCondOneRegHelper(&mips::MipsAssembler::Beqz, "Beqzc");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Bnez) {
+  BranchCondOneRegHelper(&mips::MipsAssembler::Bnez, "Bnezc");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Bltz) {
+  BranchCondOneRegHelper(&mips::MipsAssembler::Bltz, "Bltzc");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Bgez) {
+  BranchCondOneRegHelper(&mips::MipsAssembler::Bgez, "Bgezc");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Blez) {
+  BranchCondOneRegHelper(&mips::MipsAssembler::Blez, "Blezc");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Bgtz) {
+  BranchCondOneRegHelper(&mips::MipsAssembler::Bgtz, "Bgtzc");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Blt) {
+  BranchCondTwoRegsHelper(&mips::MipsAssembler::Blt, "Bltc");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Bge) {
+  BranchCondTwoRegsHelper(&mips::MipsAssembler::Bge, "Bgec");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Bltu) {
+  BranchCondTwoRegsHelper(&mips::MipsAssembler::Bltu, "Bltuc");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Bgeu) {
+  BranchCondTwoRegsHelper(&mips::MipsAssembler::Bgeu, "Bgeuc");
+}
+
+TEST_F(AssemblerMIPS32r6Test, BareBc) {
+  BranchHelper(&mips::MipsAssembler::Bc, "Bc", /* has_slot */ false, /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS32r6Test, BareBalc) {
+  BranchHelper(&mips::MipsAssembler::Balc, "Balc", /* has_slot */ false, /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS32r6Test, BareBeqc) {
+  BranchCondTwoRegsHelper(&mips::MipsAssembler::Beqc, "Beqc", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS32r6Test, BareBnec) {
+  BranchCondTwoRegsHelper(&mips::MipsAssembler::Bnec, "Bnec", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS32r6Test, BareBeqzc) {
+  BranchCondOneRegHelper(&mips::MipsAssembler::Beqzc, "Beqzc", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS32r6Test, BareBnezc) {
+  BranchCondOneRegHelper(&mips::MipsAssembler::Bnezc, "Bnezc", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS32r6Test, BareBltzc) {
+  BranchCondOneRegHelper(&mips::MipsAssembler::Bltzc, "Bltzc", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS32r6Test, BareBgezc) {
+  BranchCondOneRegHelper(&mips::MipsAssembler::Bgezc, "Bgezc", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS32r6Test, BareBlezc) {
+  BranchCondOneRegHelper(&mips::MipsAssembler::Blezc, "Blezc", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS32r6Test, BareBgtzc) {
+  BranchCondOneRegHelper(&mips::MipsAssembler::Bgtzc, "Bgtzc", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS32r6Test, BareBltc) {
+  BranchCondTwoRegsHelper(&mips::MipsAssembler::Bltc, "Bltc", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS32r6Test, BareBgec) {
+  BranchCondTwoRegsHelper(&mips::MipsAssembler::Bgec, "Bgec", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS32r6Test, BareBltuc) {
+  BranchCondTwoRegsHelper(&mips::MipsAssembler::Bltuc, "Bltuc", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS32r6Test, BareBgeuc) {
+  BranchCondTwoRegsHelper(&mips::MipsAssembler::Bgeuc, "Bgeuc", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS32r6Test, BareBc1eqz) {
+  BranchFpuCondHelper(&mips::MipsAssembler::Bc1eqz, "Bc1eqz", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS32r6Test, BareBc1nez) {
+  BranchFpuCondHelper(&mips::MipsAssembler::Bc1nez, "Bc1nez", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS32r6Test, BareB) {
+  BranchHelper(&mips::MipsAssembler::B, "B", /* has_slot */ true, /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS32r6Test, BareBal) {
+  BranchHelper(&mips::MipsAssembler::Bal, "Bal", /* has_slot */ true, /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS32r6Test, BareBeq) {
+  BranchCondTwoRegsHelper(&mips::MipsAssembler::Beq, "Beq", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS32r6Test, BareBne) {
+  BranchCondTwoRegsHelper(&mips::MipsAssembler::Bne, "Bne", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS32r6Test, BareBeqz) {
+  BranchCondOneRegHelper(&mips::MipsAssembler::Beqz, "Beqz", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS32r6Test, BareBnez) {
+  BranchCondOneRegHelper(&mips::MipsAssembler::Bnez, "Bnez", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS32r6Test, BareBltz) {
+  BranchCondOneRegHelper(&mips::MipsAssembler::Bltz, "Bltz", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS32r6Test, BareBgez) {
+  BranchCondOneRegHelper(&mips::MipsAssembler::Bgez, "Bgez", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS32r6Test, BareBlez) {
+  BranchCondOneRegHelper(&mips::MipsAssembler::Blez, "Blez", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS32r6Test, BareBgtz) {
+  BranchCondOneRegHelper(&mips::MipsAssembler::Bgtz, "Bgtz", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS32r6Test, BareBlt) {
+  BranchCondTwoRegsHelper(&mips::MipsAssembler::Blt, "Blt", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS32r6Test, BareBge) {
+  BranchCondTwoRegsHelper(&mips::MipsAssembler::Bge, "Bge", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS32r6Test, BareBltu) {
+  BranchCondTwoRegsHelper(&mips::MipsAssembler::Bltu, "Bltu", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS32r6Test, BareBgeu) {
+  BranchCondTwoRegsHelper(&mips::MipsAssembler::Bgeu, "Bgeu", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS32r6Test, LongBeqc) {
+  mips::MipsLabel label;
+  __ Beqc(mips::A0, mips::A1, &label);
+  constexpr uint32_t kAdduCount1 = (1u << 15) + 1;
+  for (uint32_t i = 0; i != kAdduCount1; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+  __ Bind(&label);
+  constexpr uint32_t kAdduCount2 = (1u << 15) + 1;
+  for (uint32_t i = 0; i != kAdduCount2; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+  __ Beqc(mips::A2, mips::A3, &label);
+
+  uint32_t offset_forward = 2 + kAdduCount1;  // 2: account for auipc and jic.
+  offset_forward <<= 2;
+  offset_forward += (offset_forward & 0x8000) << 1;  // Account for sign extension in jic.
+
+  uint32_t offset_back = -(kAdduCount2 + 1);  // 1: account for bnec.
+  offset_back <<= 2;
+  offset_back += (offset_back & 0x8000) << 1;  // Account for sign extension in jic.
+
+  std::ostringstream oss;
+  oss <<
+      ".set noreorder\n"
+      "bnec $a0, $a1, 1f\n"
+      "auipc $at, 0x" << std::hex << High16Bits(offset_forward) << "\n"
+      "jic $at, 0x" << std::hex << Low16Bits(offset_forward) << "\n"
+      "1:\n" <<
+      RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") <<
+      "2:\n" <<
+      RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") <<
+      "bnec $a2, $a3, 3f\n"
+      "auipc $at, 0x" << std::hex << High16Bits(offset_back) << "\n"
+      "jic $at, 0x" << std::hex << Low16Bits(offset_back) << "\n"
+      "3:\n";
+  std::string expected = oss.str();
+  DriverStr(expected, "LongBeqc");
+}
+
+TEST_F(AssemblerMIPS32r6Test, LongBeqzc) {
+  constexpr uint32_t kNopCount1 = (1u << 20) + 1;
+  constexpr uint32_t kNopCount2 = (1u << 20) + 1;
+  constexpr uint32_t kRequiredCapacity = (kNopCount1 + kNopCount2 + 6u) * 4u;
+  ASSERT_LT(__ GetBuffer()->Capacity(), kRequiredCapacity);
+  __ GetBuffer()->ExtendCapacity(kRequiredCapacity);
+  mips::MipsLabel label;
+  __ Beqzc(mips::A0, &label);
+  for (uint32_t i = 0; i != kNopCount1; ++i) {
+    __ Nop();
+  }
+  __ Bind(&label);
+  for (uint32_t i = 0; i != kNopCount2; ++i) {
+    __ Nop();
+  }
+  __ Beqzc(mips::A2, &label);
+
+  uint32_t offset_forward = 2 + kNopCount1;  // 2: account for auipc and jic.
+  offset_forward <<= 2;
+  offset_forward += (offset_forward & 0x8000) << 1;  // Account for sign extension in jic.
+
+  uint32_t offset_back = -(kNopCount2 + 1);  // 1: account for bnezc.
+  offset_back <<= 2;
+  offset_back += (offset_back & 0x8000) << 1;  // Account for sign extension in jic.
+
+  // Note, we're using the ".fill" directive to tell the assembler to generate many NOPs
+  // instead of generating them ourselves in the source code. This saves test time.
+  std::ostringstream oss;
+  oss <<
+      ".set noreorder\n"
+      "bnezc $a0, 1f\n"
+      "auipc $at, 0x" << std::hex << High16Bits(offset_forward) << "\n"
+      "jic $at, 0x" << std::hex << Low16Bits(offset_forward) << "\n"
+      "1:\n" <<
+      ".fill 0x" << std::hex << kNopCount1 << " , 4, 0\n"
+      "2:\n" <<
+      ".fill 0x" << std::hex << kNopCount2 << " , 4, 0\n"
+      "bnezc $a2, 3f\n"
+      "auipc $at, 0x" << std::hex << High16Bits(offset_back) << "\n"
+      "jic $at, 0x" << std::hex << Low16Bits(offset_back) << "\n"
+      "3:\n";
+  std::string expected = oss.str();
+  DriverStr(expected, "LongBeqzc");
+}
+
+TEST_F(AssemblerMIPS32r6Test, LongBc) {
+  constexpr uint32_t kNopCount1 = (1u << 25) + 1;
+  constexpr uint32_t kNopCount2 = (1u << 25) + 1;
+  constexpr uint32_t kRequiredCapacity = (kNopCount1 + kNopCount2 + 6u) * 4u;
+  ASSERT_LT(__ GetBuffer()->Capacity(), kRequiredCapacity);
+  __ GetBuffer()->ExtendCapacity(kRequiredCapacity);
+  mips::MipsLabel label1, label2;
+  __ Bc(&label1);
+  for (uint32_t i = 0; i != kNopCount1; ++i) {
+    __ Nop();
+  }
+  __ Bind(&label1);
+  __ Bc(&label2);
+  for (uint32_t i = 0; i != kNopCount2; ++i) {
+    __ Nop();
+  }
+  __ Bind(&label2);
+  __ Bc(&label1);
+
+  uint32_t offset_forward1 = 2 + kNopCount1;  // 2: account for auipc and jic.
+  offset_forward1 <<= 2;
+  offset_forward1 += (offset_forward1 & 0x8000) << 1;  // Account for sign extension in jic.
+
+  uint32_t offset_forward2 = 2 + kNopCount2;  // 2: account for auipc and jic.
+  offset_forward2 <<= 2;
+  offset_forward2 += (offset_forward2 & 0x8000) << 1;  // Account for sign extension in jic.
+
+  uint32_t offset_back = -(2 + kNopCount2);  // 2: account for auipc and jic.
+  offset_back <<= 2;
+  offset_back += (offset_back & 0x8000) << 1;  // Account for sign extension in jic.
+
+  // Note, we're using the ".fill" directive to tell the assembler to generate many NOPs
+  // instead of generating them ourselves in the source code. This saves a few minutes
+  // of test time.
+  std::ostringstream oss;
+  oss <<
+      ".set noreorder\n"
+      "auipc $at, 0x" << std::hex << High16Bits(offset_forward1) << "\n"
+      "jic $at, 0x" << std::hex << Low16Bits(offset_forward1) << "\n"
+      ".fill 0x" << std::hex << kNopCount1 << " , 4, 0\n"
+      "1:\n"
+      "auipc $at, 0x" << std::hex << High16Bits(offset_forward2) << "\n"
+      "jic $at, 0x" << std::hex << Low16Bits(offset_forward2) << "\n"
+      ".fill 0x" << std::hex << kNopCount2 << " , 4, 0\n"
+      "2:\n"
+      "auipc $at, 0x" << std::hex << High16Bits(offset_back) << "\n"
+      "jic $at, 0x" << std::hex << Low16Bits(offset_back) << "\n";
+  std::string expected = oss.str();
+  DriverStr(expected, "LongBc");
+}
+
 TEST_F(AssemblerMIPS32r6Test, ImpossibleReordering) {
   mips::MipsLabel label;
   __ SetReorder(true);
@@ -1154,43 +1571,80 @@
       "jic $at, 0x" << std::hex << Low16Bits(offset_back) << "\n"
       "3:\n";
   std::string expected = oss.str();
-  DriverStr(expected, "LongBeqc");
+  DriverStr(expected, "LongBranchReorder");
 }
 
-// TODO: MipsAssembler::Bc
-//       MipsAssembler::Jic
-//       MipsAssembler::Jialc
-//       MipsAssembler::Bltc
-//       MipsAssembler::Bltzc
-//       MipsAssembler::Bgtzc
-//       MipsAssembler::Bgec
-//       MipsAssembler::Bgezc
-//       MipsAssembler::Blezc
-//       MipsAssembler::Bltuc
-//       MipsAssembler::Bgeuc
-//       MipsAssembler::Beqc
-//       MipsAssembler::Bnec
-//       MipsAssembler::Beqzc
-//       MipsAssembler::Bnezc
-//       MipsAssembler::Bc1eqz
-//       MipsAssembler::Bc1nez
-//       MipsAssembler::Buncond
-//       MipsAssembler::Bcond
-//       MipsAssembler::Call
+///////////////////////
+// Loading Constants //
+///////////////////////
 
-// TODO:  AssemblerMIPS32r6Test.B
-//        AssemblerMIPS32r6Test.Beq
-//        AssemblerMIPS32r6Test.Bne
-//        AssemblerMIPS32r6Test.Beqz
-//        AssemblerMIPS32r6Test.Bnez
-//        AssemblerMIPS32r6Test.Bltz
-//        AssemblerMIPS32r6Test.Bgez
-//        AssemblerMIPS32r6Test.Blez
-//        AssemblerMIPS32r6Test.Bgtz
-//        AssemblerMIPS32r6Test.Blt
-//        AssemblerMIPS32r6Test.Bge
-//        AssemblerMIPS32r6Test.Bltu
-//        AssemblerMIPS32r6Test.Bgeu
+TEST_F(AssemblerMIPS32r6Test, LoadFarthestNearLabelAddress) {
+  mips::MipsLabel label;
+  __ LoadLabelAddress(mips::V0, mips::ZERO, &label);
+  constexpr size_t kAdduCount = 0x3FFDE;
+  for (size_t i = 0; i != kAdduCount; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+  __ Bind(&label);
+
+  std::string expected =
+      "lapc $v0, 1f\n" +
+      RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") +
+      "1:\n";
+  DriverStr(expected, "LoadFarthestNearLabelAddress");
+}
+
+TEST_F(AssemblerMIPS32r6Test, LoadNearestFarLabelAddress) {
+  mips::MipsLabel label;
+  __ LoadLabelAddress(mips::V0, mips::ZERO, &label);
+  constexpr size_t kAdduCount = 0x3FFDF;
+  for (size_t i = 0; i != kAdduCount; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+  __ Bind(&label);
+
+  std::string expected =
+      "1:\n"
+      "auipc $at, %hi(2f - 1b)\n"
+      "addiu $v0, $at, %lo(2f - 1b)\n" +
+      RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") +
+      "2:\n";
+  DriverStr(expected, "LoadNearestFarLabelAddress");
+}
+
+TEST_F(AssemblerMIPS32r6Test, LoadFarthestNearLiteral) {
+  mips::Literal* literal = __ NewLiteral<uint32_t>(0x12345678);
+  __ LoadLiteral(mips::V0, mips::ZERO, literal);
+  constexpr size_t kAdduCount = 0x3FFDE;
+  for (size_t i = 0; i != kAdduCount; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+
+  std::string expected =
+      "lwpc $v0, 1f\n" +
+      RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") +
+      "1:\n"
+      ".word 0x12345678\n";
+  DriverStr(expected, "LoadFarthestNearLiteral");
+}
+
+TEST_F(AssemblerMIPS32r6Test, LoadNearestFarLiteral) {
+  mips::Literal* literal = __ NewLiteral<uint32_t>(0x12345678);
+  __ LoadLiteral(mips::V0, mips::ZERO, literal);
+  constexpr size_t kAdduCount = 0x3FFDF;
+  for (size_t i = 0; i != kAdduCount; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+
+  std::string expected =
+      "1:\n"
+      "auipc $at, %hi(2f - 1b)\n"
+      "lw $v0, %lo(2f - 1b)($at)\n" +
+      RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") +
+      "2:\n"
+      ".word 0x12345678\n";
+  DriverStr(expected, "LoadNearestFarLiteral");
+}
 
 // MSA instructions.
 
diff --git a/compiler/utils/mips/assembler_mips_test.cc b/compiler/utils/mips/assembler_mips_test.cc
index 0917530..d9bf0b8 100644
--- a/compiler/utils/mips/assembler_mips_test.cc
+++ b/compiler/utils/mips/assembler_mips_test.cc
@@ -186,11 +186,51 @@
     return result;
   }
 
+  void BranchHelper(void (mips::MipsAssembler::*f)(mips::MipsLabel*,
+                                                   bool),
+                    const std::string& instr_name,
+                    bool is_bare = false) {
+    __ SetReorder(false);
+    mips::MipsLabel label1, label2;
+    (Base::GetAssembler()->*f)(&label1, is_bare);
+    constexpr size_t kAdduCount1 = 63;
+    for (size_t i = 0; i != kAdduCount1; ++i) {
+      __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+    }
+    __ Bind(&label1);
+    (Base::GetAssembler()->*f)(&label2, is_bare);
+    constexpr size_t kAdduCount2 = 64;
+    for (size_t i = 0; i != kAdduCount2; ++i) {
+      __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+    }
+    __ Bind(&label2);
+    (Base::GetAssembler()->*f)(&label1, is_bare);
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+
+    std::string expected =
+        ".set noreorder\n" +
+        instr_name + " 1f\n" +
+        (is_bare ? "" : "nop\n") +
+        RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
+        "1:\n" +
+        instr_name + " 2f\n" +
+        (is_bare ? "" : "nop\n") +
+        RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
+        "2:\n" +
+        instr_name + " 1b\n" +
+        (is_bare ? "" : "nop\n") +
+        "addu $zero, $zero, $zero\n";
+    DriverStr(expected, instr_name);
+  }
+
   void BranchCondOneRegHelper(void (mips::MipsAssembler::*f)(mips::Register,
-                                                             mips::MipsLabel*),
-                              const std::string& instr_name) {
+                                                             mips::MipsLabel*,
+                                                             bool),
+                              const std::string& instr_name,
+                              bool is_bare = false) {
+    __ SetReorder(false);
     mips::MipsLabel label;
-    (Base::GetAssembler()->*f)(mips::A0, &label);
+    (Base::GetAssembler()->*f)(mips::A0, &label, is_bare);
     constexpr size_t kAdduCount1 = 63;
     for (size_t i = 0; i != kAdduCount1; ++i) {
       __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
@@ -200,26 +240,31 @@
     for (size_t i = 0; i != kAdduCount2; ++i) {
       __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
     }
-    (Base::GetAssembler()->*f)(mips::A1, &label);
+    (Base::GetAssembler()->*f)(mips::A1, &label, is_bare);
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
 
     std::string expected =
         ".set noreorder\n" +
-        instr_name + " $a0, 1f\n"
-        "nop\n" +
+        instr_name + " $a0, 1f\n" +
+        (is_bare ? "" : "nop\n") +
         RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
         "1:\n" +
         RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
-        instr_name + " $a1, 1b\n"
-        "nop\n";
+        instr_name + " $a1, 1b\n" +
+        (is_bare ? "" : "nop\n") +
+        "addu $zero, $zero, $zero\n";
     DriverStr(expected, instr_name);
   }
 
   void BranchCondTwoRegsHelper(void (mips::MipsAssembler::*f)(mips::Register,
                                                               mips::Register,
-                                                              mips::MipsLabel*),
-                               const std::string& instr_name) {
+                                                              mips::MipsLabel*,
+                                                              bool),
+                               const std::string& instr_name,
+                               bool is_bare = false) {
+    __ SetReorder(false);
     mips::MipsLabel label;
-    (Base::GetAssembler()->*f)(mips::A0, mips::A1, &label);
+    (Base::GetAssembler()->*f)(mips::A0, mips::A1, &label, is_bare);
     constexpr size_t kAdduCount1 = 63;
     for (size_t i = 0; i != kAdduCount1; ++i) {
       __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
@@ -229,17 +274,52 @@
     for (size_t i = 0; i != kAdduCount2; ++i) {
       __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
     }
-    (Base::GetAssembler()->*f)(mips::A2, mips::A3, &label);
+    (Base::GetAssembler()->*f)(mips::A2, mips::A3, &label, is_bare);
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
 
     std::string expected =
         ".set noreorder\n" +
-        instr_name + " $a0, $a1, 1f\n"
-        "nop\n" +
+        instr_name + " $a0, $a1, 1f\n" +
+        (is_bare ? "" : "nop\n") +
         RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
         "1:\n" +
         RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
-        instr_name + " $a2, $a3, 1b\n"
-        "nop\n";
+        instr_name + " $a2, $a3, 1b\n" +
+        (is_bare ? "" : "nop\n") +
+        "addu $zero, $zero, $zero\n";
+    DriverStr(expected, instr_name);
+  }
+
+  void BranchFpuCondCodeHelper(void (mips::MipsAssembler::*f)(int,
+                                                              mips::MipsLabel*,
+                                                              bool),
+                               const std::string& instr_name,
+                               bool is_bare = false) {
+    __ SetReorder(false);
+    mips::MipsLabel label;
+    (Base::GetAssembler()->*f)(0, &label, is_bare);
+    constexpr size_t kAdduCount1 = 63;
+    for (size_t i = 0; i != kAdduCount1; ++i) {
+      __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+    }
+    __ Bind(&label);
+    constexpr size_t kAdduCount2 = 64;
+    for (size_t i = 0; i != kAdduCount2; ++i) {
+      __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+    }
+    (Base::GetAssembler()->*f)(7, &label, is_bare);
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+
+    std::string expected =
+        ".set noreorder\n" +
+        instr_name + " $fcc0, 1f\n" +
+        (is_bare ? "" : "nop\n") +
+        RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
+        "1:\n" +
+        RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
+        instr_name + " $fcc7, 1b\n" +
+        (is_bare ? "" : "nop\n") +
+        "addu $zero, $zero, $zero\n";
     DriverStr(expected, instr_name);
   }
 
@@ -2072,410 +2152,136 @@
   DriverStr(expected, "StoreConstToOffset");
 }
 
-TEST_F(AssemblerMIPSTest, B) {
-  mips::MipsLabel label1, label2;
-  __ B(&label1);
-  constexpr size_t kAdduCount1 = 63;
-  for (size_t i = 0; i != kAdduCount1; ++i) {
-    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
-  }
-  __ Bind(&label1);
-  __ B(&label2);
-  constexpr size_t kAdduCount2 = 64;
-  for (size_t i = 0; i != kAdduCount2; ++i) {
-    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
-  }
-  __ Bind(&label2);
-  __ B(&label1);
+//////////////
+// BRANCHES //
+//////////////
 
-  std::string expected =
-      ".set noreorder\n"
-      "b 1f\n"
-      "nop\n" +
-      RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
-      "1:\n"
-      "b 2f\n"
-      "nop\n" +
-      RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
-      "2:\n"
-      "b 1b\n"
-      "nop\n";
-  DriverStr(expected, "B");
+TEST_F(AssemblerMIPSTest, B) {
+  BranchHelper(&mips::MipsAssembler::B, "B");
+}
+
+TEST_F(AssemblerMIPSTest, Bal) {
+  BranchHelper(&mips::MipsAssembler::Bal, "Bal");
 }
 
 TEST_F(AssemblerMIPSTest, Beq) {
-  __ SetReorder(false);
   BranchCondTwoRegsHelper(&mips::MipsAssembler::Beq, "Beq");
 }
 
 TEST_F(AssemblerMIPSTest, Bne) {
-  __ SetReorder(false);
   BranchCondTwoRegsHelper(&mips::MipsAssembler::Bne, "Bne");
 }
 
 TEST_F(AssemblerMIPSTest, Beqz) {
-  __ SetReorder(false);
-  mips::MipsLabel label;
-  __ Beqz(mips::A0, &label);
-  constexpr size_t kAdduCount1 = 63;
-  for (size_t i = 0; i != kAdduCount1; ++i) {
-    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
-  }
-  __ Bind(&label);
-  constexpr size_t kAdduCount2 = 64;
-  for (size_t i = 0; i != kAdduCount2; ++i) {
-    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
-  }
-  __ Beqz(mips::A1, &label);
-
-  std::string expected =
-      ".set noreorder\n"
-      "beq $zero, $a0, 1f\n"
-      "nop\n" +
-      RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
-      "1:\n" +
-      RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
-      "beq $zero, $a1, 1b\n"
-      "nop\n";
-  DriverStr(expected, "Beqz");
+  BranchCondOneRegHelper(&mips::MipsAssembler::Beqz, "Beqz");
 }
 
 TEST_F(AssemblerMIPSTest, Bnez) {
-  __ SetReorder(false);
-  mips::MipsLabel label;
-  __ Bnez(mips::A0, &label);
-  constexpr size_t kAdduCount1 = 63;
-  for (size_t i = 0; i != kAdduCount1; ++i) {
-    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
-  }
-  __ Bind(&label);
-  constexpr size_t kAdduCount2 = 64;
-  for (size_t i = 0; i != kAdduCount2; ++i) {
-    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
-  }
-  __ Bnez(mips::A1, &label);
-
-  std::string expected =
-      ".set noreorder\n"
-      "bne $zero, $a0, 1f\n"
-      "nop\n" +
-      RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
-      "1:\n" +
-      RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
-      "bne $zero, $a1, 1b\n"
-      "nop\n";
-  DriverStr(expected, "Bnez");
+  BranchCondOneRegHelper(&mips::MipsAssembler::Bnez, "Bnez");
 }
 
 TEST_F(AssemblerMIPSTest, Bltz) {
-  __ SetReorder(false);
   BranchCondOneRegHelper(&mips::MipsAssembler::Bltz, "Bltz");
 }
 
 TEST_F(AssemblerMIPSTest, Bgez) {
-  __ SetReorder(false);
   BranchCondOneRegHelper(&mips::MipsAssembler::Bgez, "Bgez");
 }
 
 TEST_F(AssemblerMIPSTest, Blez) {
-  __ SetReorder(false);
   BranchCondOneRegHelper(&mips::MipsAssembler::Blez, "Blez");
 }
 
 TEST_F(AssemblerMIPSTest, Bgtz) {
-  __ SetReorder(false);
   BranchCondOneRegHelper(&mips::MipsAssembler::Bgtz, "Bgtz");
 }
 
 TEST_F(AssemblerMIPSTest, Blt) {
-  __ SetReorder(false);
-  mips::MipsLabel label;
-  __ Blt(mips::A0, mips::A1, &label);
-  constexpr size_t kAdduCount1 = 63;
-  for (size_t i = 0; i != kAdduCount1; ++i) {
-    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
-  }
-  __ Bind(&label);
-  constexpr size_t kAdduCount2 = 64;
-  for (size_t i = 0; i != kAdduCount2; ++i) {
-    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
-  }
-  __ Blt(mips::A2, mips::A3, &label);
-
-  std::string expected =
-      ".set noreorder\n"
-      "slt $at, $a0, $a1\n"
-      "bne $zero, $at, 1f\n"
-      "nop\n" +
-      RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
-      "1:\n" +
-      RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
-      "slt $at, $a2, $a3\n"
-      "bne $zero, $at, 1b\n"
-      "nop\n";
-  DriverStr(expected, "Blt");
+  BranchCondTwoRegsHelper(&mips::MipsAssembler::Blt, "Blt");
 }
 
 TEST_F(AssemblerMIPSTest, Bge) {
-  __ SetReorder(false);
-  mips::MipsLabel label;
-  __ Bge(mips::A0, mips::A1, &label);
-  constexpr size_t kAdduCount1 = 63;
-  for (size_t i = 0; i != kAdduCount1; ++i) {
-    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
-  }
-  __ Bind(&label);
-  constexpr size_t kAdduCount2 = 64;
-  for (size_t i = 0; i != kAdduCount2; ++i) {
-    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
-  }
-  __ Bge(mips::A2, mips::A3, &label);
-
-  std::string expected =
-      ".set noreorder\n"
-      "slt $at, $a0, $a1\n"
-      "beq $zero, $at, 1f\n"
-      "nop\n" +
-      RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
-      "1:\n" +
-      RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
-      "slt $at, $a2, $a3\n"
-      "beq $zero, $at, 1b\n"
-      "nop\n";
-  DriverStr(expected, "Bge");
+  BranchCondTwoRegsHelper(&mips::MipsAssembler::Bge, "Bge");
 }
 
 TEST_F(AssemblerMIPSTest, Bltu) {
-  __ SetReorder(false);
-  mips::MipsLabel label;
-  __ Bltu(mips::A0, mips::A1, &label);
-  constexpr size_t kAdduCount1 = 63;
-  for (size_t i = 0; i != kAdduCount1; ++i) {
-    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
-  }
-  __ Bind(&label);
-  constexpr size_t kAdduCount2 = 64;
-  for (size_t i = 0; i != kAdduCount2; ++i) {
-    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
-  }
-  __ Bltu(mips::A2, mips::A3, &label);
-
-  std::string expected =
-      ".set noreorder\n"
-      "sltu $at, $a0, $a1\n"
-      "bne $zero, $at, 1f\n"
-      "nop\n" +
-      RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
-      "1:\n" +
-      RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
-      "sltu $at, $a2, $a3\n"
-      "bne $zero, $at, 1b\n"
-      "nop\n";
-  DriverStr(expected, "Bltu");
+  BranchCondTwoRegsHelper(&mips::MipsAssembler::Bltu, "Bltu");
 }
 
 TEST_F(AssemblerMIPSTest, Bgeu) {
-  __ SetReorder(false);
-  mips::MipsLabel label;
-  __ Bgeu(mips::A0, mips::A1, &label);
-  constexpr size_t kAdduCount1 = 63;
-  for (size_t i = 0; i != kAdduCount1; ++i) {
-    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
-  }
-  __ Bind(&label);
-  constexpr size_t kAdduCount2 = 64;
-  for (size_t i = 0; i != kAdduCount2; ++i) {
-    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
-  }
-  __ Bgeu(mips::A2, mips::A3, &label);
-
-  std::string expected =
-      ".set noreorder\n"
-      "sltu $at, $a0, $a1\n"
-      "beq $zero, $at, 1f\n"
-      "nop\n" +
-      RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
-      "1:\n" +
-      RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
-      "sltu $at, $a2, $a3\n"
-      "beq $zero, $at, 1b\n"
-      "nop\n";
-  DriverStr(expected, "Bgeu");
+  BranchCondTwoRegsHelper(&mips::MipsAssembler::Bgeu, "Bgeu");
 }
 
 TEST_F(AssemblerMIPSTest, Bc1f) {
-  __ SetReorder(false);
-  mips::MipsLabel label;
-  __ Bc1f(0, &label);
-  constexpr size_t kAdduCount1 = 63;
-  for (size_t i = 0; i != kAdduCount1; ++i) {
-    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
-  }
-  __ Bind(&label);
-  constexpr size_t kAdduCount2 = 64;
-  for (size_t i = 0; i != kAdduCount2; ++i) {
-    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
-  }
-  __ Bc1f(7, &label);
-
-  std::string expected =
-      ".set noreorder\n"
-      "bc1f $fcc0, 1f\n"
-      "nop\n" +
-      RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
-      "1:\n" +
-      RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
-      "bc1f $fcc7, 1b\n"
-      "nop\n";
-  DriverStr(expected, "Bc1f");
+  BranchFpuCondCodeHelper(&mips::MipsAssembler::Bc1f, "Bc1f");
 }
 
 TEST_F(AssemblerMIPSTest, Bc1t) {
-  __ SetReorder(false);
-  mips::MipsLabel label;
-  __ Bc1t(0, &label);
-  constexpr size_t kAdduCount1 = 63;
-  for (size_t i = 0; i != kAdduCount1; ++i) {
-    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
-  }
-  __ Bind(&label);
-  constexpr size_t kAdduCount2 = 64;
-  for (size_t i = 0; i != kAdduCount2; ++i) {
-    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
-  }
-  __ Bc1t(7, &label);
-
-  std::string expected =
-      ".set noreorder\n"
-      "bc1t $fcc0, 1f\n"
-      "nop\n" +
-      RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
-      "1:\n" +
-      RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
-      "bc1t $fcc7, 1b\n"
-      "nop\n";
-  DriverStr(expected, "Bc1t");
+  BranchFpuCondCodeHelper(&mips::MipsAssembler::Bc1t, "Bc1t");
 }
 
-///////////////////////
-// Loading Constants //
-///////////////////////
-
-TEST_F(AssemblerMIPSTest, LoadConst32) {
-  // IsUint<16>(value)
-  __ LoadConst32(mips::V0, 0);
-  __ LoadConst32(mips::V0, 65535);
-  // IsInt<16>(value)
-  __ LoadConst32(mips::V0, -1);
-  __ LoadConst32(mips::V0, -32768);
-  // Everything else
-  __ LoadConst32(mips::V0, 65536);
-  __ LoadConst32(mips::V0, 65537);
-  __ LoadConst32(mips::V0, 2147483647);
-  __ LoadConst32(mips::V0, -32769);
-  __ LoadConst32(mips::V0, -65536);
-  __ LoadConst32(mips::V0, -65537);
-  __ LoadConst32(mips::V0, -2147483647);
-  __ LoadConst32(mips::V0, -2147483648);
-
-  const char* expected =
-      // IsUint<16>(value)
-      "ori $v0, $zero, 0\n"         // __ LoadConst32(mips::V0, 0);
-      "ori $v0, $zero, 65535\n"     // __ LoadConst32(mips::V0, 65535);
-      // IsInt<16>(value)
-      "addiu $v0, $zero, -1\n"      // __ LoadConst32(mips::V0, -1);
-      "addiu $v0, $zero, -32768\n"  // __ LoadConst32(mips::V0, -32768);
-      // Everything else
-      "lui $v0, 1\n"                // __ LoadConst32(mips::V0, 65536);
-      "lui $v0, 1\n"                // __ LoadConst32(mips::V0, 65537);
-      "ori $v0, 1\n"                //                 "
-      "lui $v0, 32767\n"            // __ LoadConst32(mips::V0, 2147483647);
-      "ori $v0, 65535\n"            //                 "
-      "lui $v0, 65535\n"            // __ LoadConst32(mips::V0, -32769);
-      "ori $v0, 32767\n"            //                 "
-      "lui $v0, 65535\n"            // __ LoadConst32(mips::V0, -65536);
-      "lui $v0, 65534\n"            // __ LoadConst32(mips::V0, -65537);
-      "ori $v0, 65535\n"            //                 "
-      "lui $v0, 32768\n"            // __ LoadConst32(mips::V0, -2147483647);
-      "ori $v0, 1\n"                //                 "
-      "lui $v0, 32768\n";           // __ LoadConst32(mips::V0, -2147483648);
-  DriverStr(expected, "LoadConst32");
+TEST_F(AssemblerMIPSTest, BareB) {
+  BranchHelper(&mips::MipsAssembler::B, "B", /* is_bare */ true);
 }
 
-TEST_F(AssemblerMIPSTest, LoadFarthestNearLabelAddress) {
-  mips::MipsLabel label;
-  __ BindPcRelBaseLabel();
-  __ LoadLabelAddress(mips::V0, mips::V1, &label);
-  constexpr size_t kAddiuCount = 0x1FDE;
-  for (size_t i = 0; i != kAddiuCount; ++i) {
-    __ Addiu(mips::A0, mips::A1, 0);
-  }
-  __ Bind(&label);
-
-  std::string expected =
-      "1:\n"
-      "addiu $v0, $v1, %lo(2f - 1b)\n" +
-      RepeatInsn(kAddiuCount, "addiu $a0, $a1, %hi(2f - 1b)\n") +
-      "2:\n";
-  DriverStr(expected, "LoadFarthestNearLabelAddress");
+TEST_F(AssemblerMIPSTest, BareBal) {
+  BranchHelper(&mips::MipsAssembler::Bal, "Bal", /* is_bare */ true);
 }
 
-TEST_F(AssemblerMIPSTest, LoadNearestFarLabelAddress) {
-  mips::MipsLabel label;
-  __ BindPcRelBaseLabel();
-  __ LoadLabelAddress(mips::V0, mips::V1, &label);
-  constexpr size_t kAdduCount = 0x1FDF;
-  for (size_t i = 0; i != kAdduCount; ++i) {
-    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
-  }
-  __ Bind(&label);
-
-  std::string expected =
-      "1:\n"
-      "lui $at, %hi(2f - 1b)\n"
-      "ori $at, $at, %lo(2f - 1b)\n"
-      "addu $v0, $at, $v1\n" +
-      RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") +
-      "2:\n";
-  DriverStr(expected, "LoadNearestFarLabelAddress");
+TEST_F(AssemblerMIPSTest, BareBeq) {
+  BranchCondTwoRegsHelper(&mips::MipsAssembler::Beq, "Beq", /* is_bare */ true);
 }
 
-TEST_F(AssemblerMIPSTest, LoadFarthestNearLiteral) {
-  mips::Literal* literal = __ NewLiteral<uint32_t>(0x12345678);
-  __ BindPcRelBaseLabel();
-  __ LoadLiteral(mips::V0, mips::V1, literal);
-  constexpr size_t kAddiuCount = 0x1FDE;
-  for (size_t i = 0; i != kAddiuCount; ++i) {
-    __ Addiu(mips::A0, mips::A1, 0);
-  }
-
-  std::string expected =
-      "1:\n"
-      "lw $v0, %lo(2f - 1b)($v1)\n" +
-      RepeatInsn(kAddiuCount, "addiu $a0, $a1, %hi(2f - 1b)\n") +
-      "2:\n"
-      ".word 0x12345678\n";
-  DriverStr(expected, "LoadFarthestNearLiteral");
+TEST_F(AssemblerMIPSTest, BareBne) {
+  BranchCondTwoRegsHelper(&mips::MipsAssembler::Bne, "Bne", /* is_bare */ true);
 }
 
-TEST_F(AssemblerMIPSTest, LoadNearestFarLiteral) {
-  mips::Literal* literal = __ NewLiteral<uint32_t>(0x12345678);
-  __ BindPcRelBaseLabel();
-  __ LoadLiteral(mips::V0, mips::V1, literal);
-  constexpr size_t kAdduCount = 0x1FDF;
-  for (size_t i = 0; i != kAdduCount; ++i) {
-    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
-  }
+TEST_F(AssemblerMIPSTest, BareBeqz) {
+  BranchCondOneRegHelper(&mips::MipsAssembler::Beqz, "Beqz", /* is_bare */ true);
+}
 
-  std::string expected =
-      "1:\n"
-      "lui $at, %hi(2f - 1b)\n"
-      "addu $at, $at, $v1\n"
-      "lw $v0, %lo(2f - 1b)($at)\n" +
-      RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") +
-      "2:\n"
-      ".word 0x12345678\n";
-  DriverStr(expected, "LoadNearestFarLiteral");
+TEST_F(AssemblerMIPSTest, BareBnez) {
+  BranchCondOneRegHelper(&mips::MipsAssembler::Bnez, "Bnez", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPSTest, BareBltz) {
+  BranchCondOneRegHelper(&mips::MipsAssembler::Bltz, "Bltz", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPSTest, BareBgez) {
+  BranchCondOneRegHelper(&mips::MipsAssembler::Bgez, "Bgez", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPSTest, BareBlez) {
+  BranchCondOneRegHelper(&mips::MipsAssembler::Blez, "Blez", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPSTest, BareBgtz) {
+  BranchCondOneRegHelper(&mips::MipsAssembler::Bgtz, "Bgtz", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPSTest, BareBlt) {
+  BranchCondTwoRegsHelper(&mips::MipsAssembler::Blt, "Blt", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPSTest, BareBge) {
+  BranchCondTwoRegsHelper(&mips::MipsAssembler::Bge, "Bge", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPSTest, BareBltu) {
+  BranchCondTwoRegsHelper(&mips::MipsAssembler::Bltu, "Bltu", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPSTest, BareBgeu) {
+  BranchCondTwoRegsHelper(&mips::MipsAssembler::Bgeu, "Bgeu", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPSTest, BareBc1f) {
+  BranchFpuCondCodeHelper(&mips::MipsAssembler::Bc1f, "Bc1f", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPSTest, BareBc1t) {
+  BranchFpuCondCodeHelper(&mips::MipsAssembler::Bc1t, "Bc1t", /* is_bare */ true);
 }
 
 TEST_F(AssemblerMIPSTest, ImpossibleReordering) {
@@ -2554,7 +2360,7 @@
       "nop\n"
 
       "addu $t0, $t1, $t2\n"
-      "beq $zero, $t0, 1b\n"
+      "beqz $t0, 1b\n"
       "nop\n"
 
       "or $t1, $t2, $t3\n"
@@ -2563,17 +2369,17 @@
 
       "and $t0, $t1, $t2\n"
       "slt $at, $t1, $t0\n"
-      "bne $zero, $at, 1b\n"
+      "bnez $at, 1b\n"
       "nop\n"
 
       "xor $at, $t0, $t1\n"
       "slt $at, $t1, $t0\n"
-      "beq $zero, $at, 1b\n"
+      "beqz $at, 1b\n"
       "nop\n"
 
       "subu $t0, $t1, $at\n"
       "sltu $at, $t1, $t0\n"
-      "bne $zero, $at, 1b\n"
+      "bnez $at, 1b\n"
       "nop\n"
 
       "c.olt.s $fcc1, $f2, $f4\n"
@@ -2606,11 +2412,11 @@
 
       "2:\n"
 
-      "bne $zero, $t0, 2b\n"
+      "bnez $t0, 2b\n"
       "nop\n"
 
       "sltu $at, $t1, $t0\n"
-      "beq $zero, $at, 2b\n"
+      "beqz $at, 2b\n"
       "nop\n"
 
       "bc1f $fcc2, 2b\n"
@@ -2666,22 +2472,22 @@
       ".set noreorder\n"
       "1:\n"
 
-      "beq $zero, $t1, 1b\n"
+      "beqz $t1, 1b\n"
       "addu $t0, $t1, $t2\n"
 
       "bne $t2, $t3, 1b\n"
       "or $t1, $t2, $t3\n"
 
       "slt $at, $t1, $t2\n"
-      "bne $zero, $at, 1b\n"
+      "bnez $at, 1b\n"
       "and $t0, $t1, $t2\n"
 
       "slt $at, $t1, $t0\n"
-      "beq $zero, $at, 1b\n"
+      "beqz $at, 1b\n"
       "xor $t2, $t0, $t1\n"
 
       "sltu $at, $t1, $t0\n"
-      "bne $zero, $at, 1b\n"
+      "bnez $at, 1b\n"
       "subu $t2, $t1, $t0\n"
 
       "bc1t $fcc1, 1b\n"
@@ -2882,6 +2688,127 @@
   DriverStr(expected, "LongBranchReorder");
 }
 
+///////////////////////
+// Loading Constants //
+///////////////////////
+
+TEST_F(AssemblerMIPSTest, LoadConst32) {
+  // IsUint<16>(value)
+  __ LoadConst32(mips::V0, 0);
+  __ LoadConst32(mips::V0, 65535);
+  // IsInt<16>(value)
+  __ LoadConst32(mips::V0, -1);
+  __ LoadConst32(mips::V0, -32768);
+  // Everything else
+  __ LoadConst32(mips::V0, 65536);
+  __ LoadConst32(mips::V0, 65537);
+  __ LoadConst32(mips::V0, 2147483647);
+  __ LoadConst32(mips::V0, -32769);
+  __ LoadConst32(mips::V0, -65536);
+  __ LoadConst32(mips::V0, -65537);
+  __ LoadConst32(mips::V0, -2147483647);
+  __ LoadConst32(mips::V0, -2147483648);
+
+  const char* expected =
+      // IsUint<16>(value)
+      "ori $v0, $zero, 0\n"         // __ LoadConst32(mips::V0, 0);
+      "ori $v0, $zero, 65535\n"     // __ LoadConst32(mips::V0, 65535);
+      // IsInt<16>(value)
+      "addiu $v0, $zero, -1\n"      // __ LoadConst32(mips::V0, -1);
+      "addiu $v0, $zero, -32768\n"  // __ LoadConst32(mips::V0, -32768);
+      // Everything else
+      "lui $v0, 1\n"                // __ LoadConst32(mips::V0, 65536);
+      "lui $v0, 1\n"                // __ LoadConst32(mips::V0, 65537);
+      "ori $v0, 1\n"                //                 "
+      "lui $v0, 32767\n"            // __ LoadConst32(mips::V0, 2147483647);
+      "ori $v0, 65535\n"            //                 "
+      "lui $v0, 65535\n"            // __ LoadConst32(mips::V0, -32769);
+      "ori $v0, 32767\n"            //                 "
+      "lui $v0, 65535\n"            // __ LoadConst32(mips::V0, -65536);
+      "lui $v0, 65534\n"            // __ LoadConst32(mips::V0, -65537);
+      "ori $v0, 65535\n"            //                 "
+      "lui $v0, 32768\n"            // __ LoadConst32(mips::V0, -2147483647);
+      "ori $v0, 1\n"                //                 "
+      "lui $v0, 32768\n";           // __ LoadConst32(mips::V0, -2147483648);
+  DriverStr(expected, "LoadConst32");
+}
+
+TEST_F(AssemblerMIPSTest, LoadFarthestNearLabelAddress) {
+  mips::MipsLabel label;
+  __ BindPcRelBaseLabel();
+  __ LoadLabelAddress(mips::V0, mips::V1, &label);
+  constexpr size_t kAddiuCount = 0x1FDE;
+  for (size_t i = 0; i != kAddiuCount; ++i) {
+    __ Addiu(mips::A0, mips::A1, 0);
+  }
+  __ Bind(&label);
+
+  std::string expected =
+      "1:\n"
+      "addiu $v0, $v1, %lo(2f - 1b)\n" +
+      RepeatInsn(kAddiuCount, "addiu $a0, $a1, %hi(2f - 1b)\n") +
+      "2:\n";
+  DriverStr(expected, "LoadFarthestNearLabelAddress");
+}
+
+TEST_F(AssemblerMIPSTest, LoadNearestFarLabelAddress) {
+  mips::MipsLabel label;
+  __ BindPcRelBaseLabel();
+  __ LoadLabelAddress(mips::V0, mips::V1, &label);
+  constexpr size_t kAdduCount = 0x1FDF;
+  for (size_t i = 0; i != kAdduCount; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+  __ Bind(&label);
+
+  std::string expected =
+      "1:\n"
+      "lui $at, %hi(2f - 1b)\n"
+      "ori $at, $at, %lo(2f - 1b)\n"
+      "addu $v0, $at, $v1\n" +
+      RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") +
+      "2:\n";
+  DriverStr(expected, "LoadNearestFarLabelAddress");
+}
+
+TEST_F(AssemblerMIPSTest, LoadFarthestNearLiteral) {
+  mips::Literal* literal = __ NewLiteral<uint32_t>(0x12345678);
+  __ BindPcRelBaseLabel();
+  __ LoadLiteral(mips::V0, mips::V1, literal);
+  constexpr size_t kAddiuCount = 0x1FDE;
+  for (size_t i = 0; i != kAddiuCount; ++i) {
+    __ Addiu(mips::A0, mips::A1, 0);
+  }
+
+  std::string expected =
+      "1:\n"
+      "lw $v0, %lo(2f - 1b)($v1)\n" +
+      RepeatInsn(kAddiuCount, "addiu $a0, $a1, %hi(2f - 1b)\n") +
+      "2:\n"
+      ".word 0x12345678\n";
+  DriverStr(expected, "LoadFarthestNearLiteral");
+}
+
+TEST_F(AssemblerMIPSTest, LoadNearestFarLiteral) {
+  mips::Literal* literal = __ NewLiteral<uint32_t>(0x12345678);
+  __ BindPcRelBaseLabel();
+  __ LoadLiteral(mips::V0, mips::V1, literal);
+  constexpr size_t kAdduCount = 0x1FDF;
+  for (size_t i = 0; i != kAdduCount; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+
+  std::string expected =
+      "1:\n"
+      "lui $at, %hi(2f - 1b)\n"
+      "addu $at, $at, $v1\n"
+      "lw $v0, %lo(2f - 1b)($at)\n" +
+      RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") +
+      "2:\n"
+      ".word 0x12345678\n";
+  DriverStr(expected, "LoadNearestFarLiteral");
+}
+
 #undef __
 
 }  // namespace art
diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc
index 7a1beb6..3aa09fb 100644
--- a/compiler/utils/mips64/assembler_mips64.cc
+++ b/compiler/utils/mips64/assembler_mips64.cc
@@ -795,14 +795,42 @@
   EmitFI(0x11, 0xD, ft, imm16);
 }
 
-void Mips64Assembler::Beqz(GpuRegister rt, uint16_t imm16) {
-  EmitI(0x4, ZERO, rt, imm16);
+void Mips64Assembler::Beq(GpuRegister rs, GpuRegister rt, uint16_t imm16) {
+  EmitI(0x4, rs, rt, imm16);
 }
 
-void Mips64Assembler::EmitBcondc(BranchCondition cond,
-                                 GpuRegister rs,
-                                 GpuRegister rt,
-                                 uint32_t imm16_21) {
+void Mips64Assembler::Bne(GpuRegister rs, GpuRegister rt, uint16_t imm16) {
+  EmitI(0x5, rs, rt, imm16);
+}
+
+void Mips64Assembler::Beqz(GpuRegister rt, uint16_t imm16) {
+  Beq(rt, ZERO, imm16);
+}
+
+void Mips64Assembler::Bnez(GpuRegister rt, uint16_t imm16) {
+  Bne(rt, ZERO, imm16);
+}
+
+void Mips64Assembler::Bltz(GpuRegister rt, uint16_t imm16) {
+  EmitI(0x1, rt, static_cast<GpuRegister>(0), imm16);
+}
+
+void Mips64Assembler::Bgez(GpuRegister rt, uint16_t imm16) {
+  EmitI(0x1, rt, static_cast<GpuRegister>(0x1), imm16);
+}
+
+void Mips64Assembler::Blez(GpuRegister rt, uint16_t imm16) {
+  EmitI(0x6, rt, static_cast<GpuRegister>(0), imm16);
+}
+
+void Mips64Assembler::Bgtz(GpuRegister rt, uint16_t imm16) {
+  EmitI(0x7, rt, static_cast<GpuRegister>(0), imm16);
+}
+
+void Mips64Assembler::EmitBcondR6(BranchCondition cond,
+                                  GpuRegister rs,
+                                  GpuRegister rt,
+                                  uint32_t imm16_21) {
   switch (cond) {
     case kCondLT:
       Bltc(rs, rt, imm16_21);
@@ -866,6 +894,55 @@
   }
 }
 
+void Mips64Assembler::EmitBcondR2(BranchCondition cond,
+                                  GpuRegister rs,
+                                  GpuRegister rt,
+                                  uint16_t imm16) {
+  switch (cond) {
+    case kCondLTZ:
+      CHECK_EQ(rt, ZERO);
+      Bltz(rs, imm16);
+      break;
+    case kCondGEZ:
+      CHECK_EQ(rt, ZERO);
+      Bgez(rs, imm16);
+      break;
+    case kCondLEZ:
+      CHECK_EQ(rt, ZERO);
+      Blez(rs, imm16);
+      break;
+    case kCondGTZ:
+      CHECK_EQ(rt, ZERO);
+      Bgtz(rs, imm16);
+      break;
+    case kCondEQ:
+      Beq(rs, rt, imm16);
+      break;
+    case kCondNE:
+      Bne(rs, rt, imm16);
+      break;
+    case kCondEQZ:
+      CHECK_EQ(rt, ZERO);
+      Beqz(rs, imm16);
+      break;
+    case kCondNEZ:
+      CHECK_EQ(rt, ZERO);
+      Bnez(rs, imm16);
+      break;
+    case kCondF:
+    case kCondT:
+    case kCondLT:
+    case kCondGE:
+    case kCondLE:
+    case kCondGT:
+    case kCondLTU:
+    case kCondGEU:
+    case kUncond:
+      LOG(FATAL) << "Unexpected branch condition " << cond;
+      UNREACHABLE();
+  }
+}
+
 void Mips64Assembler::AddS(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
   EmitFR(0x11, 0x10, ft, fs, fd, 0x0);
 }
@@ -2013,37 +2090,67 @@
   type_ = (offset_size <= branch_info_[short_type].offset_size) ? short_type : long_type;
 }
 
-void Mips64Assembler::Branch::InitializeType(Type initial_type) {
-  OffsetBits offset_size = GetOffsetSizeNeeded(location_, target_);
-  switch (initial_type) {
-    case kLabel:
-    case kLiteral:
-    case kLiteralUnsigned:
-    case kLiteralLong:
-      CHECK(!IsResolved());
-      type_ = initial_type;
-      break;
-    case kCall:
-      InitShortOrLong(offset_size, kCall, kLongCall);
-      break;
-    case kCondBranch:
-      switch (condition_) {
-        case kUncond:
-          InitShortOrLong(offset_size, kUncondBranch, kLongUncondBranch);
-          break;
-        case kCondEQZ:
-        case kCondNEZ:
-          // Special case for beqzc/bnezc with longer offset than in other b<cond>c instructions.
-          type_ = (offset_size <= kOffset23) ? kCondBranch : kLongCondBranch;
-          break;
-        default:
-          InitShortOrLong(offset_size, kCondBranch, kLongCondBranch);
-          break;
-      }
-      break;
-    default:
-      LOG(FATAL) << "Unexpected branch type " << initial_type;
-      UNREACHABLE();
+void Mips64Assembler::Branch::InitializeType(Type initial_type, bool is_r6) {
+  OffsetBits offset_size_needed = GetOffsetSizeNeeded(location_, target_);
+  if (is_r6) {
+    // R6
+    switch (initial_type) {
+      case kLabel:
+      case kLiteral:
+      case kLiteralUnsigned:
+      case kLiteralLong:
+        CHECK(!IsResolved());
+        type_ = initial_type;
+        break;
+      case kCall:
+        InitShortOrLong(offset_size_needed, kCall, kLongCall);
+        break;
+      case kCondBranch:
+        switch (condition_) {
+          case kUncond:
+            InitShortOrLong(offset_size_needed, kUncondBranch, kLongUncondBranch);
+            break;
+          case kCondEQZ:
+          case kCondNEZ:
+            // Special case for beqzc/bnezc with longer offset than in other b<cond>c instructions.
+            type_ = (offset_size_needed <= kOffset23) ? kCondBranch : kLongCondBranch;
+            break;
+          default:
+            InitShortOrLong(offset_size_needed, kCondBranch, kLongCondBranch);
+            break;
+        }
+        break;
+      case kBareCall:
+        type_ = kBareCall;
+        CHECK_LE(offset_size_needed, GetOffsetSize());
+        break;
+      case kBareCondBranch:
+        type_ = (condition_ == kUncond) ? kBareUncondBranch : kBareCondBranch;
+        CHECK_LE(offset_size_needed, GetOffsetSize());
+        break;
+      default:
+        LOG(FATAL) << "Unexpected branch type " << initial_type;
+        UNREACHABLE();
+    }
+  } else {
+    // R2
+    CHECK_EQ(initial_type, kBareCondBranch);
+    switch (condition_) {
+      case kCondLTZ:
+      case kCondGEZ:
+      case kCondLEZ:
+      case kCondGTZ:
+      case kCondEQ:
+      case kCondNE:
+      case kCondEQZ:
+      case kCondNEZ:
+        break;
+      default:
+        LOG(FATAL) << "Unexpected R2 branch condition " << condition_;
+        UNREACHABLE();
+    }
+    type_ = kR2BareCondBranch;
+    CHECK_LE(offset_size_needed, GetOffsetSize());
   }
   old_type_ = type_;
 }
@@ -2076,21 +2183,25 @@
   }
 }
 
-Mips64Assembler::Branch::Branch(uint32_t location, uint32_t target, bool is_call)
+Mips64Assembler::Branch::Branch(uint32_t location, uint32_t target, bool is_call, bool is_bare)
     : old_location_(location),
       location_(location),
       target_(target),
       lhs_reg_(ZERO),
       rhs_reg_(ZERO),
       condition_(kUncond) {
-  InitializeType(is_call ? kCall : kCondBranch);
+  InitializeType(
+      (is_call ? (is_bare ? kBareCall : kCall) : (is_bare ? kBareCondBranch : kCondBranch)),
+      /* is_r6 */ true);
 }
 
-Mips64Assembler::Branch::Branch(uint32_t location,
+Mips64Assembler::Branch::Branch(bool is_r6,
+                                uint32_t location,
                                 uint32_t target,
                                 Mips64Assembler::BranchCondition condition,
                                 GpuRegister lhs_reg,
-                                GpuRegister rhs_reg)
+                                GpuRegister rhs_reg,
+                                bool is_bare)
     : old_location_(location),
       location_(location),
       target_(target),
@@ -2131,7 +2242,7 @@
     // Branch condition is always true, make the branch unconditional.
     condition_ = kUncond;
   }
-  InitializeType(kCondBranch);
+  InitializeType((is_bare ? kBareCondBranch : kCondBranch), is_r6);
 }
 
 Mips64Assembler::Branch::Branch(uint32_t location, GpuRegister dest_reg, Type label_or_literal_type)
@@ -2142,7 +2253,7 @@
       rhs_reg_(ZERO),
       condition_(kUncond) {
   CHECK_NE(dest_reg, ZERO);
-  InitializeType(label_or_literal_type);
+  InitializeType(label_or_literal_type, /* is_r6 */ true);
 }
 
 Mips64Assembler::BranchCondition Mips64Assembler::Branch::OppositeCondition(
@@ -2238,12 +2349,32 @@
   return GetOldLocation() + GetOldSize();
 }
 
+bool Mips64Assembler::Branch::IsBare() const {
+  switch (type_) {
+    // R6 short branches (can't be promoted to long), forbidden/delay slots filled manually.
+    case kBareUncondBranch:
+    case kBareCondBranch:
+    case kBareCall:
+    // R2 short branches (can't be promoted to long), delay slots filled manually.
+    case kR2BareCondBranch:
+      return true;
+    default:
+      return false;
+  }
+}
+
 bool Mips64Assembler::Branch::IsLong() const {
   switch (type_) {
-    // Short branches.
+    // R6 short branches (can be promoted to long).
     case kUncondBranch:
     case kCondBranch:
     case kCall:
+    // R6 short branches (can't be promoted to long), forbidden/delay slots filled manually.
+    case kBareUncondBranch:
+    case kBareCondBranch:
+    case kBareCall:
+    // R2 short branches (can't be promoted to long), delay slots filled manually.
+    case kR2BareCondBranch:
     // Near label.
     case kLabel:
     // Near literals.
@@ -2271,8 +2402,9 @@
 }
 
 Mips64Assembler::Branch::OffsetBits Mips64Assembler::Branch::GetOffsetSize() const {
+  bool r6_cond_branch = (type_ == kCondBranch || type_ == kBareCondBranch);
   OffsetBits offset_size =
-      (type_ == kCondBranch && (condition_ == kCondEQZ || condition_ == kCondNEZ))
+      (r6_cond_branch && (condition_ == kCondEQZ || condition_ == kCondNEZ))
           ? kOffset23
           : branch_info_[type_].offset_size;
   return offset_size;
@@ -2318,8 +2450,9 @@
 }
 
 void Mips64Assembler::Branch::PromoteToLong() {
+  CHECK(!IsBare());  // Bare branches do not promote.
   switch (type_) {
-    // Short branches.
+    // R6 short branches (can be promoted to long).
     case kUncondBranch:
       type_ = kLongUncondBranch;
       break;
@@ -2366,7 +2499,7 @@
   }
   // The following logic is for debugging/testing purposes.
   // Promote some short branches to long when it's not really required.
-  if (UNLIKELY(max_short_distance != std::numeric_limits<uint32_t>::max())) {
+  if (UNLIKELY(max_short_distance != std::numeric_limits<uint32_t>::max() && !IsBare())) {
     int64_t distance = static_cast<int64_t>(target_) - location_;
     distance = (distance >= 0) ? distance : -distance;
     if (distance >= max_short_distance) {
@@ -2498,13 +2631,15 @@
   }
 }
 
-void Mips64Assembler::Buncond(Mips64Label* label) {
+void Mips64Assembler::Buncond(Mips64Label* label, bool is_bare) {
   uint32_t target = label->IsBound() ? GetLabelLocation(label) : Branch::kUnresolved;
-  branches_.emplace_back(buffer_.Size(), target, /* is_call */ false);
+  branches_.emplace_back(buffer_.Size(), target, /* is_call */ false, is_bare);
   FinalizeLabeledBranch(label);
 }
 
 void Mips64Assembler::Bcond(Mips64Label* label,
+                            bool is_r6,
+                            bool is_bare,
                             BranchCondition condition,
                             GpuRegister lhs,
                             GpuRegister rhs) {
@@ -2513,13 +2648,13 @@
     return;
   }
   uint32_t target = label->IsBound() ? GetLabelLocation(label) : Branch::kUnresolved;
-  branches_.emplace_back(buffer_.Size(), target, condition, lhs, rhs);
+  branches_.emplace_back(is_r6, buffer_.Size(), target, condition, lhs, rhs, is_bare);
   FinalizeLabeledBranch(label);
 }
 
-void Mips64Assembler::Call(Mips64Label* label) {
+void Mips64Assembler::Call(Mips64Label* label, bool is_bare) {
   uint32_t target = label->IsBound() ? GetLabelLocation(label) : Branch::kUnresolved;
-  branches_.emplace_back(buffer_.Size(), target, /* is_call */ true);
+  branches_.emplace_back(buffer_.Size(), target, /* is_call */ true, is_bare);
   FinalizeLabeledBranch(label);
 }
 
@@ -2730,11 +2865,18 @@
 
 // Note: make sure branch_info_[] and EmitBranch() are kept synchronized.
 const Mips64Assembler::Branch::BranchInfo Mips64Assembler::Branch::branch_info_[] = {
-  // Short branches.
+  // R6 short branches (can be promoted to long).
   {  1, 0, 1, Mips64Assembler::Branch::kOffset28, 2 },  // kUncondBranch
   {  2, 0, 1, Mips64Assembler::Branch::kOffset18, 2 },  // kCondBranch
                                                         // Exception: kOffset23 for beqzc/bnezc
   {  1, 0, 1, Mips64Assembler::Branch::kOffset28, 2 },  // kCall
+  // R6 short branches (can't be promoted to long), forbidden/delay slots filled manually.
+  {  1, 0, 1, Mips64Assembler::Branch::kOffset28, 2 },  // kBareUncondBranch
+  {  1, 0, 1, Mips64Assembler::Branch::kOffset18, 2 },  // kBareCondBranch
+                                                        // Exception: kOffset23 for beqzc/bnezc
+  {  1, 0, 1, Mips64Assembler::Branch::kOffset28, 2 },  // kBareCall
+  // R2 short branches (can't be promoted to long), delay slots filled manually.
+  {  1, 0, 1, Mips64Assembler::Branch::kOffset18, 2 },  // kR2BareCondBranch
   // Near label.
   {  1, 0, 0, Mips64Assembler::Branch::kOffset21, 2 },  // kLabel
   // Near literals.
@@ -2769,13 +2911,29 @@
       break;
     case Branch::kCondBranch:
       CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
-      EmitBcondc(condition, lhs, rhs, offset);
+      EmitBcondR6(condition, lhs, rhs, offset);
       Nop();  // TODO: improve by filling the forbidden/delay slot.
       break;
     case Branch::kCall:
       CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
       Balc(offset);
       break;
+    case Branch::kBareUncondBranch:
+      CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+      Bc(offset);
+      break;
+    case Branch::kBareCondBranch:
+      CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+      EmitBcondR6(condition, lhs, rhs, offset);
+      break;
+    case Branch::kBareCall:
+      CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+      Balc(offset);
+      break;
+    case Branch::kR2BareCondBranch:
+      CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+      EmitBcondR2(condition, lhs, rhs, offset);
+      break;
 
     // Near label.
     case Branch::kLabel:
@@ -2804,7 +2962,7 @@
       Jic(AT, Low16Bits(offset));
       break;
     case Branch::kLongCondBranch:
-      EmitBcondc(Branch::OppositeCondition(condition), lhs, rhs, 2);
+      EmitBcondR6(Branch::OppositeCondition(condition), lhs, rhs, 2);
       offset += (offset & 0x8000) << 1;  // Account for sign extension in jic.
       CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
       Auipc(AT, High16Bits(offset));
@@ -2848,68 +3006,108 @@
   CHECK_LT(branch->GetSize(), static_cast<uint32_t>(Branch::kMaxBranchSize));
 }
 
-void Mips64Assembler::Bc(Mips64Label* label) {
-  Buncond(label);
+void Mips64Assembler::Bc(Mips64Label* label, bool is_bare) {
+  Buncond(label, is_bare);
 }
 
-void Mips64Assembler::Balc(Mips64Label* label) {
-  Call(label);
+void Mips64Assembler::Balc(Mips64Label* label, bool is_bare) {
+  Call(label, is_bare);
 }
 
-void Mips64Assembler::Bltc(GpuRegister rs, GpuRegister rt, Mips64Label* label) {
-  Bcond(label, kCondLT, rs, rt);
+void Mips64Assembler::Bltc(GpuRegister rs, GpuRegister rt, Mips64Label* label, bool is_bare) {
+  Bcond(label, /* is_r6 */ true, is_bare, kCondLT, rs, rt);
 }
 
-void Mips64Assembler::Bltzc(GpuRegister rt, Mips64Label* label) {
-  Bcond(label, kCondLTZ, rt);
+void Mips64Assembler::Bltzc(GpuRegister rt, Mips64Label* label, bool is_bare) {
+  Bcond(label, /* is_r6 */ true, is_bare, kCondLTZ, rt);
 }
 
-void Mips64Assembler::Bgtzc(GpuRegister rt, Mips64Label* label) {
-  Bcond(label, kCondGTZ, rt);
+void Mips64Assembler::Bgtzc(GpuRegister rt, Mips64Label* label, bool is_bare) {
+  Bcond(label, /* is_r6 */ true, is_bare, kCondGTZ, rt);
 }
 
-void Mips64Assembler::Bgec(GpuRegister rs, GpuRegister rt, Mips64Label* label) {
-  Bcond(label, kCondGE, rs, rt);
+void Mips64Assembler::Bgec(GpuRegister rs, GpuRegister rt, Mips64Label* label, bool is_bare) {
+  Bcond(label, /* is_r6 */ true, is_bare, kCondGE, rs, rt);
 }
 
-void Mips64Assembler::Bgezc(GpuRegister rt, Mips64Label* label) {
-  Bcond(label, kCondGEZ, rt);
+void Mips64Assembler::Bgezc(GpuRegister rt, Mips64Label* label, bool is_bare) {
+  Bcond(label, /* is_r6 */ true, is_bare, kCondGEZ, rt);
 }
 
-void Mips64Assembler::Blezc(GpuRegister rt, Mips64Label* label) {
-  Bcond(label, kCondLEZ, rt);
+void Mips64Assembler::Blezc(GpuRegister rt, Mips64Label* label, bool is_bare) {
+  Bcond(label, /* is_r6 */ true, is_bare, kCondLEZ, rt);
 }
 
-void Mips64Assembler::Bltuc(GpuRegister rs, GpuRegister rt, Mips64Label* label) {
-  Bcond(label, kCondLTU, rs, rt);
+void Mips64Assembler::Bltuc(GpuRegister rs, GpuRegister rt, Mips64Label* label, bool is_bare) {
+  Bcond(label, /* is_r6 */ true, is_bare, kCondLTU, rs, rt);
 }
 
-void Mips64Assembler::Bgeuc(GpuRegister rs, GpuRegister rt, Mips64Label* label) {
-  Bcond(label, kCondGEU, rs, rt);
+void Mips64Assembler::Bgeuc(GpuRegister rs, GpuRegister rt, Mips64Label* label, bool is_bare) {
+  Bcond(label, /* is_r6 */ true, is_bare, kCondGEU, rs, rt);
 }
 
-void Mips64Assembler::Beqc(GpuRegister rs, GpuRegister rt, Mips64Label* label) {
-  Bcond(label, kCondEQ, rs, rt);
+void Mips64Assembler::Beqc(GpuRegister rs, GpuRegister rt, Mips64Label* label, bool is_bare) {
+  Bcond(label, /* is_r6 */ true, is_bare, kCondEQ, rs, rt);
 }
 
-void Mips64Assembler::Bnec(GpuRegister rs, GpuRegister rt, Mips64Label* label) {
-  Bcond(label, kCondNE, rs, rt);
+void Mips64Assembler::Bnec(GpuRegister rs, GpuRegister rt, Mips64Label* label, bool is_bare) {
+  Bcond(label, /* is_r6 */ true, is_bare, kCondNE, rs, rt);
 }
 
-void Mips64Assembler::Beqzc(GpuRegister rs, Mips64Label* label) {
-  Bcond(label, kCondEQZ, rs);
+void Mips64Assembler::Beqzc(GpuRegister rs, Mips64Label* label, bool is_bare) {
+  Bcond(label, /* is_r6 */ true, is_bare, kCondEQZ, rs);
 }
 
-void Mips64Assembler::Bnezc(GpuRegister rs, Mips64Label* label) {
-  Bcond(label, kCondNEZ, rs);
+void Mips64Assembler::Bnezc(GpuRegister rs, Mips64Label* label, bool is_bare) {
+  Bcond(label, /* is_r6 */ true, is_bare, kCondNEZ, rs);
 }
 
-void Mips64Assembler::Bc1eqz(FpuRegister ft, Mips64Label* label) {
-  Bcond(label, kCondF, static_cast<GpuRegister>(ft), ZERO);
+void Mips64Assembler::Bc1eqz(FpuRegister ft, Mips64Label* label, bool is_bare) {
+  Bcond(label, /* is_r6 */ true, is_bare, kCondF, static_cast<GpuRegister>(ft), ZERO);
 }
 
-void Mips64Assembler::Bc1nez(FpuRegister ft, Mips64Label* label) {
-  Bcond(label, kCondT, static_cast<GpuRegister>(ft), ZERO);
+void Mips64Assembler::Bc1nez(FpuRegister ft, Mips64Label* label, bool is_bare) {
+  Bcond(label, /* is_r6 */ true, is_bare, kCondT, static_cast<GpuRegister>(ft), ZERO);
+}
+
+void Mips64Assembler::Bltz(GpuRegister rt, Mips64Label* label, bool is_bare) {
+  CHECK(is_bare);
+  Bcond(label, /* is_r6 */ false, is_bare, kCondLTZ, rt);
+}
+
+void Mips64Assembler::Bgtz(GpuRegister rt, Mips64Label* label, bool is_bare) {
+  CHECK(is_bare);
+  Bcond(label, /* is_r6 */ false, is_bare, kCondGTZ, rt);
+}
+
+void Mips64Assembler::Bgez(GpuRegister rt, Mips64Label* label, bool is_bare) {
+  CHECK(is_bare);
+  Bcond(label, /* is_r6 */ false, is_bare, kCondGEZ, rt);
+}
+
+void Mips64Assembler::Blez(GpuRegister rt, Mips64Label* label, bool is_bare) {
+  CHECK(is_bare);
+  Bcond(label, /* is_r6 */ false, is_bare, kCondLEZ, rt);
+}
+
+void Mips64Assembler::Beq(GpuRegister rs, GpuRegister rt, Mips64Label* label, bool is_bare) {
+  CHECK(is_bare);
+  Bcond(label, /* is_r6 */ false, is_bare, kCondEQ, rs, rt);
+}
+
+void Mips64Assembler::Bne(GpuRegister rs, GpuRegister rt, Mips64Label* label, bool is_bare) {
+  CHECK(is_bare);
+  Bcond(label, /* is_r6 */ false, is_bare, kCondNE, rs, rt);
+}
+
+void Mips64Assembler::Beqz(GpuRegister rs, Mips64Label* label, bool is_bare) {
+  CHECK(is_bare);
+  Bcond(label, /* is_r6 */ false, is_bare, kCondEQZ, rs);
+}
+
+void Mips64Assembler::Bnez(GpuRegister rs, Mips64Label* label, bool is_bare) {
+  CHECK(is_bare);
+  Bcond(label, /* is_r6 */ false, is_bare, kCondNEZ, rs);
 }
 
 void Mips64Assembler::AdjustBaseAndOffset(GpuRegister& base,
diff --git a/compiler/utils/mips64/assembler_mips64.h b/compiler/utils/mips64/assembler_mips64.h
index c39d120..023bcd6 100644
--- a/compiler/utils/mips64/assembler_mips64.h
+++ b/compiler/utils/mips64/assembler_mips64.h
@@ -563,7 +563,14 @@
   void Bnezc(GpuRegister rs, uint32_t imm21);
   void Bc1eqz(FpuRegister ft, uint16_t imm16);
   void Bc1nez(FpuRegister ft, uint16_t imm16);
-  void Beqz(GpuRegister rt, uint16_t imm16);
+  void Beq(GpuRegister rs, GpuRegister rt, uint16_t imm16);  // R2
+  void Bne(GpuRegister rs, GpuRegister rt, uint16_t imm16);  // R2
+  void Beqz(GpuRegister rt, uint16_t imm16);  // R2
+  void Bnez(GpuRegister rt, uint16_t imm16);  // R2
+  void Bltz(GpuRegister rt, uint16_t imm16);  // R2
+  void Bgez(GpuRegister rt, uint16_t imm16);  // R2
+  void Blez(GpuRegister rt, uint16_t imm16);  // R2
+  void Bgtz(GpuRegister rt, uint16_t imm16);  // R2
 
   void AddS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
   void SubS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
@@ -922,22 +929,57 @@
   // the table data) and should be loaded using LoadLabelAddress().
   JumpTable* CreateJumpTable(std::vector<Mips64Label*>&& labels);
 
-  void Bc(Mips64Label* label);
-  void Balc(Mips64Label* label);
-  void Bltc(GpuRegister rs, GpuRegister rt, Mips64Label* label);
-  void Bltzc(GpuRegister rt, Mips64Label* label);
-  void Bgtzc(GpuRegister rt, Mips64Label* label);
-  void Bgec(GpuRegister rs, GpuRegister rt, Mips64Label* label);
-  void Bgezc(GpuRegister rt, Mips64Label* label);
-  void Blezc(GpuRegister rt, Mips64Label* label);
-  void Bltuc(GpuRegister rs, GpuRegister rt, Mips64Label* label);
-  void Bgeuc(GpuRegister rs, GpuRegister rt, Mips64Label* label);
-  void Beqc(GpuRegister rs, GpuRegister rt, Mips64Label* label);
-  void Bnec(GpuRegister rs, GpuRegister rt, Mips64Label* label);
-  void Beqzc(GpuRegister rs, Mips64Label* label);
-  void Bnezc(GpuRegister rs, Mips64Label* label);
-  void Bc1eqz(FpuRegister ft, Mips64Label* label);
-  void Bc1nez(FpuRegister ft, Mips64Label* label);
+  // When `is_bare` is false, the branches will promote to long (if the range
+  // of the individual branch instruction is insufficient) and the delay/
+  // forbidden slots will be taken care of.
+  // Use `is_bare = false` when the branch target may be out of reach of the
+  // individual branch instruction. IOW, this is for general purpose use.
+  //
+  // When `is_bare` is true, just the branch instructions will be generated
+  // leaving delay/forbidden slot filling up to the caller and the branches
+  // won't promote to long if the range is insufficient (you'll get a
+  // compilation error when the range is exceeded).
+  // Use `is_bare = true` when the branch target is known to be within reach
+  // of the individual branch instruction. This is intended for small local
+  // optimizations around delay/forbidden slots.
+  // Also prefer using `is_bare = true` if the code near the branch is to be
+  // patched or analyzed at run time (e.g. introspection) to
+  // - show the intent and
+  // - fail during compilation rather than during patching/execution if the
+  //   bare branch range is insufficient but the code size and layout are
+  //   expected to remain unchanged.
+  //
+  // R6 compact branches without delay/forbidden slots.
+  void Bc(Mips64Label* label, bool is_bare = false);
+  void Balc(Mips64Label* label, bool is_bare = false);
+  // R6 compact branches with forbidden slots.
+  void Bltc(GpuRegister rs, GpuRegister rt, Mips64Label* label, bool is_bare = false);
+  void Bltzc(GpuRegister rt, Mips64Label* label, bool is_bare = false);
+  void Bgtzc(GpuRegister rt, Mips64Label* label, bool is_bare = false);
+  void Bgec(GpuRegister rs, GpuRegister rt, Mips64Label* label, bool is_bare = false);
+  void Bgezc(GpuRegister rt, Mips64Label* label, bool is_bare = false);
+  void Blezc(GpuRegister rt, Mips64Label* label, bool is_bare = false);
+  void Bltuc(GpuRegister rs, GpuRegister rt, Mips64Label* label, bool is_bare = false);
+  void Bgeuc(GpuRegister rs, GpuRegister rt, Mips64Label* label, bool is_bare = false);
+  void Beqc(GpuRegister rs, GpuRegister rt, Mips64Label* label, bool is_bare = false);
+  void Bnec(GpuRegister rs, GpuRegister rt, Mips64Label* label, bool is_bare = false);
+  void Beqzc(GpuRegister rs, Mips64Label* label, bool is_bare = false);
+  void Bnezc(GpuRegister rs, Mips64Label* label, bool is_bare = false);
+  // R6 branches with delay slots.
+  void Bc1eqz(FpuRegister ft, Mips64Label* label, bool is_bare = false);
+  void Bc1nez(FpuRegister ft, Mips64Label* label, bool is_bare = false);
+  // R2 branches with delay slots that are also available on R6.
+  // These branches never promote to long. The `is_bare` parameter exists only
+  // to prevent programming mistakes: each of these branches CHECKs it, so the
+  // caller must pass `is_bare = true` explicitly (the default fails the CHECK).
+  void Bltz(GpuRegister rt, Mips64Label* label, bool is_bare = false);  // R2
+  void Bgtz(GpuRegister rt, Mips64Label* label, bool is_bare = false);  // R2
+  void Bgez(GpuRegister rt, Mips64Label* label, bool is_bare = false);  // R2
+  void Blez(GpuRegister rt, Mips64Label* label, bool is_bare = false);  // R2
+  void Beq(GpuRegister rs, GpuRegister rt, Mips64Label* label, bool is_bare = false);  // R2
+  void Bne(GpuRegister rs, GpuRegister rt, Mips64Label* label, bool is_bare = false);  // R2
+  void Beqz(GpuRegister rs, Mips64Label* label, bool is_bare = false);  // R2
+  void Bnez(GpuRegister rs, Mips64Label* label, bool is_bare = false);  // R2
 
   void EmitLoad(ManagedRegister m_dst, GpuRegister src_register, int32_t src_offset, size_t size);
   void AdjustBaseAndOffset(GpuRegister& base, int32_t& offset, bool is_doubleword);
@@ -1379,10 +1421,16 @@
   class Branch {
    public:
     enum Type {
-      // Short branches.
+      // R6 short branches (can be promoted to long).
       kUncondBranch,
       kCondBranch,
       kCall,
+      // R6 short branches (can't be promoted to long), forbidden/delay slots filled manually.
+      kBareUncondBranch,
+      kBareCondBranch,
+      kBareCall,
+      // R2 short branches (can't be promoted to long), delay slots filled manually.
+      kR2BareCondBranch,
       // Near label.
       kLabel,
       // Near literals.
@@ -1425,8 +1473,8 @@
       // different origins, e.g. to PC or PC+4. Encode the origin distance (as a number of 4-byte
       // instructions) from the instruction containing the offset.
       uint32_t pc_org;
-      // How large (in bits) a PC-relative offset can be for a given type of branch (kCondBranch is
-      // an exception: use kOffset23 for beqzc/bnezc).
+      // How large (in bits) a PC-relative offset can be for a given type of branch (kCondBranch
+      // and kBareCondBranch are exceptions: use kOffset23 for beqzc/bnezc).
       OffsetBits offset_size;
       // Some MIPS instructions with PC-relative offsets shift the offset by 2. Encode the shift
       // count.
@@ -1435,13 +1483,15 @@
     static const BranchInfo branch_info_[/* Type */];
 
     // Unconditional branch or call.
-    Branch(uint32_t location, uint32_t target, bool is_call);
+    Branch(uint32_t location, uint32_t target, bool is_call, bool is_bare);
     // Conditional branch.
-    Branch(uint32_t location,
+    Branch(bool is_r6,
+           uint32_t location,
            uint32_t target,
            BranchCondition condition,
            GpuRegister lhs_reg,
-           GpuRegister rhs_reg);
+           GpuRegister rhs_reg,
+           bool is_bare);
     // Label address (in literal area) or literal.
     Branch(uint32_t location, GpuRegister dest_reg, Type label_or_literal_type);
 
@@ -1467,6 +1517,7 @@
     uint32_t GetOldSize() const;
     uint32_t GetEndLocation() const;
     uint32_t GetOldEndLocation() const;
+    bool IsBare() const;
     bool IsLong() const;
     bool IsResolved() const;
 
@@ -1527,7 +1578,7 @@
 
    private:
     // Completes branch construction by determining and recording its type.
-    void InitializeType(Type initial_type);
+    void InitializeType(Type initial_type, bool is_r6);
     // Helper for the above.
     void InitShortOrLong(OffsetBits ofs_size, Type short_type, Type long_type);
 
@@ -1554,7 +1605,8 @@
   void EmitI26(int opcode, uint32_t imm26);
   void EmitFR(int opcode, int fmt, FpuRegister ft, FpuRegister fs, FpuRegister fd, int funct);
   void EmitFI(int opcode, int fmt, FpuRegister rt, uint16_t imm);
-  void EmitBcondc(BranchCondition cond, GpuRegister rs, GpuRegister rt, uint32_t imm16_21);
+  void EmitBcondR6(BranchCondition cond, GpuRegister rs, GpuRegister rt, uint32_t imm16_21);
+  void EmitBcondR2(BranchCondition cond, GpuRegister rs, GpuRegister rt, uint16_t imm16);
   void EmitMsa3R(int operation,
                  int df,
                  VectorRegister wt,
@@ -1568,12 +1620,14 @@
   void EmitMsa2R(int operation, int df, VectorRegister ws, VectorRegister wd, int minor_opcode);
   void EmitMsa2RF(int operation, int df, VectorRegister ws, VectorRegister wd, int minor_opcode);
 
-  void Buncond(Mips64Label* label);
+  void Buncond(Mips64Label* label, bool is_bare);
   void Bcond(Mips64Label* label,
+             bool is_r6,
+             bool is_bare,
              BranchCondition condition,
              GpuRegister lhs,
              GpuRegister rhs = ZERO);
-  void Call(Mips64Label* label);
+  void Call(Mips64Label* label, bool is_bare);
   void FinalizeLabeledBranch(Mips64Label* label);
 
   Branch* GetBranch(uint32_t branch_id);
diff --git a/compiler/utils/mips64/assembler_mips64_test.cc b/compiler/utils/mips64/assembler_mips64_test.cc
index 021e335..1541780 100644
--- a/compiler/utils/mips64/assembler_mips64_test.cc
+++ b/compiler/utils/mips64/assembler_mips64_test.cc
@@ -257,11 +257,46 @@
     return result;
   }
 
+  void BranchHelper(void (mips64::Mips64Assembler::*f)(mips64::Mips64Label*,
+                                                       bool),
+                    const std::string& instr_name,
+                    bool is_bare = false) {
+    mips64::Mips64Label label1, label2;
+    (Base::GetAssembler()->*f)(&label1, is_bare);
+    constexpr size_t kAdduCount1 = 63;
+    for (size_t i = 0; i != kAdduCount1; ++i) {
+      __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+    }
+    __ Bind(&label1);
+    (Base::GetAssembler()->*f)(&label2, is_bare);
+    constexpr size_t kAdduCount2 = 64;
+    for (size_t i = 0; i != kAdduCount2; ++i) {
+      __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+    }
+    __ Bind(&label2);
+    (Base::GetAssembler()->*f)(&label1, is_bare);
+    __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+
+    std::string expected =
+        ".set noreorder\n" +
+        instr_name + " 1f\n" +
+        RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
+        "1:\n" +
+        instr_name + " 2f\n" +
+        RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
+        "2:\n" +
+        instr_name + " 1b\n" +
+        "addu $zero, $zero, $zero\n";
+    DriverStr(expected, instr_name);
+  }
+
   void BranchCondOneRegHelper(void (mips64::Mips64Assembler::*f)(mips64::GpuRegister,
-                                                                 mips64::Mips64Label*),
-                              const std::string& instr_name) {
+                                                                 mips64::Mips64Label*,
+                                                                 bool),
+                              const std::string& instr_name,
+                              bool is_bare = false) {
     mips64::Mips64Label label;
-    (Base::GetAssembler()->*f)(mips64::A0, &label);
+    (Base::GetAssembler()->*f)(mips64::A0, &label, is_bare);
     constexpr size_t kAdduCount1 = 63;
     for (size_t i = 0; i != kAdduCount1; ++i) {
       __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
@@ -271,26 +306,30 @@
     for (size_t i = 0; i != kAdduCount2; ++i) {
       __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
     }
-    (Base::GetAssembler()->*f)(mips64::A1, &label);
+    (Base::GetAssembler()->*f)(mips64::A1, &label, is_bare);
+    __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
 
     std::string expected =
         ".set noreorder\n" +
-        instr_name + " $a0, 1f\n"
-        "nop\n" +
+        instr_name + " $a0, 1f\n" +
+        (is_bare ? "" : "nop\n") +
         RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
         "1:\n" +
         RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
-        instr_name + " $a1, 1b\n"
-        "nop\n";
+        instr_name + " $a1, 1b\n" +
+        (is_bare ? "" : "nop\n") +
+        "addu $zero, $zero, $zero\n";
     DriverStr(expected, instr_name);
   }
 
   void BranchCondTwoRegsHelper(void (mips64::Mips64Assembler::*f)(mips64::GpuRegister,
                                                                   mips64::GpuRegister,
-                                                                  mips64::Mips64Label*),
-                               const std::string& instr_name) {
+                                                                  mips64::Mips64Label*,
+                                                                  bool),
+                               const std::string& instr_name,
+                               bool is_bare = false) {
     mips64::Mips64Label label;
-    (Base::GetAssembler()->*f)(mips64::A0, mips64::A1, &label);
+    (Base::GetAssembler()->*f)(mips64::A0, mips64::A1, &label, is_bare);
     constexpr size_t kAdduCount1 = 63;
     for (size_t i = 0; i != kAdduCount1; ++i) {
       __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
@@ -300,17 +339,51 @@
     for (size_t i = 0; i != kAdduCount2; ++i) {
       __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
     }
-    (Base::GetAssembler()->*f)(mips64::A2, mips64::A3, &label);
+    (Base::GetAssembler()->*f)(mips64::A2, mips64::A3, &label, is_bare);
+    __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
 
     std::string expected =
         ".set noreorder\n" +
-        instr_name + " $a0, $a1, 1f\n"
-        "nop\n" +
+        instr_name + " $a0, $a1, 1f\n" +
+        (is_bare ? "" : "nop\n") +
         RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
         "1:\n" +
         RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
-        instr_name + " $a2, $a3, 1b\n"
-        "nop\n";
+        instr_name + " $a2, $a3, 1b\n" +
+        (is_bare ? "" : "nop\n") +
+        "addu $zero, $zero, $zero\n";
+    DriverStr(expected, instr_name);
+  }
+
+  void BranchFpuCondHelper(void (mips64::Mips64Assembler::*f)(mips64::FpuRegister,
+                                                              mips64::Mips64Label*,
+                                                              bool),
+                           const std::string& instr_name,
+                           bool is_bare = false) {
+    mips64::Mips64Label label;
+    (Base::GetAssembler()->*f)(mips64::F0, &label, is_bare);
+    constexpr size_t kAdduCount1 = 63;
+    for (size_t i = 0; i != kAdduCount1; ++i) {
+      __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+    }
+    __ Bind(&label);
+    constexpr size_t kAdduCount2 = 64;
+    for (size_t i = 0; i != kAdduCount2; ++i) {
+      __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+    }
+    (Base::GetAssembler()->*f)(mips64::F31, &label, is_bare);
+    __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+
+    std::string expected =
+        ".set noreorder\n" +
+        instr_name + " $f0, 1f\n" +
+        (is_bare ? "" : "nop\n") +
+        RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
+        "1:\n" +
+        RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
+        instr_name + " $f31, 1b\n" +
+        (is_bare ? "" : "nop\n") +
+        "addu $zero, $zero, $zero\n";
     DriverStr(expected, instr_name);
   }
 
@@ -668,41 +741,258 @@
             "sdc1");
 }
 
-////////////////
-// CALL / JMP //
-////////////////
+//////////////
+// BRANCHES //
+//////////////
 
 TEST_F(AssemblerMIPS64Test, Jalr) {
   DriverStr(".set noreorder\n" +
             RepeatRRNoDupes(&mips64::Mips64Assembler::Jalr, "jalr ${reg1}, ${reg2}"), "jalr");
 }
 
-TEST_F(AssemblerMIPS64Test, Balc) {
-  mips64::Mips64Label label1, label2;
-  __ Balc(&label1);
-  constexpr size_t kAdduCount1 = 63;
-  for (size_t i = 0; i != kAdduCount1; ++i) {
-    __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
-  }
-  __ Bind(&label1);
-  __ Balc(&label2);
-  constexpr size_t kAdduCount2 = 64;
-  for (size_t i = 0; i != kAdduCount2; ++i) {
-    __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
-  }
-  __ Bind(&label2);
-  __ Balc(&label1);
+TEST_F(AssemblerMIPS64Test, Bc) {
+  BranchHelper(&mips64::Mips64Assembler::Bc, "Bc");
+}
 
-  std::string expected =
+TEST_F(AssemblerMIPS64Test, Balc) {
+  BranchHelper(&mips64::Mips64Assembler::Balc, "Balc");
+}
+
+TEST_F(AssemblerMIPS64Test, Beqzc) {
+  BranchCondOneRegHelper(&mips64::Mips64Assembler::Beqzc, "Beqzc");
+}
+
+TEST_F(AssemblerMIPS64Test, Bnezc) {
+  BranchCondOneRegHelper(&mips64::Mips64Assembler::Bnezc, "Bnezc");
+}
+
+TEST_F(AssemblerMIPS64Test, Bltzc) {
+  BranchCondOneRegHelper(&mips64::Mips64Assembler::Bltzc, "Bltzc");
+}
+
+TEST_F(AssemblerMIPS64Test, Bgezc) {
+  BranchCondOneRegHelper(&mips64::Mips64Assembler::Bgezc, "Bgezc");
+}
+
+TEST_F(AssemblerMIPS64Test, Blezc) {
+  BranchCondOneRegHelper(&mips64::Mips64Assembler::Blezc, "Blezc");
+}
+
+TEST_F(AssemblerMIPS64Test, Bgtzc) {
+  BranchCondOneRegHelper(&mips64::Mips64Assembler::Bgtzc, "Bgtzc");
+}
+
+TEST_F(AssemblerMIPS64Test, Beqc) {
+  BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Beqc, "Beqc");
+}
+
+TEST_F(AssemblerMIPS64Test, Bnec) {
+  BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Bnec, "Bnec");
+}
+
+TEST_F(AssemblerMIPS64Test, Bltc) {
+  BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Bltc, "Bltc");
+}
+
+TEST_F(AssemblerMIPS64Test, Bgec) {
+  BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Bgec, "Bgec");
+}
+
+TEST_F(AssemblerMIPS64Test, Bltuc) {
+  BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Bltuc, "Bltuc");
+}
+
+TEST_F(AssemblerMIPS64Test, Bgeuc) {
+  BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Bgeuc, "Bgeuc");
+}
+
+TEST_F(AssemblerMIPS64Test, Bc1eqz) {
+  BranchFpuCondHelper(&mips64::Mips64Assembler::Bc1eqz, "Bc1eqz");
+}
+
+TEST_F(AssemblerMIPS64Test, Bc1nez) {
+  BranchFpuCondHelper(&mips64::Mips64Assembler::Bc1nez, "Bc1nez");
+}
+
+TEST_F(AssemblerMIPS64Test, BareBc) {
+  BranchHelper(&mips64::Mips64Assembler::Bc, "Bc", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS64Test, BareBalc) {
+  BranchHelper(&mips64::Mips64Assembler::Balc, "Balc", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS64Test, BareBeqzc) {
+  BranchCondOneRegHelper(&mips64::Mips64Assembler::Beqzc, "Beqzc", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS64Test, BareBnezc) {
+  BranchCondOneRegHelper(&mips64::Mips64Assembler::Bnezc, "Bnezc", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS64Test, BareBltzc) {
+  BranchCondOneRegHelper(&mips64::Mips64Assembler::Bltzc, "Bltzc", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS64Test, BareBgezc) {
+  BranchCondOneRegHelper(&mips64::Mips64Assembler::Bgezc, "Bgezc", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS64Test, BareBlezc) {
+  BranchCondOneRegHelper(&mips64::Mips64Assembler::Blezc, "Blezc", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS64Test, BareBgtzc) {
+  BranchCondOneRegHelper(&mips64::Mips64Assembler::Bgtzc, "Bgtzc", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS64Test, BareBeqc) {
+  BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Beqc, "Beqc", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS64Test, BareBnec) {
+  BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Bnec, "Bnec", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS64Test, BareBltc) {
+  BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Bltc, "Bltc", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS64Test, BareBgec) {
+  BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Bgec, "Bgec", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS64Test, BareBltuc) {
+  BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Bltuc, "Bltuc", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS64Test, BareBgeuc) {
+  BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Bgeuc, "Bgeuc", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS64Test, BareBc1eqz) {
+  BranchFpuCondHelper(&mips64::Mips64Assembler::Bc1eqz, "Bc1eqz", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS64Test, BareBc1nez) {
+  BranchFpuCondHelper(&mips64::Mips64Assembler::Bc1nez, "Bc1nez", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS64Test, BareBeqz) {
+  BranchCondOneRegHelper(&mips64::Mips64Assembler::Beqz, "Beqz", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS64Test, BareBnez) {
+  BranchCondOneRegHelper(&mips64::Mips64Assembler::Bnez, "Bnez", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS64Test, BareBltz) {
+  BranchCondOneRegHelper(&mips64::Mips64Assembler::Bltz, "Bltz", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS64Test, BareBgez) {
+  BranchCondOneRegHelper(&mips64::Mips64Assembler::Bgez, "Bgez", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS64Test, BareBlez) {
+  BranchCondOneRegHelper(&mips64::Mips64Assembler::Blez, "Blez", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS64Test, BareBgtz) {
+  BranchCondOneRegHelper(&mips64::Mips64Assembler::Bgtz, "Bgtz", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS64Test, BareBeq) {
+  BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Beq, "Beq", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS64Test, BareBne) {
+  BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Bne, "Bne", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS64Test, LongBeqc) {
+  mips64::Mips64Label label;
+  __ Beqc(mips64::A0, mips64::A1, &label);
+  constexpr uint32_t kAdduCount1 = (1u << 15) + 1;
+  for (uint32_t i = 0; i != kAdduCount1; ++i) {
+    __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+  }
+  __ Bind(&label);
+  constexpr uint32_t kAdduCount2 = (1u << 15) + 1;
+  for (uint32_t i = 0; i != kAdduCount2; ++i) {
+    __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+  }
+  __ Beqc(mips64::A2, mips64::A3, &label);
+
+  uint32_t offset_forward = 2 + kAdduCount1;  // 2: account for auipc and jic.
+  offset_forward <<= 2;
+  offset_forward += (offset_forward & 0x8000) << 1;  // Account for sign extension in jic.
+
+  uint32_t offset_back = -(kAdduCount2 + 1);  // 1: account for bnec.
+  offset_back <<= 2;
+  offset_back += (offset_back & 0x8000) << 1;  // Account for sign extension in jic.
+
+  std::ostringstream oss;
+  oss <<
       ".set noreorder\n"
-      "balc 1f\n" +
-      RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
-      "1:\n"
-      "balc 2f\n" +
-      RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
-      "2:\n"
-      "balc 1b\n";
-  DriverStr(expected, "Balc");
+      "bnec $a0, $a1, 1f\n"
+      "auipc $at, 0x" << std::hex << High16Bits(offset_forward) << "\n"
+      "jic $at, 0x" << std::hex << Low16Bits(offset_forward) << "\n"
+      "1:\n" <<
+      RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") <<
+      "2:\n" <<
+      RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") <<
+      "bnec $a2, $a3, 3f\n"
+      "auipc $at, 0x" << std::hex << High16Bits(offset_back) << "\n"
+      "jic $at, 0x" << std::hex << Low16Bits(offset_back) << "\n"
+      "3:\n";
+  std::string expected = oss.str();
+  DriverStr(expected, "LongBeqc");
+}
+
+TEST_F(AssemblerMIPS64Test, LongBeqzc) {
+  constexpr uint32_t kNopCount1 = (1u << 20) + 1;
+  constexpr uint32_t kNopCount2 = (1u << 20) + 1;
+  constexpr uint32_t kRequiredCapacity = (kNopCount1 + kNopCount2 + 6u) * 4u;
+  ASSERT_LT(__ GetBuffer()->Capacity(), kRequiredCapacity);
+  __ GetBuffer()->ExtendCapacity(kRequiredCapacity);
+  mips64::Mips64Label label;
+  __ Beqzc(mips64::A0, &label);
+  for (uint32_t i = 0; i != kNopCount1; ++i) {
+    __ Nop();
+  }
+  __ Bind(&label);
+  for (uint32_t i = 0; i != kNopCount2; ++i) {
+    __ Nop();
+  }
+  __ Beqzc(mips64::A2, &label);
+
+  uint32_t offset_forward = 2 + kNopCount1;  // 2: account for auipc and jic.
+  offset_forward <<= 2;
+  offset_forward += (offset_forward & 0x8000) << 1;  // Account for sign extension in jic.
+
+  uint32_t offset_back = -(kNopCount2 + 1);  // 1: account for bnezc.
+  offset_back <<= 2;
+  offset_back += (offset_back & 0x8000) << 1;  // Account for sign extension in jic.
+
+  // Note, we're using the ".fill" directive to tell the assembler to generate many NOPs
+  // instead of generating them ourselves in the source code. This saves test time.
+  std::ostringstream oss;
+  oss <<
+      ".set noreorder\n"
+      "bnezc $a0, 1f\n"
+      "auipc $at, 0x" << std::hex << High16Bits(offset_forward) << "\n"
+      "jic $at, 0x" << std::hex << Low16Bits(offset_forward) << "\n"
+      "1:\n" <<
+      ".fill 0x" << std::hex << kNopCount1 << " , 4, 0\n"
+      "2:\n" <<
+      ".fill 0x" << std::hex << kNopCount2 << " , 4, 0\n"
+      "bnezc $a2, 3f\n"
+      "auipc $at, 0x" << std::hex << High16Bits(offset_back) << "\n"
+      "jic $at, 0x" << std::hex << Low16Bits(offset_back) << "\n"
+      "3:\n";
+  std::string expected = oss.str();
+  DriverStr(expected, "LongBeqzc");
 }
 
 TEST_F(AssemblerMIPS64Test, LongBalc) {
@@ -756,174 +1046,6 @@
   DriverStr(expected, "LongBalc");
 }
 
-TEST_F(AssemblerMIPS64Test, Bc) {
-  mips64::Mips64Label label1, label2;
-  __ Bc(&label1);
-  constexpr size_t kAdduCount1 = 63;
-  for (size_t i = 0; i != kAdduCount1; ++i) {
-    __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
-  }
-  __ Bind(&label1);
-  __ Bc(&label2);
-  constexpr size_t kAdduCount2 = 64;
-  for (size_t i = 0; i != kAdduCount2; ++i) {
-    __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
-  }
-  __ Bind(&label2);
-  __ Bc(&label1);
-
-  std::string expected =
-      ".set noreorder\n"
-      "bc 1f\n" +
-      RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
-      "1:\n"
-      "bc 2f\n" +
-      RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
-      "2:\n"
-      "bc 1b\n";
-  DriverStr(expected, "Bc");
-}
-
-TEST_F(AssemblerMIPS64Test, Beqzc) {
-  BranchCondOneRegHelper(&mips64::Mips64Assembler::Beqzc, "Beqzc");
-}
-
-TEST_F(AssemblerMIPS64Test, Bnezc) {
-  BranchCondOneRegHelper(&mips64::Mips64Assembler::Bnezc, "Bnezc");
-}
-
-TEST_F(AssemblerMIPS64Test, Bltzc) {
-  BranchCondOneRegHelper(&mips64::Mips64Assembler::Bltzc, "Bltzc");
-}
-
-TEST_F(AssemblerMIPS64Test, Bgezc) {
-  BranchCondOneRegHelper(&mips64::Mips64Assembler::Bgezc, "Bgezc");
-}
-
-TEST_F(AssemblerMIPS64Test, Blezc) {
-  BranchCondOneRegHelper(&mips64::Mips64Assembler::Blezc, "Blezc");
-}
-
-TEST_F(AssemblerMIPS64Test, Bgtzc) {
-  BranchCondOneRegHelper(&mips64::Mips64Assembler::Bgtzc, "Bgtzc");
-}
-
-TEST_F(AssemblerMIPS64Test, Beqc) {
-  BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Beqc, "Beqc");
-}
-
-TEST_F(AssemblerMIPS64Test, Bnec) {
-  BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Bnec, "Bnec");
-}
-
-TEST_F(AssemblerMIPS64Test, Bltc) {
-  BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Bltc, "Bltc");
-}
-
-TEST_F(AssemblerMIPS64Test, Bgec) {
-  BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Bgec, "Bgec");
-}
-
-TEST_F(AssemblerMIPS64Test, Bltuc) {
-  BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Bltuc, "Bltuc");
-}
-
-TEST_F(AssemblerMIPS64Test, Bgeuc) {
-  BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Bgeuc, "Bgeuc");
-}
-
-TEST_F(AssemblerMIPS64Test, Bc1eqz) {
-    mips64::Mips64Label label;
-    __ Bc1eqz(mips64::F0, &label);
-    constexpr size_t kAdduCount1 = 63;
-    for (size_t i = 0; i != kAdduCount1; ++i) {
-      __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
-    }
-    __ Bind(&label);
-    constexpr size_t kAdduCount2 = 64;
-    for (size_t i = 0; i != kAdduCount2; ++i) {
-      __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
-    }
-    __ Bc1eqz(mips64::F31, &label);
-
-    std::string expected =
-        ".set noreorder\n"
-        "bc1eqz $f0, 1f\n"
-        "nop\n" +
-        RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
-        "1:\n" +
-        RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
-        "bc1eqz $f31, 1b\n"
-        "nop\n";
-    DriverStr(expected, "Bc1eqz");
-}
-
-TEST_F(AssemblerMIPS64Test, Bc1nez) {
-    mips64::Mips64Label label;
-    __ Bc1nez(mips64::F0, &label);
-    constexpr size_t kAdduCount1 = 63;
-    for (size_t i = 0; i != kAdduCount1; ++i) {
-      __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
-    }
-    __ Bind(&label);
-    constexpr size_t kAdduCount2 = 64;
-    for (size_t i = 0; i != kAdduCount2; ++i) {
-      __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
-    }
-    __ Bc1nez(mips64::F31, &label);
-
-    std::string expected =
-        ".set noreorder\n"
-        "bc1nez $f0, 1f\n"
-        "nop\n" +
-        RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
-        "1:\n" +
-        RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
-        "bc1nez $f31, 1b\n"
-        "nop\n";
-    DriverStr(expected, "Bc1nez");
-}
-
-TEST_F(AssemblerMIPS64Test, LongBeqc) {
-  mips64::Mips64Label label;
-  __ Beqc(mips64::A0, mips64::A1, &label);
-  constexpr uint32_t kAdduCount1 = (1u << 15) + 1;
-  for (uint32_t i = 0; i != kAdduCount1; ++i) {
-    __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
-  }
-  __ Bind(&label);
-  constexpr uint32_t kAdduCount2 = (1u << 15) + 1;
-  for (uint32_t i = 0; i != kAdduCount2; ++i) {
-    __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
-  }
-  __ Beqc(mips64::A2, mips64::A3, &label);
-
-  uint32_t offset_forward = 2 + kAdduCount1;  // 2: account for auipc and jic.
-  offset_forward <<= 2;
-  offset_forward += (offset_forward & 0x8000) << 1;  // Account for sign extension in jic.
-
-  uint32_t offset_back = -(kAdduCount2 + 1);  // 1: account for bnec.
-  offset_back <<= 2;
-  offset_back += (offset_back & 0x8000) << 1;  // Account for sign extension in jic.
-
-  std::ostringstream oss;
-  oss <<
-      ".set noreorder\n"
-      "bnec $a0, $a1, 1f\n"
-      "auipc $at, 0x" << std::hex << High16Bits(offset_forward) << "\n"
-      "jic $at, 0x" << std::hex << Low16Bits(offset_forward) << "\n"
-      "1:\n" <<
-      RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") <<
-      "2:\n" <<
-      RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") <<
-      "bnec $a2, $a3, 3f\n"
-      "auipc $at, 0x" << std::hex << High16Bits(offset_back) << "\n"
-      "jic $at, 0x" << std::hex << Low16Bits(offset_back) << "\n"
-      "3:\n";
-  std::string expected = oss.str();
-  DriverStr(expected, "LongBeqc");
-}
-
 //////////
 // MISC //
 //////////
@@ -961,235 +1083,6 @@
   DriverStr(RepeatRIb(&mips64::Mips64Assembler::Addiupc, 19, code), "Addiupc");
 }
 
-TEST_F(AssemblerMIPS64Test, LoadFarthestNearLabelAddress) {
-  mips64::Mips64Label label;
-  __ LoadLabelAddress(mips64::V0, &label);
-  constexpr uint32_t kAdduCount = 0x3FFDE;
-  for (uint32_t i = 0; i != kAdduCount; ++i) {
-    __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
-  }
-  __ Bind(&label);
-
-  std::string expected =
-      "lapc $v0, 1f\n" +
-      RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") +
-      "1:\n";
-  DriverStr(expected, "LoadFarthestNearLabelAddress");
-  EXPECT_EQ(__ GetLabelLocation(&label), (1 + kAdduCount) * 4);
-}
-
-TEST_F(AssemblerMIPS64Test, LoadNearestFarLabelAddress) {
-  mips64::Mips64Label label;
-  __ LoadLabelAddress(mips64::V0, &label);
-  constexpr uint32_t kAdduCount = 0x3FFDF;
-  for (uint32_t i = 0; i != kAdduCount; ++i) {
-    __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
-  }
-  __ Bind(&label);
-
-  std::string expected =
-      "1:\n"
-      "auipc $at, %hi(2f - 1b)\n"
-      "daddiu $v0, $at, %lo(2f - 1b)\n" +
-      RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") +
-      "2:\n";
-  DriverStr(expected, "LoadNearestFarLabelAddress");
-  EXPECT_EQ(__ GetLabelLocation(&label), (2 + kAdduCount) * 4);
-}
-
-TEST_F(AssemblerMIPS64Test, LoadFarthestNearLiteral) {
-  mips64::Literal* literal = __ NewLiteral<uint32_t>(0x12345678);
-  __ LoadLiteral(mips64::V0, mips64::kLoadWord, literal);
-  constexpr uint32_t kAdduCount = 0x3FFDE;
-  for (uint32_t i = 0; i != kAdduCount; ++i) {
-    __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
-  }
-
-  std::string expected =
-      "lwpc $v0, 1f\n" +
-      RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") +
-      "1:\n"
-      ".word 0x12345678\n";
-  DriverStr(expected, "LoadFarthestNearLiteral");
-  EXPECT_EQ(__ GetLabelLocation(literal->GetLabel()), (1 + kAdduCount) * 4);
-}
-
-TEST_F(AssemblerMIPS64Test, LoadNearestFarLiteral) {
-  mips64::Literal* literal = __ NewLiteral<uint32_t>(0x12345678);
-  __ LoadLiteral(mips64::V0, mips64::kLoadWord, literal);
-  constexpr uint32_t kAdduCount = 0x3FFDF;
-  for (uint32_t i = 0; i != kAdduCount; ++i) {
-    __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
-  }
-
-  std::string expected =
-      "1:\n"
-      "auipc $at, %hi(2f - 1b)\n"
-      "lw $v0, %lo(2f - 1b)($at)\n" +
-      RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") +
-      "2:\n"
-      ".word 0x12345678\n";
-  DriverStr(expected, "LoadNearestFarLiteral");
-  EXPECT_EQ(__ GetLabelLocation(literal->GetLabel()), (2 + kAdduCount) * 4);
-}
-
-TEST_F(AssemblerMIPS64Test, LoadFarthestNearLiteralUnsigned) {
-  mips64::Literal* literal = __ NewLiteral<uint32_t>(0x12345678);
-  __ LoadLiteral(mips64::V0, mips64::kLoadUnsignedWord, literal);
-  constexpr uint32_t kAdduCount = 0x3FFDE;
-  for (uint32_t i = 0; i != kAdduCount; ++i) {
-    __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
-  }
-
-  std::string expected =
-      "lwupc $v0, 1f\n" +
-      RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") +
-      "1:\n"
-      ".word 0x12345678\n";
-  DriverStr(expected, "LoadFarthestNearLiteralUnsigned");
-  EXPECT_EQ(__ GetLabelLocation(literal->GetLabel()), (1 + kAdduCount) * 4);
-}
-
-TEST_F(AssemblerMIPS64Test, LoadNearestFarLiteralUnsigned) {
-  mips64::Literal* literal = __ NewLiteral<uint32_t>(0x12345678);
-  __ LoadLiteral(mips64::V0, mips64::kLoadUnsignedWord, literal);
-  constexpr uint32_t kAdduCount = 0x3FFDF;
-  for (uint32_t i = 0; i != kAdduCount; ++i) {
-    __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
-  }
-
-  std::string expected =
-      "1:\n"
-      "auipc $at, %hi(2f - 1b)\n"
-      "lwu $v0, %lo(2f - 1b)($at)\n" +
-      RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") +
-      "2:\n"
-      ".word 0x12345678\n";
-  DriverStr(expected, "LoadNearestFarLiteralUnsigned");
-  EXPECT_EQ(__ GetLabelLocation(literal->GetLabel()), (2 + kAdduCount) * 4);
-}
-
-TEST_F(AssemblerMIPS64Test, LoadFarthestNearLiteralLong) {
-  mips64::Literal* literal = __ NewLiteral<uint64_t>(UINT64_C(0x0123456789ABCDEF));
-  __ LoadLiteral(mips64::V0, mips64::kLoadDoubleword, literal);
-  constexpr uint32_t kAdduCount = 0x3FFDD;
-  for (uint32_t i = 0; i != kAdduCount; ++i) {
-    __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
-  }
-
-  std::string expected =
-      "ldpc $v0, 1f\n" +
-      RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") +
-      "1:\n"
-      ".dword 0x0123456789ABCDEF\n";
-  DriverStr(expected, "LoadFarthestNearLiteralLong");
-  EXPECT_EQ(__ GetLabelLocation(literal->GetLabel()), (1 + kAdduCount) * 4);
-}
-
-TEST_F(AssemblerMIPS64Test, LoadNearestFarLiteralLong) {
-  mips64::Literal* literal = __ NewLiteral<uint64_t>(UINT64_C(0x0123456789ABCDEF));
-  __ LoadLiteral(mips64::V0, mips64::kLoadDoubleword, literal);
-  constexpr uint32_t kAdduCount = 0x3FFDE;
-  for (uint32_t i = 0; i != kAdduCount; ++i) {
-    __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
-  }
-
-  std::string expected =
-      "1:\n"
-      "auipc $at, %hi(2f - 1b)\n"
-      "ld $v0, %lo(2f - 1b)($at)\n" +
-      RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") +
-      "2:\n"
-      ".dword 0x0123456789ABCDEF\n";
-  DriverStr(expected, "LoadNearestFarLiteralLong");
-  EXPECT_EQ(__ GetLabelLocation(literal->GetLabel()), (2 + kAdduCount) * 4);
-}
-
-TEST_F(AssemblerMIPS64Test, LongLiteralAlignmentNop) {
-  mips64::Literal* literal1 = __ NewLiteral<uint64_t>(UINT64_C(0x0123456789ABCDEF));
-  mips64::Literal* literal2 = __ NewLiteral<uint64_t>(UINT64_C(0x5555555555555555));
-  mips64::Literal* literal3 = __ NewLiteral<uint64_t>(UINT64_C(0xAAAAAAAAAAAAAAAA));
-  __ LoadLiteral(mips64::A1, mips64::kLoadDoubleword, literal1);
-  __ LoadLiteral(mips64::A2, mips64::kLoadDoubleword, literal2);
-  __ LoadLiteral(mips64::A3, mips64::kLoadDoubleword, literal3);
-  __ LoadLabelAddress(mips64::V0, literal1->GetLabel());
-  __ LoadLabelAddress(mips64::V1, literal2->GetLabel());
-  // A nop will be inserted here before the 64-bit literals.
-
-  std::string expected =
-      "ldpc $a1, 1f\n"
-      // The GNU assembler incorrectly requires the ldpc instruction to be located
-      // at an address that's a multiple of 8. TODO: Remove this workaround if/when
-      // the assembler is fixed.
-      // "ldpc $a2, 2f\n"
-      ".word 0xECD80004\n"
-      "ldpc $a3, 3f\n"
-      "lapc $v0, 1f\n"
-      "lapc $v1, 2f\n"
-      "nop\n"
-      "1:\n"
-      ".dword 0x0123456789ABCDEF\n"
-      "2:\n"
-      ".dword 0x5555555555555555\n"
-      "3:\n"
-      ".dword 0xAAAAAAAAAAAAAAAA\n";
-  DriverStr(expected, "LongLiteralAlignmentNop");
-  EXPECT_EQ(__ GetLabelLocation(literal1->GetLabel()), 6 * 4u);
-  EXPECT_EQ(__ GetLabelLocation(literal2->GetLabel()), 8 * 4u);
-  EXPECT_EQ(__ GetLabelLocation(literal3->GetLabel()), 10 * 4u);
-}
-
-TEST_F(AssemblerMIPS64Test, LongLiteralAlignmentNoNop) {
-  mips64::Literal* literal1 = __ NewLiteral<uint64_t>(UINT64_C(0x0123456789ABCDEF));
-  mips64::Literal* literal2 = __ NewLiteral<uint64_t>(UINT64_C(0x5555555555555555));
-  __ LoadLiteral(mips64::A1, mips64::kLoadDoubleword, literal1);
-  __ LoadLiteral(mips64::A2, mips64::kLoadDoubleword, literal2);
-  __ LoadLabelAddress(mips64::V0, literal1->GetLabel());
-  __ LoadLabelAddress(mips64::V1, literal2->GetLabel());
-
-  std::string expected =
-      "ldpc $a1, 1f\n"
-      // The GNU assembler incorrectly requires the ldpc instruction to be located
-      // at an address that's a multiple of 8. TODO: Remove this workaround if/when
-      // the assembler is fixed.
-      // "ldpc $a2, 2f\n"
-      ".word 0xECD80003\n"
-      "lapc $v0, 1f\n"
-      "lapc $v1, 2f\n"
-      "1:\n"
-      ".dword 0x0123456789ABCDEF\n"
-      "2:\n"
-      ".dword 0x5555555555555555\n";
-  DriverStr(expected, "LongLiteralAlignmentNoNop");
-  EXPECT_EQ(__ GetLabelLocation(literal1->GetLabel()), 4 * 4u);
-  EXPECT_EQ(__ GetLabelLocation(literal2->GetLabel()), 6 * 4u);
-}
-
-TEST_F(AssemblerMIPS64Test, FarLongLiteralAlignmentNop) {
-  mips64::Literal* literal = __ NewLiteral<uint64_t>(UINT64_C(0x0123456789ABCDEF));
-  __ LoadLiteral(mips64::V0, mips64::kLoadDoubleword, literal);
-  __ LoadLabelAddress(mips64::V1, literal->GetLabel());
-  constexpr uint32_t kAdduCount = 0x3FFDF;
-  for (uint32_t i = 0; i != kAdduCount; ++i) {
-    __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
-  }
-  // A nop will be inserted here before the 64-bit literal.
-
-  std::string expected =
-      "1:\n"
-      "auipc $at, %hi(3f - 1b)\n"
-      "ld $v0, %lo(3f - 1b)($at)\n"
-      "2:\n"
-      "auipc $at, %hi(3f - 2b)\n"
-      "daddiu $v1, $at, %lo(3f - 2b)\n" +
-      RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") +
-      "nop\n"
-      "3:\n"
-      ".dword 0x0123456789ABCDEF\n";
-  DriverStr(expected, "FarLongLiteralAlignmentNop");
-  EXPECT_EQ(__ GetLabelLocation(literal->GetLabel()), (5 + kAdduCount) * 4);
-}
-
 TEST_F(AssemblerMIPS64Test, Addu) {
   DriverStr(RepeatRRR(&mips64::Mips64Assembler::Addu, "addu ${reg1}, ${reg2}, ${reg3}"), "addu");
 }
@@ -2740,6 +2633,235 @@
   EXPECT_EQ(tester.GetPathsCovered(), art::mips64::kLoadConst64PathAllPaths);
 }
 
+TEST_F(AssemblerMIPS64Test, LoadFarthestNearLabelAddress) {  // Label reachable by one lapc.
+  mips64::Mips64Label label;
+  __ LoadLabelAddress(mips64::V0, &label);
+  constexpr uint32_t kAdduCount = 0x3FFDE;  // With 0x3FFDF the far form is expected.
+  for (uint32_t i = 0; i != kAdduCount; ++i) {
+    __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+  }
+  __ Bind(&label);
+
+  std::string expected =
+      "lapc $v0, 1f\n" +
+      RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") +
+      "1:\n";
+  DriverStr(expected, "LoadFarthestNearLabelAddress");
+  EXPECT_EQ(__ GetLabelLocation(&label), (1 + kAdduCount) * 4);  // 1: account for lapc.
+}
+
+TEST_F(AssemblerMIPS64Test, LoadNearestFarLabelAddress) {  // Label needs auipc + daddiu.
+  mips64::Mips64Label label;
+  __ LoadLabelAddress(mips64::V0, &label);
+  constexpr uint32_t kAdduCount = 0x3FFDF;  // With 0x3FFDE a single lapc is expected.
+  for (uint32_t i = 0; i != kAdduCount; ++i) {
+    __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+  }
+  __ Bind(&label);
+
+  std::string expected =
+      "1:\n"
+      "auipc $at, %hi(2f - 1b)\n"
+      "daddiu $v0, $at, %lo(2f - 1b)\n" +
+      RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") +
+      "2:\n";
+  DriverStr(expected, "LoadNearestFarLabelAddress");
+  EXPECT_EQ(__ GetLabelLocation(&label), (2 + kAdduCount) * 4);  // 2: auipc and daddiu.
+}
+
+TEST_F(AssemblerMIPS64Test, LoadFarthestNearLiteral) {  // Literal reachable by one lwpc.
+  mips64::Literal* literal = __ NewLiteral<uint32_t>(0x12345678);
+  __ LoadLiteral(mips64::V0, mips64::kLoadWord, literal);
+  constexpr uint32_t kAdduCount = 0x3FFDE;  // With 0x3FFDF the far form is expected.
+  for (uint32_t i = 0; i != kAdduCount; ++i) {
+    __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+  }
+
+  std::string expected =
+      "lwpc $v0, 1f\n" +
+      RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") +
+      "1:\n"
+      ".word 0x12345678\n";
+  DriverStr(expected, "LoadFarthestNearLiteral");
+  EXPECT_EQ(__ GetLabelLocation(literal->GetLabel()), (1 + kAdduCount) * 4);  // 1: lwpc.
+}
+
+TEST_F(AssemblerMIPS64Test, LoadNearestFarLiteral) {  // Literal needs auipc + lw.
+  mips64::Literal* literal = __ NewLiteral<uint32_t>(0x12345678);
+  __ LoadLiteral(mips64::V0, mips64::kLoadWord, literal);
+  constexpr uint32_t kAdduCount = 0x3FFDF;  // With 0x3FFDE a single lwpc is expected.
+  for (uint32_t i = 0; i != kAdduCount; ++i) {
+    __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+  }
+
+  std::string expected =
+      "1:\n"
+      "auipc $at, %hi(2f - 1b)\n"
+      "lw $v0, %lo(2f - 1b)($at)\n" +
+      RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") +
+      "2:\n"
+      ".word 0x12345678\n";
+  DriverStr(expected, "LoadNearestFarLiteral");
+  EXPECT_EQ(__ GetLabelLocation(literal->GetLabel()), (2 + kAdduCount) * 4);  // 2: auipc, lw.
+}
+
+TEST_F(AssemblerMIPS64Test, LoadFarthestNearLiteralUnsigned) {  // Reachable by one lwupc.
+  mips64::Literal* literal = __ NewLiteral<uint32_t>(0x12345678);
+  __ LoadLiteral(mips64::V0, mips64::kLoadUnsignedWord, literal);
+  constexpr uint32_t kAdduCount = 0x3FFDE;  // With 0x3FFDF the far form is expected.
+  for (uint32_t i = 0; i != kAdduCount; ++i) {
+    __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+  }
+
+  std::string expected =
+      "lwupc $v0, 1f\n" +
+      RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") +
+      "1:\n"
+      ".word 0x12345678\n";
+  DriverStr(expected, "LoadFarthestNearLiteralUnsigned");
+  EXPECT_EQ(__ GetLabelLocation(literal->GetLabel()), (1 + kAdduCount) * 4);  // 1: lwupc.
+}
+
+TEST_F(AssemblerMIPS64Test, LoadNearestFarLiteralUnsigned) {  // Literal needs auipc + lwu.
+  mips64::Literal* literal = __ NewLiteral<uint32_t>(0x12345678);
+  __ LoadLiteral(mips64::V0, mips64::kLoadUnsignedWord, literal);
+  constexpr uint32_t kAdduCount = 0x3FFDF;  // With 0x3FFDE a single lwupc is expected.
+  for (uint32_t i = 0; i != kAdduCount; ++i) {
+    __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+  }
+
+  std::string expected =
+      "1:\n"
+      "auipc $at, %hi(2f - 1b)\n"
+      "lwu $v0, %lo(2f - 1b)($at)\n" +
+      RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") +
+      "2:\n"
+      ".word 0x12345678\n";
+  DriverStr(expected, "LoadNearestFarLiteralUnsigned");
+  EXPECT_EQ(__ GetLabelLocation(literal->GetLabel()), (2 + kAdduCount) * 4);  // 2: auipc, lwu.
+}
+
+TEST_F(AssemblerMIPS64Test, LoadFarthestNearLiteralLong) {  // 64-bit literal via one ldpc.
+  mips64::Literal* literal = __ NewLiteral<uint64_t>(UINT64_C(0x0123456789ABCDEF));
+  __ LoadLiteral(mips64::V0, mips64::kLoadDoubleword, literal);
+  constexpr uint32_t kAdduCount = 0x3FFDD;  // With 0x3FFDE the far form is expected.
+  for (uint32_t i = 0; i != kAdduCount; ++i) {
+    __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+  }
+
+  std::string expected =
+      "ldpc $v0, 1f\n" +
+      RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") +
+      "1:\n"
+      ".dword 0x0123456789ABCDEF\n";
+  DriverStr(expected, "LoadFarthestNearLiteralLong");
+  EXPECT_EQ(__ GetLabelLocation(literal->GetLabel()), (1 + kAdduCount) * 4);  // 1: ldpc.
+}
+
+TEST_F(AssemblerMIPS64Test, LoadNearestFarLiteralLong) {  // 64-bit literal via auipc + ld.
+  mips64::Literal* literal = __ NewLiteral<uint64_t>(UINT64_C(0x0123456789ABCDEF));
+  __ LoadLiteral(mips64::V0, mips64::kLoadDoubleword, literal);
+  constexpr uint32_t kAdduCount = 0x3FFDE;  // With 0x3FFDD a single ldpc is expected.
+  for (uint32_t i = 0; i != kAdduCount; ++i) {
+    __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+  }
+
+  std::string expected =
+      "1:\n"
+      "auipc $at, %hi(2f - 1b)\n"
+      "ld $v0, %lo(2f - 1b)($at)\n" +
+      RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") +
+      "2:\n"
+      ".dword 0x0123456789ABCDEF\n";
+  DriverStr(expected, "LoadNearestFarLiteralLong");
+  EXPECT_EQ(__ GetLabelLocation(literal->GetLabel()), (2 + kAdduCount) * 4);  // 2: auipc, ld.
+}
+
+TEST_F(AssemblerMIPS64Test, LongLiteralAlignmentNop) {  // Expects a nop before the literals.
+  mips64::Literal* literal1 = __ NewLiteral<uint64_t>(UINT64_C(0x0123456789ABCDEF));
+  mips64::Literal* literal2 = __ NewLiteral<uint64_t>(UINT64_C(0x5555555555555555));
+  mips64::Literal* literal3 = __ NewLiteral<uint64_t>(UINT64_C(0xAAAAAAAAAAAAAAAA));
+  __ LoadLiteral(mips64::A1, mips64::kLoadDoubleword, literal1);
+  __ LoadLiteral(mips64::A2, mips64::kLoadDoubleword, literal2);
+  __ LoadLiteral(mips64::A3, mips64::kLoadDoubleword, literal3);
+  __ LoadLabelAddress(mips64::V0, literal1->GetLabel());
+  __ LoadLabelAddress(mips64::V1, literal2->GetLabel());
+  // A nop will be inserted here before the 64-bit literals.
+
+  std::string expected =
+      "ldpc $a1, 1f\n"
+      // The GNU assembler incorrectly requires the ldpc instruction to be located
+      // at an address that's a multiple of 8. TODO: Remove this workaround if/when
+      // the assembler is fixed.
+      // "ldpc $a2, 2f\n"
+      ".word 0xECD80004\n"
+      "ldpc $a3, 3f\n"
+      "lapc $v0, 1f\n"
+      "lapc $v1, 2f\n"
+      "nop\n"
+      "1:\n"
+      ".dword 0x0123456789ABCDEF\n"
+      "2:\n"
+      ".dword 0x5555555555555555\n"
+      "3:\n"
+      ".dword 0xAAAAAAAAAAAAAAAA\n";
+  DriverStr(expected, "LongLiteralAlignmentNop");
+  EXPECT_EQ(__ GetLabelLocation(literal1->GetLabel()), 6 * 4u);  // 5 insns + 1 nop.
+  EXPECT_EQ(__ GetLabelLocation(literal2->GetLabel()), 8 * 4u);  // literal1 + 8 bytes.
+  EXPECT_EQ(__ GetLabelLocation(literal3->GetLabel()), 10 * 4u);  // literal2 + 8 bytes.
+}
+
+TEST_F(AssemblerMIPS64Test, LongLiteralAlignmentNoNop) {  // Expects no nop before the literals.
+  mips64::Literal* literal1 = __ NewLiteral<uint64_t>(UINT64_C(0x0123456789ABCDEF));
+  mips64::Literal* literal2 = __ NewLiteral<uint64_t>(UINT64_C(0x5555555555555555));
+  __ LoadLiteral(mips64::A1, mips64::kLoadDoubleword, literal1);
+  __ LoadLiteral(mips64::A2, mips64::kLoadDoubleword, literal2);
+  __ LoadLabelAddress(mips64::V0, literal1->GetLabel());
+  __ LoadLabelAddress(mips64::V1, literal2->GetLabel());
+
+  std::string expected =
+      "ldpc $a1, 1f\n"
+      // The GNU assembler incorrectly requires the ldpc instruction to be located
+      // at an address that's a multiple of 8. TODO: Remove this workaround if/when
+      // the assembler is fixed.
+      // "ldpc $a2, 2f\n"
+      ".word 0xECD80003\n"
+      "lapc $v0, 1f\n"
+      "lapc $v1, 2f\n"
+      "1:\n"
+      ".dword 0x0123456789ABCDEF\n"
+      "2:\n"
+      ".dword 0x5555555555555555\n";
+  DriverStr(expected, "LongLiteralAlignmentNoNop");
+  EXPECT_EQ(__ GetLabelLocation(literal1->GetLabel()), 4 * 4u);  // 4 insns, no padding.
+  EXPECT_EQ(__ GetLabelLocation(literal2->GetLabel()), 6 * 4u);  // literal1 + 8 bytes.
+}
+
+TEST_F(AssemblerMIPS64Test, FarLongLiteralAlignmentNop) {  // Far forms plus a padding nop.
+  mips64::Literal* literal = __ NewLiteral<uint64_t>(UINT64_C(0x0123456789ABCDEF));
+  __ LoadLiteral(mips64::V0, mips64::kLoadDoubleword, literal);
+  __ LoadLabelAddress(mips64::V1, literal->GetLabel());
+  constexpr uint32_t kAdduCount = 0x3FFDF;
+  for (uint32_t i = 0; i != kAdduCount; ++i) {
+    __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+  }
+  // A nop will be inserted here before the 64-bit literal.
+
+  std::string expected =
+      "1:\n"
+      "auipc $at, %hi(3f - 1b)\n"
+      "ld $v0, %lo(3f - 1b)($at)\n"
+      "2:\n"
+      "auipc $at, %hi(3f - 2b)\n"
+      "daddiu $v1, $at, %lo(3f - 2b)\n" +
+      RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") +
+      "nop\n"
+      "3:\n"
+      ".dword 0x0123456789ABCDEF\n";
+  DriverStr(expected, "FarLongLiteralAlignmentNop");
+  EXPECT_EQ(__ GetLabelLocation(literal->GetLabel()), (5 + kAdduCount) * 4);  // 5: 2 + 2 + nop.
+}
+
 // MSA instructions.
 
 TEST_F(AssemblerMIPS64Test, AndV) {
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index b50f1af..b89af10 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -1606,6 +1606,42 @@
 }
 
 
+void X86Assembler::punpckhbw(XmmRegister dst, XmmRegister src) {  // Emits 66 0F 68 + operand.
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);  // Prefix byte.
+  EmitUint8(0x0F);  // Two-byte opcode escape.
+  EmitUint8(0x68);  // Opcode byte for punpckhbw.
+  EmitXmmRegisterOperand(dst, src);  // Register operand byte for dst and src.
+}
+
+
+void X86Assembler::punpckhwd(XmmRegister dst, XmmRegister src) {  // Emits 66 0F 69 + operand.
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);  // Prefix byte.
+  EmitUint8(0x0F);  // Two-byte opcode escape.
+  EmitUint8(0x69);  // Opcode byte for punpckhwd.
+  EmitXmmRegisterOperand(dst, src);  // Register operand byte for dst and src.
+}
+
+
+void X86Assembler::punpckhdq(XmmRegister dst, XmmRegister src) {  // Emits 66 0F 6A + operand.
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);  // Prefix byte.
+  EmitUint8(0x0F);  // Two-byte opcode escape.
+  EmitUint8(0x6A);  // Opcode byte for punpckhdq.
+  EmitXmmRegisterOperand(dst, src);  // Register operand byte for dst and src.
+}
+
+
+void X86Assembler::punpckhqdq(XmmRegister dst, XmmRegister src) {  // Emits 66 0F 6D + operand.
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);  // Prefix byte.
+  EmitUint8(0x0F);  // Two-byte opcode escape.
+  EmitUint8(0x6D);  // Opcode byte for punpckhqdq.
+  EmitXmmRegisterOperand(dst, src);  // Register operand byte for dst and src.
+}
+
+
 void X86Assembler::psllw(XmmRegister reg, const Immediate& shift_count) {
   DCHECK(shift_count.is_uint8());
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index 8578340..511eeb9 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -546,6 +546,11 @@
   void punpckldq(XmmRegister dst, XmmRegister src);
   void punpcklqdq(XmmRegister dst, XmmRegister src);
 
+  void punpckhbw(XmmRegister dst, XmmRegister src);
+  void punpckhwd(XmmRegister dst, XmmRegister src);
+  void punpckhdq(XmmRegister dst, XmmRegister src);
+  void punpckhqdq(XmmRegister dst, XmmRegister src);
+
   void psllw(XmmRegister reg, const Immediate& shift_count);
   void pslld(XmmRegister reg, const Immediate& shift_count);
   void psllq(XmmRegister reg, const Immediate& shift_count);
diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc
index 3e1244e..d2122db 100644
--- a/compiler/utils/x86/assembler_x86_test.cc
+++ b/compiler/utils/x86/assembler_x86_test.cc
@@ -777,6 +777,22 @@
   DriverStr(RepeatFF(&x86::X86Assembler::punpcklqdq, "punpcklqdq %{reg2}, %{reg1}"), "punpcklqdq");
 }
 
+TEST_F(AssemblerX86Test, Punpckhbw) {  // Expected text is in %{reg2}, %{reg1} order.
+  DriverStr(RepeatFF(&x86::X86Assembler::punpckhbw, "punpckhbw %{reg2}, %{reg1}"), "punpckhbw");
+}
+
+TEST_F(AssemblerX86Test, Punpckhwd) {  // Expected text is in %{reg2}, %{reg1} order.
+  DriverStr(RepeatFF(&x86::X86Assembler::punpckhwd, "punpckhwd %{reg2}, %{reg1}"), "punpckhwd");
+}
+
+TEST_F(AssemblerX86Test, Punpckhdq) {  // Expected text is in %{reg2}, %{reg1} order.
+  DriverStr(RepeatFF(&x86::X86Assembler::punpckhdq, "punpckhdq %{reg2}, %{reg1}"), "punpckhdq");
+}
+
+TEST_F(AssemblerX86Test, Punpckhqdq) {  // Expected text is in %{reg2}, %{reg1} order.
+  DriverStr(RepeatFF(&x86::X86Assembler::punpckhqdq, "punpckhqdq %{reg2}, %{reg1}"), "punpckhqdq");
+}
+
 TEST_F(AssemblerX86Test, psllw) {
   GetAssembler()->psllw(x86::XMM0, CreateImmediate(16));
   DriverStr("psllw $0x10, %xmm0\n", "psllwi");
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index ea69a1c..3bff67d 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -1835,6 +1835,46 @@
 }
 
 
+void X86_64Assembler::punpckhbw(XmmRegister dst, XmmRegister src) {  // 66 [REX] 0F 68 + operand.
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);  // Prefix byte, emitted before any REX.
+  EmitOptionalRex32(dst, src);  // REX only if needed (presumably for xmm8-xmm15).
+  EmitUint8(0x0F);  // Two-byte opcode escape.
+  EmitUint8(0x68);  // Opcode byte for punpckhbw.
+  EmitXmmRegisterOperand(dst.LowBits(), src);  // Operand byte gets dst's low bits only.
+}
+
+
+void X86_64Assembler::punpckhwd(XmmRegister dst, XmmRegister src) {  // 66 [REX] 0F 69 + operand.
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);  // Prefix byte, emitted before any REX.
+  EmitOptionalRex32(dst, src);  // REX only if needed (presumably for xmm8-xmm15).
+  EmitUint8(0x0F);  // Two-byte opcode escape.
+  EmitUint8(0x69);  // Opcode byte for punpckhwd.
+  EmitXmmRegisterOperand(dst.LowBits(), src);  // Operand byte gets dst's low bits only.
+}
+
+
+void X86_64Assembler::punpckhdq(XmmRegister dst, XmmRegister src) {  // 66 [REX] 0F 6A + operand.
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);  // Prefix byte, emitted before any REX.
+  EmitOptionalRex32(dst, src);  // REX only if needed (presumably for xmm8-xmm15).
+  EmitUint8(0x0F);  // Two-byte opcode escape.
+  EmitUint8(0x6A);  // Opcode byte for punpckhdq.
+  EmitXmmRegisterOperand(dst.LowBits(), src);  // Operand byte gets dst's low bits only.
+}
+
+
+void X86_64Assembler::punpckhqdq(XmmRegister dst, XmmRegister src) {  // 66 [REX] 0F 6D + operand.
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);  // Prefix byte, emitted before any REX.
+  EmitOptionalRex32(dst, src);  // REX only if needed (presumably for xmm8-xmm15).
+  EmitUint8(0x0F);  // Two-byte opcode escape.
+  EmitUint8(0x6D);  // Opcode byte for punpckhqdq.
+  EmitXmmRegisterOperand(dst.LowBits(), src);  // Operand byte gets dst's low bits only.
+}
+
+
 void X86_64Assembler::psllw(XmmRegister reg, const Immediate& shift_count) {
   DCHECK(shift_count.is_uint8());
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
@@ -1931,6 +1971,18 @@
 }
 
 
+void X86_64Assembler::psrldq(XmmRegister reg, const Immediate& shift_count) {
+  DCHECK(shift_count.is_uint8());  // The count is emitted below as a single byte.
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);  // Prefix byte, emitted before any REX.
+  EmitOptionalRex(false, false, false, false, reg.NeedsRex());  // REX only if reg needs it.
+  EmitUint8(0x0F);  // Two-byte opcode escape.
+  EmitUint8(0x73);  // Opcode byte.
+  EmitXmmRegisterOperand(3, reg);  // 3 is the /3 opcode extension that selects psrldq.
+  EmitUint8(shift_count.value());  // Immediate: psrldq shifts by bytes, not bits.
+}
+
+
 void X86_64Assembler::fldl(const Address& src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0xDD);
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index 41450bf..3dab235 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -574,6 +574,11 @@
   void punpckldq(XmmRegister dst, XmmRegister src);
   void punpcklqdq(XmmRegister dst, XmmRegister src);
 
+  void punpckhbw(XmmRegister dst, XmmRegister src);
+  void punpckhwd(XmmRegister dst, XmmRegister src);
+  void punpckhdq(XmmRegister dst, XmmRegister src);
+  void punpckhqdq(XmmRegister dst, XmmRegister src);
+
   void psllw(XmmRegister reg, const Immediate& shift_count);
   void pslld(XmmRegister reg, const Immediate& shift_count);
   void psllq(XmmRegister reg, const Immediate& shift_count);
@@ -585,6 +590,7 @@
   void psrlw(XmmRegister reg, const Immediate& shift_count);
   void psrld(XmmRegister reg, const Immediate& shift_count);
   void psrlq(XmmRegister reg, const Immediate& shift_count);
+  void psrldq(XmmRegister reg, const Immediate& shift_count);
 
   void flds(const Address& src);
   void fstps(const Address& dst);
diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc
index 9f2c44d..85afee0 100644
--- a/compiler/utils/x86_64/assembler_x86_64_test.cc
+++ b/compiler/utils/x86_64/assembler_x86_64_test.cc
@@ -1465,6 +1465,22 @@
   DriverStr(RepeatFF(&x86_64::X86_64Assembler::punpcklqdq, "punpcklqdq %{reg2}, %{reg1}"), "punpcklqdq");
 }
 
+TEST_F(AssemblerX86_64Test, Punpckhbw) {  // Expected text is in %{reg2}, %{reg1} order.
+  DriverStr(RepeatFF(&x86_64::X86_64Assembler::punpckhbw, "punpckhbw %{reg2}, %{reg1}"), "punpckhbw");
+}
+
+TEST_F(AssemblerX86_64Test, Punpckhwd) {  // Expected text is in %{reg2}, %{reg1} order.
+  DriverStr(RepeatFF(&x86_64::X86_64Assembler::punpckhwd, "punpckhwd %{reg2}, %{reg1}"), "punpckhwd");
+}
+
+TEST_F(AssemblerX86_64Test, Punpckhdq) {  // Expected text is in %{reg2}, %{reg1} order.
+  DriverStr(RepeatFF(&x86_64::X86_64Assembler::punpckhdq, "punpckhdq %{reg2}, %{reg1}"), "punpckhdq");
+}
+
+TEST_F(AssemblerX86_64Test, Punpckhqdq) {  // Expected text is in %{reg2}, %{reg1} order.
+  DriverStr(RepeatFF(&x86_64::X86_64Assembler::punpckhqdq, "punpckhqdq %{reg2}, %{reg1}"), "punpckhqdq");
+}
+
 TEST_F(AssemblerX86_64Test, Psllw) {
   GetAssembler()->psllw(x86_64::XmmRegister(x86_64::XMM0),  x86_64::Immediate(1));
   GetAssembler()->psllw(x86_64::XmmRegister(x86_64::XMM15), x86_64::Immediate(2));
@@ -1521,6 +1537,13 @@
             "psrlq $2, %xmm15\n", "pslrqi");
 }
 
+TEST_F(AssemblerX86_64Test, Psrldq) {  // Covers xmm0 and xmm15.
+  GetAssembler()->psrldq(x86_64::XmmRegister(x86_64::XMM0),  x86_64::Immediate(1));
+  GetAssembler()->psrldq(x86_64::XmmRegister(x86_64::XMM15), x86_64::Immediate(2));
+  DriverStr("psrldq $1, %xmm0\n"
+            "psrldq $2, %xmm15\n", "pslrdqi");  // NOTE(review): tag reads "pslr"; typo for "psrl"?
+}
+
 TEST_F(AssemblerX86_64Test, UcomissAddress) {
   GetAssembler()->ucomiss(x86_64::XmmRegister(x86_64::XMM0), x86_64::Address(
       x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12));
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index e9ec5fa..e3e0180 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -1777,11 +1777,15 @@
   }
 
   bool ShouldCompileDexFilesIndividually() const {
-    // Compile individually if we are not building an image, not using any compilation, and are
-    // using multidex.
-    // This means extract, verify, and quicken, will use the individual compilation mode (to reduce
-    // RAM used by the compiler).
+    // Compile individually if we are:
+    // 1. not building an image,
+    // 2. not verifying a vdex file,
+    // 3. using multidex,
+    // 4. not doing any AOT compilation.
+    // This means extract, no-vdex verify, and quicken, will use the individual compilation
+    // mode (to reduce RAM used by the compiler).
     return !IsImage() &&
+        !update_input_vdex_ &&
         dex_files_.size() > 1 &&
         !CompilerFilter::IsAotCompilationEnabled(compiler_options_->GetCompilerFilter());
   }
diff --git a/dex2oat/dex2oat_test.cc b/dex2oat/dex2oat_test.cc
index 6a9d979..0aa766c 100644
--- a/dex2oat/dex2oat_test.cc
+++ b/dex2oat/dex2oat_test.cc
@@ -950,6 +950,7 @@
 }
 
 TEST_F(Dex2oatWatchdogTest, TestWatchdogTrigger) {
+  TEST_DISABLED_FOR_MEMORY_TOOL_VALGRIND();  // b/63052624
   // Check with ten milliseconds.
   RunTest(false, { "--watchdog-timeout=10" });
 }
@@ -1187,4 +1188,164 @@
   EXPECT_GT(app_image_file->GetLength(), 0u);
 }
 
+// Test that dexlayout section info is correctly written to the oat file for profile based
+// compilation.
+TEST_F(Dex2oatTest, LayoutSections) {
+  using Hotness = ProfileCompilationInfo::MethodHotness;
+  std::unique_ptr<const DexFile> dex(OpenTestDexFile("ManyMethods"));
+  ScratchFile profile_file;
+  // We can only layout method indices with code items, figure out which ones have this property
+  // first.
+  std::vector<uint16_t> methods;
+  {
+    const DexFile::TypeId* type_id = dex->FindTypeId("LManyMethods;");
+    dex::TypeIndex type_idx = dex->GetIndexForTypeId(*type_id);
+    const DexFile::ClassDef* class_def = dex->FindClassDef(type_idx);
+    ClassDataItemIterator it(*dex, dex->GetClassData(*class_def));
+    it.SkipAllFields();
+    std::set<size_t> code_item_offsets;
+    for (; it.HasNextDirectMethod() || it.HasNextVirtualMethod(); it.Next()) {
+      const uint16_t method_idx = it.GetMemberIndex();
+      const size_t code_item_offset = it.GetMethodCodeItemOffset();
+      if (code_item_offsets.insert(code_item_offset).second) {
+        // Unique code item, add the method index.
+        methods.push_back(method_idx);
+      }
+    }
+    DCHECK(!it.HasNext());
+  }
+  ASSERT_GE(methods.size(), 8u);
+  std::vector<uint16_t> hot_methods = {methods[1], methods[3], methods[5]};
+  std::vector<uint16_t> startup_methods = {methods[1], methods[2], methods[7]};
+  std::vector<uint16_t> post_methods = {methods[0], methods[2], methods[6]};
+  // Here, we build the profile from the method lists.
+  ProfileCompilationInfo info;
+  info.AddMethodsForDex(
+      static_cast<Hotness::Flag>(Hotness::kFlagHot | Hotness::kFlagStartup),
+      dex.get(),
+      hot_methods.begin(),
+      hot_methods.end());
+  info.AddMethodsForDex(
+      Hotness::kFlagStartup,
+      dex.get(),
+      startup_methods.begin(),
+      startup_methods.end());
+  info.AddMethodsForDex(
+      Hotness::kFlagPostStartup,
+      dex.get(),
+      post_methods.begin(),
+      post_methods.end());
+  for (uint16_t id : hot_methods) {
+    EXPECT_TRUE(info.GetMethodHotness(MethodReference(dex.get(), id)).IsHot());
+    EXPECT_TRUE(info.GetMethodHotness(MethodReference(dex.get(), id)).IsStartup());
+  }
+  for (uint16_t id : startup_methods) {
+    EXPECT_TRUE(info.GetMethodHotness(MethodReference(dex.get(), id)).IsStartup());
+  }
+  for (uint16_t id : post_methods) {
+    EXPECT_TRUE(info.GetMethodHotness(MethodReference(dex.get(), id)).IsPostStartup());
+  }
+  // Save the profile since we want to use it with dex2oat to produce an oat file.
+  ASSERT_TRUE(info.Save(profile_file.GetFd()));
+  // Generate a profile based odex.
+  const std::string dir = GetScratchDir();
+  const std::string oat_filename = dir + "/base.oat";
+  const std::string vdex_filename = dir + "/base.vdex";
+  std::string error_msg;
+  const int res = GenerateOdexForTestWithStatus(
+      {dex->GetLocation()},
+      oat_filename,
+      CompilerFilter::Filter::kQuicken,
+      &error_msg,
+      {"--profile-file=" + profile_file.GetFilename()});
+  EXPECT_EQ(res, 0);
+
+  // Open our generated oat file.
+  std::unique_ptr<OatFile> odex_file(OatFile::Open(oat_filename.c_str(),
+                                                   oat_filename.c_str(),
+                                                   nullptr,
+                                                   nullptr,
+                                                   false,
+                                                   /*low_4gb*/false,
+                                                   dex->GetLocation().c_str(),
+                                                   &error_msg));
+  ASSERT_TRUE(odex_file != nullptr);
+  std::vector<const OatDexFile*> oat_dex_files = odex_file->GetOatDexFiles();
+  ASSERT_EQ(oat_dex_files.size(), 1u);
+  // Check that the code sections match what we expect.
+  for (const OatDexFile* oat_dex : oat_dex_files) {
+    const DexLayoutSections* const sections = oat_dex->GetDexLayoutSections();
+    // Testing of logging the sections.
+    ASSERT_TRUE(sections != nullptr);
+    LOG(INFO) << *sections;
+
+    // Load the sections into temporary variables for convenience.
+    const DexLayoutSection& code_section =
+        sections->sections_[static_cast<size_t>(DexLayoutSections::SectionType::kSectionTypeCode)];
+    const DexLayoutSection::Subsection& section_hot_code =
+        code_section.parts_[static_cast<size_t>(LayoutType::kLayoutTypeHot)];
+    const DexLayoutSection::Subsection& section_sometimes_used =
+        code_section.parts_[static_cast<size_t>(LayoutType::kLayoutTypeSometimesUsed)];
+    const DexLayoutSection::Subsection& section_startup_only =
+        code_section.parts_[static_cast<size_t>(LayoutType::kLayoutTypeStartupOnly)];
+    const DexLayoutSection::Subsection& section_unused =
+        code_section.parts_[static_cast<size_t>(LayoutType::kLayoutTypeUnused)];
+
+    // All the sections should be non-empty.
+    EXPECT_GT(section_hot_code.size_, 0u);
+    EXPECT_GT(section_sometimes_used.size_, 0u);
+    EXPECT_GT(section_startup_only.size_, 0u);
+    EXPECT_GT(section_unused.size_, 0u);
+
+    // Open the dex file since we need to peek at the code items to verify the layout matches what
+    // we expect.
+    std::unique_ptr<const DexFile> dex_file(oat_dex->OpenDexFile(&error_msg));
+    ASSERT_TRUE(dex_file != nullptr) << error_msg;
+    const DexFile::TypeId* type_id = dex_file->FindTypeId("LManyMethods;");
+    ASSERT_TRUE(type_id != nullptr);
+    dex::TypeIndex type_idx = dex_file->GetIndexForTypeId(*type_id);
+    const DexFile::ClassDef* class_def = dex_file->FindClassDef(type_idx);
+    ASSERT_TRUE(class_def != nullptr);
+
+    // Count how many code items are for each category, there should be at least one per category.
+    size_t hot_count = 0;
+    size_t post_startup_count = 0;
+    size_t startup_count = 0;
+    size_t unused_count = 0;
+    // Visit all of the methods of the main class and cross reference the method indices to their
+    // corresponding code item offsets to verify the layout.
+    ClassDataItemIterator it(*dex_file, dex_file->GetClassData(*class_def));
+    it.SkipAllFields();
+    for (; it.HasNextDirectMethod() || it.HasNextVirtualMethod(); it.Next()) {
+      const size_t method_idx = it.GetMemberIndex();
+      const size_t code_item_offset = it.GetMethodCodeItemOffset();
+      const bool is_hot = ContainsElement(hot_methods, method_idx);
+      const bool is_startup = ContainsElement(startup_methods, method_idx);
+      const bool is_post_startup = ContainsElement(post_methods, method_idx);
+      if (is_hot) {
+        // Hot is highest precedence, check that the hot methods are in the hot section.
+        EXPECT_LT(code_item_offset - section_hot_code.offset_, section_hot_code.size_);
+        ++hot_count;
+      } else if (is_post_startup) {
+        // Post startup is sometimes used section.
+        EXPECT_LT(code_item_offset - section_sometimes_used.offset_, section_sometimes_used.size_);
+        ++post_startup_count;
+      } else if (is_startup) {
+        // Startup at this point means not hot or post startup, these must be startup only then.
+        EXPECT_LT(code_item_offset - section_startup_only.offset_, section_startup_only.size_);
+        ++startup_count;
+      } else {
+        // If no flags are set, the method should be unused.
+        EXPECT_LT(code_item_offset - section_unused.offset_, section_unused.size_);
+        ++unused_count;
+      }
+    }
+    DCHECK(!it.HasNext());
+    EXPECT_GT(hot_count, 0u);
+    EXPECT_GT(post_startup_count, 0u);
+    EXPECT_GT(startup_count, 0u);
+    EXPECT_GT(unused_count, 0u);
+  }
+}
+
 }  // namespace art
diff --git a/dexlayout/dexlayout.cc b/dexlayout/dexlayout.cc
index fd92d77..401a3ee 100644
--- a/dexlayout/dexlayout.cc
+++ b/dexlayout/dexlayout.cc
@@ -34,6 +34,7 @@
 #include "android-base/stringprintf.h"
 
 #include "dex_file-inl.h"
+#include "dex_file_layout.h"
 #include "dex_file_verifier.h"
 #include "dex_instruction-inl.h"
 #include "dex_ir_builder.h"
@@ -1680,21 +1681,13 @@
     }
   }
 
-  enum CodeItemState {
-    kCodeItemStateExecStartupOnly = 0,
-    kCodeItemStateHot,
-    kCodeItemStateClinit,
-    kCodeItemStateExec,
-    kCodeItemStateNotExecuted,
-    kCodeItemStateSize,
-  };
-
   static constexpr InvokeType invoke_types[] = {
     kDirect,
     kVirtual
   };
 
-  std::unordered_set<dex_ir::CodeItem*> code_items[kCodeItemStateSize];
+  const size_t num_layout_types = static_cast<size_t>(LayoutType::kLayoutTypeCount);
+  std::unordered_set<dex_ir::CodeItem*> code_items[num_layout_types];
   for (InvokeType invoke_type : invoke_types) {
     for (std::unique_ptr<dex_ir::ClassDef>& class_def : header_->GetCollections().ClassDefs()) {
       const bool is_profile_class =
@@ -1719,20 +1712,20 @@
         const bool is_startup_clinit = is_profile_class && is_clinit;
         using Hotness = ProfileCompilationInfo::MethodHotness;
         Hotness hotness = info_->GetMethodHotness(MethodReference(dex_file, method_id->GetIndex()));
-        CodeItemState state = kCodeItemStateNotExecuted;
+        LayoutType state = LayoutType::kLayoutTypeUnused;
         if (hotness.IsHot()) {
           // Hot code is compiled, maybe one day it won't be accessed. So lay it out together for
           // now.
-          state = kCodeItemStateHot;
+          state = LayoutType::kLayoutTypeHot;
         } else if (is_startup_clinit || hotness.GetFlags() == Hotness::kFlagStartup) {
           // Startup clinit or a method that only has the startup flag.
-          state = kCodeItemStateExecStartupOnly;
+          state = LayoutType::kLayoutTypeStartupOnly;
         } else if (is_clinit) {
-          state = kCodeItemStateClinit;
+          state = LayoutType::kLayoutTypeUsedOnce;
         } else if (hotness.IsInProfile()) {
-          state = kCodeItemStateExec;
+          state = LayoutType::kLayoutTypeSometimesUsed;
         }
-        code_items[state].insert(code_item);
+        code_items[static_cast<size_t>(state)].insert(code_item);
       }
     }
   }
@@ -1741,10 +1734,11 @@
   int32_t total_diff = 0;
   // The relative placement has no effect on correctness; it is used to ensure
   // the layout is deterministic
-  for (std::unordered_set<dex_ir::CodeItem*>& code_items_set : code_items) {
+  for (size_t index = 0; index < num_layout_types; ++index) {
+    const std::unordered_set<dex_ir::CodeItem*>& code_items_set = code_items[index];
     // diff is reset for each class of code items.
     int32_t diff = 0;
-    uint32_t start_offset = code_item_offset;
+    const uint32_t start_offset = code_item_offset;
     for (dex_ir::ClassData* data : new_class_data_order) {
       data->SetOffset(data->GetOffset() + diff);
       for (InvokeType invoke_type : invoke_types) {
@@ -1763,9 +1757,13 @@
         }
       }
     }
-    for (size_t i = 0; i < kCodeItemStateSize; ++i) {
+    DexLayoutSection& code_section = dex_sections_.sections_[static_cast<size_t>(
+        DexLayoutSections::SectionType::kSectionTypeCode)];
+    code_section.parts_[index].offset_ = start_offset;
+    code_section.parts_[index].size_ = code_item_offset - start_offset;
+    for (size_t i = 0; i < num_layout_types; ++i) {
       VLOG(dex) << "Code item layout bucket " << i << " count=" << code_items[i].size()
-                << " bytes=" << code_item_offset - start_offset;
+                << " bytes=" << code_section.parts_[i].size_;
     }
     total_diff += diff;
   }
diff --git a/dexlayout/dexlayout.h b/dexlayout/dexlayout.h
index ed011d6..9f6e8a4 100644
--- a/dexlayout/dexlayout.h
+++ b/dexlayout/dexlayout.h
@@ -26,6 +26,7 @@
 #include <stdint.h>
 #include <stdio.h>
 
+#include "dex_file_layout.h"
 #include "dex_ir.h"
 #include "mem_map.h"
 
@@ -84,6 +85,10 @@
 
   MemMap* GetAndReleaseMemMap() { return mem_map_.release(); }
 
+  const DexLayoutSections& GetSections() const {
+    return dex_sections_;
+  }
+
  private:
   void DumpAnnotationSetItem(dex_ir::AnnotationSetItem* set_item);
   void DumpBytecodes(uint32_t idx, const dex_ir::CodeItem* code, uint32_t code_offset);
@@ -129,6 +134,7 @@
   FILE* out_file_;
   dex_ir::Header* header_;
   std::unique_ptr<MemMap> mem_map_;
+  DexLayoutSections dex_sections_;
 
   DISALLOW_COPY_AND_ASSIGN(DexLayout);
 };
diff --git a/disassembler/disassembler_mips.cc b/disassembler/disassembler_mips.cc
index 1a395a4..938ea5d 100644
--- a/disassembler/disassembler_mips.cc
+++ b/disassembler/disassembler_mips.cc
@@ -112,6 +112,8 @@
   { kRTypeMask, 34, "sub", "DST", },
   { kRTypeMask, 35, "subu", "DST", },
   { kRTypeMask, 36, "and", "DST", },
+  { kRTypeMask | (0x1f << 16), 37 | (0 << 16), "move", "DS" },
+  { kRTypeMask | (0x1f << 21), 37 | (0 << 21), "move", "DT" },
   { kRTypeMask, 37, "or", "DST", },
   { kRTypeMask, 38, "xor", "DST", },
   { kRTypeMask, 39, "nor", "DST", },
@@ -214,13 +216,19 @@
   { kJTypeMask, 3 << kOpcodeShift, "jal", "L" },
 
   // I-type instructions.
+  { kITypeMask | (0x3ff << 16), 4 << kOpcodeShift, "b", "B" },
+  { kITypeMask | (0x1f << 16), 4 << kOpcodeShift | (0 << 16), "beqz", "SB" },
+  { kITypeMask | (0x1f << 21), 4 << kOpcodeShift | (0 << 21), "beqz", "TB" },
   { kITypeMask, 4 << kOpcodeShift, "beq", "STB" },
+  { kITypeMask | (0x1f << 16), 5 << kOpcodeShift | (0 << 16), "bnez", "SB" },
+  { kITypeMask | (0x1f << 21), 5 << kOpcodeShift | (0 << 21), "bnez", "TB" },
   { kITypeMask, 5 << kOpcodeShift, "bne", "STB" },
   { kITypeMask | (0x1f << 16), 1 << kOpcodeShift | (1 << 16), "bgez", "SB" },
   { kITypeMask | (0x1f << 16), 1 << kOpcodeShift | (0 << 16), "bltz", "SB" },
-  { kITypeMask | (0x1f << 16), 1 << kOpcodeShift | (2 << 16), "bltzl", "SB" },
+  { kITypeMask | (0x3ff << 16), 1 << kOpcodeShift | (16 << 16), "nal", "" },
   { kITypeMask | (0x1f << 16), 1 << kOpcodeShift | (16 << 16), "bltzal", "SB" },
-  { kITypeMask | (0x1f << 16), 1 << kOpcodeShift | (18 << 16), "bltzall", "SB" },
+  { kITypeMask | (0x3ff << 16), 1 << kOpcodeShift | (17 << 16), "bal", "B" },
+  { kITypeMask | (0x1f << 16), 1 << kOpcodeShift | (17 << 16), "bgezal", "SB" },
   { kITypeMask | (0x1f << 16), 6 << kOpcodeShift | (0 << 16), "blez", "SB" },
   { kITypeMask, 6 << kOpcodeShift, "bgeuc", "STB" },
   { kITypeMask | (0x1f << 16), 7 << kOpcodeShift | (0 << 16), "bgtz", "SB" },
@@ -228,18 +236,16 @@
   { kITypeMask | (0x1f << 16), 1 << kOpcodeShift | (6 << 16), "dahi", "Si", },
   { kITypeMask | (0x1f << 16), 1 << kOpcodeShift | (30 << 16), "dati", "Si", },
 
-  { 0xffff0000, (4 << kOpcodeShift), "b", "B" },
-  { 0xffff0000, (1 << kOpcodeShift) | (17 << 16), "bal", "B" },
-
   { kITypeMask, 8 << kOpcodeShift, "beqc", "STB" },
 
-  { kITypeMask, 8 << kOpcodeShift, "addi", "TSi", },
+  { kITypeMask | (0x1f << 21), 9 << kOpcodeShift | (0 << 21), "li", "Ti" },
   { kITypeMask, 9 << kOpcodeShift, "addiu", "TSi", },
   { kITypeMask, 10 << kOpcodeShift, "slti", "TSi", },
   { kITypeMask, 11 << kOpcodeShift, "sltiu", "TSi", },
-  { kITypeMask, 12 << kOpcodeShift, "andi", "TSi", },
-  { kITypeMask, 13 << kOpcodeShift, "ori", "TSi", },
-  { kITypeMask, 14 << kOpcodeShift, "xori", "TSi", },
+  { kITypeMask, 12 << kOpcodeShift, "andi", "TSI", },
+  { kITypeMask | (0x1f << 21), 13 << kOpcodeShift | (0 << 21), "li", "TI" },
+  { kITypeMask, 13 << kOpcodeShift, "ori", "TSI", },
+  { kITypeMask, 14 << kOpcodeShift, "xori", "TSI", },
   { kITypeMask | (0x1f << 21), 15 << kOpcodeShift, "lui", "Ti", },
   { kITypeMask, 15 << kOpcodeShift, "aui", "TSi", },
 
@@ -324,6 +330,7 @@
 
   { kITypeMask, 24 << kOpcodeShift, "bnec", "STB" },
 
+  { kITypeMask | (0x1f << 21), 25 << kOpcodeShift | (0 << 21), "dli", "Ti" },
   { kITypeMask, 25 << kOpcodeShift, "daddiu", "TSi", },
   { kITypeMask, 29 << kOpcodeShift, "daui", "TSi", },
 
@@ -561,6 +568,9 @@
               }
               continue;  // No ", ".
             }
+          case 'I':  // Unsigned lower 16-bit immediate.
+            args << (instruction & 0xffff);
+            break;
           case 'i':  // Sign-extended lower 16-bit immediate.
             args << static_cast<int16_t>(instruction & 0xffff);
             break;
diff --git a/disassembler/disassembler_x86.cc b/disassembler/disassembler_x86.cc
index 4824f70..bbc8e37 100644
--- a/disassembler/disassembler_x86.cc
+++ b/disassembler/disassembler_x86.cc
@@ -792,6 +792,7 @@
         src_reg_file = dst_reg_file = SSE;
         break;
       case 0x60: case 0x61: case 0x62: case 0x6C:
+      case 0x68: case 0x69: case 0x6A: case 0x6D:
         if (prefix[2] == 0x66) {
           src_reg_file = dst_reg_file = SSE;
           prefix[2] = 0;  // Clear prefix now. It has served its purpose as part of the opcode.
@@ -803,6 +804,10 @@
           case 0x61: opcode1 = "punpcklwd"; break;
           case 0x62: opcode1 = "punpckldq"; break;
           case 0x6c: opcode1 = "punpcklqdq"; break;
+          case 0x68: opcode1 = "punpckhbw"; break;
+          case 0x69: opcode1 = "punpckhwd"; break;
+          case 0x6A: opcode1 = "punpckhdq"; break;
+          case 0x6D: opcode1 = "punpckhqdq"; break;
         }
         load = true;
         has_modrm = true;
diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc
index f8b1f53..6f833c6 100644
--- a/oatdump/oatdump.cc
+++ b/oatdump/oatdump.cc
@@ -500,6 +500,13 @@
       os << "Dex file data for " << dex_file->GetLocation() << "\n";
       data.Dump(os);
       os << "\n";
+      const DexLayoutSections* const layout_sections = oat_dex_file->GetDexLayoutSections();
+      if (layout_sections != nullptr) {
+        os << "Layout data\n";
+        os << *layout_sections;
+        os << "\n";
+      }
+
       cumulative.Add(data);
     }
     os << "Cumulative dex file data\n";
diff --git a/runtime/Android.bp b/runtime/Android.bp
index d534542..0f5a1a8 100644
--- a/runtime/Android.bp
+++ b/runtime/Android.bp
@@ -55,8 +55,9 @@
         "compiler_filter.cc",
         "debugger.cc",
         "dex_file.cc",
-        "dex_file_tracking_registrar.cc",
         "dex_file_annotations.cc",
+        "dex_file_layout.cc",
+        "dex_file_tracking_registrar.cc",
         "dex_file_verifier.cc",
         "dex_instruction.cc",
         "dex_to_dex_decompiler.cc",
@@ -439,6 +440,7 @@
         "debugger.h",
         "base/unix_file/fd_file.h",
         "dex_file.h",
+        "dex_file_layout.h",
         "dex_instruction.h",
         "dex_instruction_utils.h",
         "gc_root.h",
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index 9756f57..845bd6d 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -8598,15 +8598,6 @@
   return GetQuickGenericJniStub();
 }
 
-void ClassLinker::SetEntryPointsToCompiledCode(ArtMethod* method, const void* code) const {
-  CHECK(code != nullptr);
-  const uint8_t* base = reinterpret_cast<const uint8_t*>(code);  // Base of data points at code.
-  base -= sizeof(void*);  // Move backward so that code_offset != 0.
-  const uint32_t code_offset = sizeof(void*);
-  OatFile::OatMethod oat_method(base, code_offset);
-  oat_method.LinkMethod(method);
-}
-
 void ClassLinker::SetEntryPointsToInterpreter(ArtMethod* method) const {
   if (!method->IsNative()) {
     method->SetEntryPointFromQuickCompiledCode(GetQuickToInterpreterBridge());
diff --git a/runtime/class_linker.h b/runtime/class_linker.h
index 2fbbe79..66bcbe0 100644
--- a/runtime/class_linker.h
+++ b/runtime/class_linker.h
@@ -511,10 +511,6 @@
     return intern_table_;
   }
 
-  // Set the entrypoints up for method to the given code.
-  void SetEntryPointsToCompiledCode(ArtMethod* method, const void* method_code) const
-      REQUIRES_SHARED(Locks::mutator_lock_);
-
   // Set the entrypoints up for method to the enter the interpreter.
   void SetEntryPointsToInterpreter(ArtMethod* method) const
       REQUIRES_SHARED(Locks::mutator_lock_);
diff --git a/runtime/common_runtime_test.h b/runtime/common_runtime_test.h
index 74bc0b2..e2131f1 100644
--- a/runtime/common_runtime_test.h
+++ b/runtime/common_runtime_test.h
@@ -288,6 +288,12 @@
     return; \
   }
 
+#define TEST_DISABLED_FOR_MEMORY_TOOL_VALGRIND() \
+  if (RUNNING_ON_MEMORY_TOOL > 0 && kMemoryToolIsValgrind) { \
+    printf("WARNING: TEST DISABLED FOR MEMORY TOOL VALGRIND\n"); \
+    return; \
+  }
+
 #define TEST_DISABLED_FOR_MEMORY_TOOL_ASAN() \
   if (RUNNING_ON_MEMORY_TOOL > 0 && !kMemoryToolIsValgrind) { \
     printf("WARNING: TEST DISABLED FOR MEMORY TOOL ASAN\n"); \
diff --git a/runtime/dex_file_layout.cc b/runtime/dex_file_layout.cc
new file mode 100644
index 0000000..4375d7f
--- /dev/null
+++ b/runtime/dex_file_layout.cc
@@ -0,0 +1,83 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dex_file_layout.h"
+
+#include <sys/mman.h>
+
+#include "dex_file.h"
+#include "utils.h"
+
+namespace art {
+
+void DexLayoutSection::Subsection::Madvise(const DexFile* dex_file, int advice) const {
+  DCHECK(dex_file != nullptr);
+  DCHECK_LE(size_, dex_file->Size());
+  DCHECK_LE(offset_ + size_, dex_file->Size());
+  MadviseLargestPageAlignedRegion(dex_file->Begin() + offset_,
+                                  dex_file->Begin() + offset_ + size_,
+                                  advice);
+}
+
+void DexLayoutSections::Madvise(const DexFile* dex_file, MadviseState state) const {
+  // The dex file is already defaulted to random access everywhere.
+  for (const DexLayoutSection& section : sections_) {
+    switch (state) {
+      case MadviseState::kMadviseStateAtLoad: {
+        section.parts_[static_cast<size_t>(LayoutType::kLayoutTypeStartupOnly)].Madvise(
+            dex_file,
+            MADV_WILLNEED);
+        section.parts_[static_cast<size_t>(LayoutType::kLayoutTypeHot)].Madvise(
+            dex_file,
+            MADV_WILLNEED);
+        break;
+      }
+      case MadviseState::kMadviseStateFinishedLaunch: {
+        section.parts_[static_cast<size_t>(LayoutType::kLayoutTypeStartupOnly)].Madvise(
+            dex_file,
+            MADV_DONTNEED);
+        break;
+      }
+      case MadviseState::kMadviseStateFinishedTrim: {
+        section.parts_[static_cast<size_t>(LayoutType::kLayoutTypeSometimesUsed)].Madvise(
+            dex_file,
+            MADV_DONTNEED);
+        section.parts_[static_cast<size_t>(LayoutType::kLayoutTypeUsedOnce)].Madvise(
+            dex_file,
+            MADV_DONTNEED);
+        break;
+      }
+    }
+  }
+}
+
+std::ostream& operator<<(std::ostream& os, const DexLayoutSection& section) {
+  for (size_t i = 0; i < static_cast<size_t>(LayoutType::kLayoutTypeCount); ++i) {
+    const DexLayoutSection::Subsection& part = section.parts_[i];
+    os << static_cast<LayoutType>(i) << "("
+       << part.offset_ << "-" << part.offset_ + part.size_ << ") ";
+  }
+  return os;
+}
+
+std::ostream& operator<<(std::ostream& os, const DexLayoutSections& sections) {
+  for (size_t i = 0; i < static_cast<size_t>(DexLayoutSections::SectionType::kSectionCount); ++i) {
+    os << static_cast<DexLayoutSections::SectionType>(i) << ":" << sections.sections_[i] << "\n";
+  }
+  return os;
+}
+
+}  // namespace art
diff --git a/runtime/dex_file_layout.h b/runtime/dex_file_layout.h
new file mode 100644
index 0000000..40cc912
--- /dev/null
+++ b/runtime/dex_file_layout.h
@@ -0,0 +1,96 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_DEX_FILE_LAYOUT_H_
+#define ART_RUNTIME_DEX_FILE_LAYOUT_H_
+
+#include <cstdint>
+#include <iosfwd>
+
+namespace art {
+
+class DexFile;
+
+enum class LayoutType : uint8_t {
+  // Layout of things that are randomly used. These should be advised to random access.
+  // Without layout, this is the default mode when loading a dex file.
+  kLayoutTypeSometimesUsed,
+  // Layout of things that are only used during startup, these can be madvised after launch.
+  kLayoutTypeStartupOnly,
+  // Layout of things that are hot (commonly accessed), these should be pinned or madvised as
+  // will-need.
+  kLayoutTypeHot,
+  // Layout of things that are needed probably only once (class initializers). These can be
+  // madvised during trim events.
+  kLayoutTypeUsedOnce,
+  // Layout of things that are thought to be unused. These things should be advised to random
+  // access.
+  kLayoutTypeUnused,
+  // Unused value, just the number of elements in the enum.
+  kLayoutTypeCount,
+};
+std::ostream& operator<<(std::ostream& os, const LayoutType& collector_type);
+
+enum class MadviseState : uint8_t {
+  // Madvise based on a file that was just loaded.
+  kMadviseStateAtLoad,
+  // Madvise after launch is finished.
+  kMadviseStateFinishedLaunch,
+  // Trim by madvising code that is unlikely to be too important in the future.
+  kMadviseStateFinishedTrim,
+};
+std::ostream& operator<<(std::ostream& os, const MadviseState& collector_type);
+
+// A dex layout section such as code items or strings. Each section is composed of subsections
+// that are laid out adjacently to each other such as (hot, unused, startup, etc...).
+class DexLayoutSection {
+ public:
+  // A subsection is a continuous range of the dex file that is all part of the same layout hint.
+  class Subsection {
+   public:
+    // Use uint32_t to handle 32/64 bit cross compilation.
+    uint32_t offset_ = 0u;
+    uint32_t size_ = 0u;
+
+    void Madvise(const DexFile* dex_file, int advice) const;
+  };
+
+  Subsection parts_[static_cast<size_t>(LayoutType::kLayoutTypeCount)];
+};
+
+// A set of dex layout sections, currently there is only one section for code and one for strings.
+class DexLayoutSections {
+ public:
+  enum class SectionType : uint8_t {
+    kSectionTypeCode,
+    kSectionTypeStrings,
+    kSectionCount,
+  };
+
+  // Advise access about the dex file based on layout. The caller is expected to have already
+  // madvised to MADV_RANDOM.
+  void Madvise(const DexFile* dex_file, MadviseState state) const;
+
+  DexLayoutSection sections_[static_cast<size_t>(SectionType::kSectionCount)];
+};
+
+std::ostream& operator<<(std::ostream& os, const DexLayoutSections::SectionType& collector_type);
+std::ostream& operator<<(std::ostream& os, const DexLayoutSection& section);
+std::ostream& operator<<(std::ostream& os, const DexLayoutSections& sections);
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_DEX_FILE_LAYOUT_H_
diff --git a/runtime/jit/profile_saver.cc b/runtime/jit/profile_saver.cc
index 381e95f..af6a45f 100644
--- a/runtime/jit/profile_saver.cc
+++ b/runtime/jit/profile_saver.cc
@@ -356,7 +356,8 @@
             sampled_methods->AddReference(method.GetDexFile(), method.GetDexMethodIndex());
           }
         } else {
-          CHECK_EQ(method.GetCounter(), 0u);
+          CHECK_EQ(method.GetCounter(), 0u) << method.PrettyMethod()
+              << " access_flags=" << method.GetAccessFlags();
         }
       }
     }
diff --git a/runtime/oat.h b/runtime/oat.h
index c4a983e..1d79ed6 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -32,8 +32,8 @@
 class PACKED(4) OatHeader {
  public:
   static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' };
-  // Last oat version changed reason: MIPS Baker thunks.
-  static constexpr uint8_t kOatVersion[] = { '1', '3', '1', '\0' };
+  // Last oat version changed reason: Add dex section layout info to header.
+  static constexpr uint8_t kOatVersion[] = { '1', '3', '2', '\0' };
 
   static constexpr const char* kImageLocationKey = "image-location";
   static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";
diff --git a/runtime/oat_file.cc b/runtime/oat_file.cc
index 4033f8c..0af0622 100644
--- a/runtime/oat_file.cc
+++ b/runtime/oat_file.cc
@@ -544,6 +544,19 @@
       return false;
     }
 
+    uint32_t dex_layout_sections_offset;
+    if (UNLIKELY(!ReadOatDexFileData(*this, &oat, &dex_layout_sections_offset))) {
+      *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zd for '%s' truncated "
+                                    "after dex layout sections offset",
+                                GetLocation().c_str(),
+                                i,
+                                dex_file_location.c_str());
+      return false;
+    }
+    const DexLayoutSections* const dex_layout_sections = dex_layout_sections_offset != 0
+        ? reinterpret_cast<const DexLayoutSections*>(Begin() + dex_layout_sections_offset)
+        : nullptr;
+
     uint32_t method_bss_mapping_offset;
     if (UNLIKELY(!ReadOatDexFileData(*this, &oat, &method_bss_mapping_offset))) {
       *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zd for '%s' truncated "
@@ -635,7 +648,8 @@
                                               lookup_table_data,
                                               method_bss_mapping,
                                               class_offsets_pointer,
-                                              current_dex_cache_arrays);
+                                              current_dex_cache_arrays,
+                                              dex_layout_sections);
     oat_dex_files_storage_.push_back(oat_dex_file);
 
     // Add the location and canonical location (if different) to the oat_dex_files_ table.
@@ -1362,7 +1376,8 @@
                                 const uint8_t* lookup_table_data,
                                 const MethodBssMapping* method_bss_mapping_data,
                                 const uint32_t* oat_class_offsets_pointer,
-                                uint8_t* dex_cache_arrays)
+                                uint8_t* dex_cache_arrays,
+                                const DexLayoutSections* dex_layout_sections)
     : oat_file_(oat_file),
       dex_file_location_(dex_file_location),
       canonical_dex_file_location_(canonical_dex_file_location),
@@ -1371,7 +1386,8 @@
       lookup_table_data_(lookup_table_data),
       method_bss_mapping_(method_bss_mapping_data),
       oat_class_offsets_pointer_(oat_class_offsets_pointer),
-      dex_cache_arrays_(dex_cache_arrays) {
+      dex_cache_arrays_(dex_cache_arrays),
+      dex_layout_sections_(dex_layout_sections) {
   // Initialize TypeLookupTable.
   if (lookup_table_data_ != nullptr) {
     // Peek the number of classes from the DexFile.
@@ -1477,6 +1493,23 @@
   return nullptr;
 }
 
+// Madvise the dex file based on the state we are moving to.
+void OatDexFile::MadviseDexFile(const DexFile& dex_file, MadviseState state) {
+  if (state == MadviseState::kMadviseStateAtLoad) {
+    // Default every dex file to MADV_RANDOM when it is loaded.
+    MadviseLargestPageAlignedRegion(dex_file.Begin(),
+                                    dex_file.Begin() + dex_file.Size(),
+                                    MADV_RANDOM);
+  }
+  const OatFile::OatDexFile* oat_dex_file = dex_file.GetOatDexFile();
+  if (oat_dex_file != nullptr) {
+    // The layout sections are expected to always be present; CHECK aborts if they are missing.
+    const DexLayoutSections* const sections = oat_dex_file->GetDexLayoutSections();
+    CHECK(sections != nullptr);
+    sections->Madvise(&dex_file, state);
+  }
+}
+
 OatFile::OatClass::OatClass(const OatFile* oat_file,
                             mirror::Class::Status status,
                             OatClassType type,
diff --git a/runtime/oat_file.h b/runtime/oat_file.h
index be7d495..9a7fe51 100644
--- a/runtime/oat_file.h
+++ b/runtime/oat_file.h
@@ -26,6 +26,7 @@
 #include "base/stringpiece.h"
 #include "compiler_filter.h"
 #include "dex_file.h"
+#include "dex_file_layout.h"
 #include "method_bss_mapping.h"
 #include "mirror/class.h"
 #include "oat.h"
@@ -38,6 +39,7 @@
 
 class BitVector;
 class ElfFile;
+class DexLayoutSections;
 template <class MirrorType> class GcRoot;
 class MemMap;
 class OatDexFile;
@@ -442,6 +444,9 @@
                                                const char* descriptor,
                                                size_t hash);
 
+  // Madvise the dex file based on the state we are moving to.
+  static void MadviseDexFile(const DexFile& dex_file, MadviseState state);
+
   TypeLookupTable* GetTypeLookupTable() const {
     return lookup_table_.get();
   }
@@ -451,6 +456,11 @@
   // Create only with a type lookup table, used by the compiler to speed up compilation.
   explicit OatDexFile(std::unique_ptr<TypeLookupTable>&& lookup_table);
 
+  // Return the dex layout sections; may be null.
+  const DexLayoutSections* GetDexLayoutSections() const {
+    return dex_layout_sections_;
+  }
+
  private:
   OatDexFile(const OatFile* oat_file,
              const std::string& dex_file_location,
@@ -460,7 +470,8 @@
              const uint8_t* lookup_table_data,
              const MethodBssMapping* method_bss_mapping,
              const uint32_t* oat_class_offsets_pointer,
-             uint8_t* dex_cache_arrays);
+             uint8_t* dex_cache_arrays,
+             const DexLayoutSections* dex_layout_sections);
 
   static void AssertAotCompiler();
 
@@ -474,6 +485,7 @@
   const uint32_t* const oat_class_offsets_pointer_ = 0u;
   uint8_t* const dex_cache_arrays_ = nullptr;
   mutable std::unique_ptr<TypeLookupTable> lookup_table_;
+  const DexLayoutSections* const dex_layout_sections_ = nullptr;
 
   friend class OatFile;
   friend class OatFileBase;
diff --git a/runtime/oat_file_manager.cc b/runtime/oat_file_manager.cc
index 5baf59c..de8f7ed 100644
--- a/runtime/oat_file_manager.cc
+++ b/runtime/oat_file_manager.cc
@@ -48,9 +48,12 @@
 
 using android::base::StringPrintf;
 
-// If true, then we attempt to load the application image if it exists.
+// If true, we attempt to load the application image if it exists.
 static constexpr bool kEnableAppImage = true;
 
+// If true, we advise the kernel about dex file mem map accesses.
+static constexpr bool kMadviseDexFileAccesses = false;
+
 const OatFile* OatFileManager::RegisterOatFile(std::unique_ptr<const OatFile> oat_file) {
   WriterMutexLock mu(Thread::Current(), *Locks::oat_file_manager_lock_);
   DCHECK(oat_file != nullptr);
@@ -569,6 +572,11 @@
     }
     if (dex_files.empty()) {
       error_msgs->push_back("Failed to open dex files from " + source_oat_file->GetLocation());
+    } else if (kMadviseDexFileAccesses) {
+      // Opened dex files from an oat file, madvise them to their loaded state.
+       for (const std::unique_ptr<const DexFile>& dex_file : dex_files) {
+         OatDexFile::MadviseDexFile(*dex_file, MadviseState::kMadviseStateAtLoad);
+       }
     }
   }
 
diff --git a/runtime/utils.cc b/runtime/utils.cc
index ffa9d45..3fe18c7 100644
--- a/runtime/utils.cc
+++ b/runtime/utils.cc
@@ -18,6 +18,7 @@
 
 #include <inttypes.h>
 #include <pthread.h>
+#include <sys/mman.h>  // For madvise
 #include <sys/stat.h>
 #include <sys/syscall.h>
 #include <sys/types.h>
@@ -940,4 +941,18 @@
   }
 }
 
+int MadviseLargestPageAlignedRegion(const uint8_t* begin, const uint8_t* end, int advice) {
+  DCHECK_LE(begin, end);
+  begin = AlignUp(begin, kPageSize);  // Keep only whole pages inside the range.
+  end = AlignDown(end, kPageSize);
+  if (begin < end) {
+    int result = madvise(const_cast<uint8_t*>(begin), end - begin, advice);
+    if (result != 0) {
+      PLOG(WARNING) << "madvise failed " << result;
+    }
+    return result;
+  }
+  return 0;  // No whole page lies within the range; nothing to advise.
+}
+
 }  // namespace art
diff --git a/runtime/utils.h b/runtime/utils.h
index f1f5576..739681d 100644
--- a/runtime/utils.h
+++ b/runtime/utils.h
@@ -338,6 +338,9 @@
   return (opnd < 0) ? -1 : ((opnd == 0) ? 0 : 1);
 }
 
+// Madvise the largest page-aligned region in [begin, end); returns 0 if no such region exists.
+int MadviseLargestPageAlignedRegion(const uint8_t* begin, const uint8_t* end, int advice);
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_UTILS_H_
diff --git a/test/088-monitor-verification/src/Main.java b/test/088-monitor-verification/src/Main.java
index bca3df6..13a96c7 100644
--- a/test/088-monitor-verification/src/Main.java
+++ b/test/088-monitor-verification/src/Main.java
@@ -34,6 +34,12 @@
             disableStackFrameAsserts();
         }
 
+        ensureJitCompiled(Main.class, "recursiveSync");
+        ensureJitCompiled(Main.class, "nestedMayThrow");
+        ensureJitCompiled(Main.class, "constantLock");
+        ensureJitCompiled(Main.class, "notExcessiveNesting");
+        ensureJitCompiled(Main.class, "notNested");
+
         Main m = new Main();
 
         m.recursiveSync(0);
@@ -273,4 +279,5 @@
     public static native boolean runtimeIsSoftFail();
     public static native boolean isInterpreted();
     public static native void disableStackFrameAsserts();
+    private static native void ensureJitCompiled(Class<?> itf, String method_name);
 }
diff --git a/test/550-checker-multiply-accumulate/src/Main.java b/test/550-checker-multiply-accumulate/src/Main.java
index 6fd9cdd..9e6fd3d 100644
--- a/test/550-checker-multiply-accumulate/src/Main.java
+++ b/test/550-checker-multiply-accumulate/src/Main.java
@@ -424,16 +424,29 @@
     return - (left * right);
   }
 
-  /// CHECK-START-ARM64: void Main.SimdMulAdd(int[], int[]) instruction_simplifier_arm64 (before)
+  /// CHECK-START-ARM64: void Main.SimdMulAdd(int[], int[]) instruction_simplifier$after_bce (before)
   /// CHECK-DAG:     Phi                            loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG:     VecMul                         loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:     VecAdd                         loop:<<Loop>>      outer_loop:none
 
-  /// CHECK-START-ARM64: void Main.SimdMulAdd(int[], int[]) instruction_simplifier_arm64 (after)
+  /// CHECK-START-ARM64: void Main.SimdMulAdd(int[], int[]) instruction_simplifier$after_bce (after)
   /// CHECK-DAG:     Phi                            loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG:     VecMultiplyAccumulate kind:Add loop:<<Loop>>      outer_loop:none
 
-  /// CHECK-START-ARM64: void Main.SimdMulAdd(int[], int[]) instruction_simplifier_arm64 (after)
+  /// CHECK-START-ARM64: void Main.SimdMulAdd(int[], int[]) instruction_simplifier$after_bce (after)
+  /// CHECK-NOT:     VecMul
+  /// CHECK-NOT:     VecAdd
+
+  /// CHECK-START-MIPS64: void Main.SimdMulAdd(int[], int[]) instruction_simplifier$after_bce (before)
+  /// CHECK-DAG:     Phi                            loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG:     VecMul                         loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:     VecAdd                         loop:<<Loop>>      outer_loop:none
+
+  /// CHECK-START-MIPS64: void Main.SimdMulAdd(int[], int[]) instruction_simplifier$after_bce (after)
+  /// CHECK-DAG:     Phi                            loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG:     VecMultiplyAccumulate kind:Add loop:<<Loop>>      outer_loop:none
+
+  /// CHECK-START-MIPS64: void Main.SimdMulAdd(int[], int[]) instruction_simplifier$after_bce (after)
   /// CHECK-NOT:     VecMul
   /// CHECK-NOT:     VecAdd
   public static void SimdMulAdd(int[] array1, int[] array2) {
@@ -442,16 +455,47 @@
     }
   }
 
-  /// CHECK-START-ARM64: void Main.SimdMulSub(int[], int[]) instruction_simplifier_arm64 (before)
+  /// CHECK-START-MIPS64: void Main.SimdMulAddLong(long[], long[]) instruction_simplifier$after_bce (before)
+  /// CHECK-DAG:     Phi                            loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG:     VecMul                         loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:     VecAdd                         loop:<<Loop>>      outer_loop:none
+
+  /// CHECK-START-MIPS64: void Main.SimdMulAddLong(long[], long[]) instruction_simplifier$after_bce (after)
+  /// CHECK-DAG:     Phi                            loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG:     VecMultiplyAccumulate kind:Add loop:<<Loop>>      outer_loop:none
+
+  /// CHECK-START-MIPS64: void Main.SimdMulAddLong(long[], long[]) instruction_simplifier$after_bce (after)
+  /// CHECK-NOT:     VecMul
+  /// CHECK-NOT:     VecAdd
+  public static void SimdMulAddLong(long[] array1, long[] array2) {
+    for (int j = 0; j < 100; j++) {
+      array2[j] += 12345 * array1[j];
+    }
+  }
+
+  /// CHECK-START-ARM64: void Main.SimdMulSub(int[], int[]) instruction_simplifier$after_bce (before)
   /// CHECK-DAG:     Phi                            loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG:     VecMul                         loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:     VecSub                         loop:<<Loop>>      outer_loop:none
 
-  /// CHECK-START-ARM64: void Main.SimdMulSub(int[], int[]) instruction_simplifier_arm64 (after)
+  /// CHECK-START-ARM64: void Main.SimdMulSub(int[], int[]) instruction_simplifier$after_bce (after)
   /// CHECK-DAG:     Phi                            loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG:     VecMultiplyAccumulate kind:Sub loop:<<Loop>>      outer_loop:none
 
-  /// CHECK-START-ARM64: void Main.SimdMulSub(int[], int[]) instruction_simplifier_arm64 (after)
+  /// CHECK-START-ARM64: void Main.SimdMulSub(int[], int[]) instruction_simplifier$after_bce (after)
+  /// CHECK-NOT:     VecMul
+  /// CHECK-NOT:     VecSub
+
+  /// CHECK-START-MIPS64: void Main.SimdMulSub(int[], int[]) instruction_simplifier$after_bce (before)
+  /// CHECK-DAG:     Phi                            loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG:     VecMul                         loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:     VecSub                         loop:<<Loop>>      outer_loop:none
+
+  /// CHECK-START-MIPS64: void Main.SimdMulSub(int[], int[]) instruction_simplifier$after_bce (after)
+  /// CHECK-DAG:     Phi                            loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG:     VecMultiplyAccumulate kind:Sub loop:<<Loop>>      outer_loop:none
+
+  /// CHECK-START-MIPS64: void Main.SimdMulSub(int[], int[]) instruction_simplifier$after_bce (after)
   /// CHECK-NOT:     VecMul
   /// CHECK-NOT:     VecSub
   public static void SimdMulSub(int[] array1, int[] array2) {
@@ -460,12 +504,38 @@
     }
   }
 
-  /// CHECK-START-ARM64: void Main.SimdMulMultipleUses(int[], int[]) instruction_simplifier_arm64 (before)
+  /// CHECK-START-MIPS64: void Main.SimdMulSubLong(long[], long[]) instruction_simplifier$after_bce (before)
   /// CHECK-DAG:     Phi                            loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG:     VecMul                         loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:     VecSub                         loop:<<Loop>>      outer_loop:none
 
-  /// CHECK-START-ARM64: void Main.SimdMulMultipleUses(int[], int[]) instruction_simplifier_arm64 (after)
+  /// CHECK-START-MIPS64: void Main.SimdMulSubLong(long[], long[]) instruction_simplifier$after_bce (after)
+  /// CHECK-DAG:     Phi                            loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG:     VecMultiplyAccumulate kind:Sub loop:<<Loop>>      outer_loop:none
+
+  /// CHECK-START-MIPS64: void Main.SimdMulSubLong(long[], long[]) instruction_simplifier$after_bce (after)
+  /// CHECK-NOT:     VecMul
+  /// CHECK-NOT:     VecSub
+  public static void SimdMulSubLong(long[] array1, long[] array2) {
+    for (int j = 0; j < 100; j++) {
+      array2[j] -= 12345 * array1[j];
+    }
+  }
+
+  /// CHECK-START-ARM64: void Main.SimdMulMultipleUses(int[], int[]) instruction_simplifier$after_bce (before)
+  /// CHECK-DAG:     Phi                            loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG:     VecMul                         loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:     VecSub                         loop:<<Loop>>      outer_loop:none
+
+  /// CHECK-START-ARM64: void Main.SimdMulMultipleUses(int[], int[]) instruction_simplifier$after_bce (after)
+  /// CHECK-NOT: VecMultiplyAccumulate
+
+  /// CHECK-START-MIPS64: void Main.SimdMulMultipleUses(int[], int[]) instruction_simplifier$after_bce (before)
+  /// CHECK-DAG:     Phi                            loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG:     VecMul                         loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:     VecSub                         loop:<<Loop>>      outer_loop:none
+
+  /// CHECK-START-MIPS64: void Main.SimdMulMultipleUses(int[], int[]) instruction_simplifier$after_bce (after)
   /// CHECK-NOT: VecMultiplyAccumulate
   public static void SimdMulMultipleUses(int[] array1, int[] array2) {
     for (int j = 0; j < 100; j++) {
@@ -475,6 +545,21 @@
     }
   }
 
+  /// CHECK-START-MIPS64: void Main.SimdMulMultipleUsesLong(long[], long[]) instruction_simplifier$after_bce (before)
+  /// CHECK-DAG:     Phi                            loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG:     VecMul                         loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:     VecSub                         loop:<<Loop>>      outer_loop:none
+
+  /// CHECK-START-MIPS64: void Main.SimdMulMultipleUsesLong(long[], long[]) instruction_simplifier$after_bce (after)
+  /// CHECK-NOT: VecMultiplyAccumulate
+  public static void SimdMulMultipleUsesLong(long[] array1, long[] array2) {
+    for (int j = 0; j < 100; j++) {
+       long temp = 12345 * array1[j];
+       array2[j] -= temp;
+       array1[j] = temp;
+    }
+  }
+
   public static final int ARRAY_SIZE = 1000;
 
   public static void initArray(int[] array) {
@@ -483,6 +568,12 @@
     }
   }
 
+  public static void initArrayLong(long[] array) {
+    for (int i = 0; i < ARRAY_SIZE; i++) {
+      array[i] = i;
+    }
+  }
+
   public static int calcArraySum(int[] array) {
     int sum = 0;
     for (int i = 0; i < ARRAY_SIZE; i++) {
@@ -491,19 +582,39 @@
     return sum;
   }
 
+  public static long calcArraySumLong(long[] array) {
+    long sum = 0;
+    for (int i = 0; i < ARRAY_SIZE; i++) {
+      sum += array[i];
+    }
+    return sum;
+  }
+
   public static void testSimdMultiplyAccumulate() {
     int[] array1 = new int[ARRAY_SIZE];
     int[] array2 = new int[ARRAY_SIZE];
+    long[] array3 = new long[ARRAY_SIZE];
+    long[] array4 = new long[ARRAY_SIZE];
 
     initArray(array1);
     initArray(array2);
     SimdMulSub(array1, array2);
     assertIntEquals(-60608250, calcArraySum(array2));
 
+    initArrayLong(array3);
+    initArrayLong(array4);
+    SimdMulSubLong(array3, array4);
+    assertLongEquals(-60608250, calcArraySumLong(array4));
+
     initArray(array1);
     initArray(array2);
     SimdMulAdd(array1, array2);
     assertIntEquals(61607250, calcArraySum(array2));
+
+    initArrayLong(array3);
+    initArrayLong(array4);
+    SimdMulAddLong(array3, array4);
+    assertLongEquals(61607250, calcArraySumLong(array4));
   }
 
   public static void main(String[] args) {
diff --git a/test/592-checker-regression-bool-input/smali/TestCase.smali b/test/592-checker-regression-bool-input/smali/TestCase.smali
index 56c499d..ad4e902 100644
--- a/test/592-checker-regression-bool-input/smali/TestCase.smali
+++ b/test/592-checker-regression-bool-input/smali/TestCase.smali
@@ -16,8 +16,15 @@
 
 .super Ljava/lang/Object;
 
+## CHECK-START: boolean TestCase.testCase() select_generator (after)
+## CHECK-DAG:     <<Select:i\d+>>          Select
+## CHECK-DAG:                              Return [<<Select>>]
+
 ## CHECK-START: boolean TestCase.testCase() load_store_elimination (after)
-## CHECK-DAG:     If [{{b\d+}}]
+## CHECK-DAG:     <<Or:i\d+>>              Or
+## CHECK-DAG:     <<TypeConversion:b\d+>>  TypeConversion
+## CHECK-DAG:                              StaticFieldSet
+## CHECK-DAG:                              Return [<<TypeConversion>>]
 
 .method public static testCase()Z
     .registers 6
@@ -31,7 +38,8 @@
     # LSE will replace this sget with the type conversion above...
     sget-boolean v2, LMain;->field2:Z
 
-    # ... and generate an If with a byte-typed condition.
+    # ... and select generation will replace this part with a select
+    # that simplifies into simply returning the stored boolean.
     if-eqz v2, :else
     const v0, 0x1
     return v0
diff --git a/test/595-profile-saving/run b/test/595-profile-saving/run
index 055035b..851be09 100644
--- a/test/595-profile-saving/run
+++ b/test/595-profile-saving/run
@@ -19,9 +19,11 @@
 # and to make sure the test is not compiled  when loaded (by PathClassLoader)
 # -Xjitsaveprofilinginfo to enable profile saving
 # -Xusejit:false to disable jit and only test profiles.
+# -Xjitinitialsize:32M to prevent profiling info creation failure.
 exec ${RUN} \
   -Xcompiler-option --compiler-filter=quicken \
   --runtime-option '-Xcompiler-option --compiler-filter=quicken' \
+  --runtime-option -Xjitinitialsize:32M \
   --runtime-option -Xjitsaveprofilinginfo \
   --runtime-option -Xusejit:false \
   --runtime-option -Xps-profile-boot-class-path \
diff --git a/test/663-checker-select-generator/expected.txt b/test/663-checker-select-generator/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/663-checker-select-generator/expected.txt
diff --git a/test/663-checker-select-generator/info.txt b/test/663-checker-select-generator/info.txt
new file mode 100644
index 0000000..792779f
--- /dev/null
+++ b/test/663-checker-select-generator/info.txt
@@ -0,0 +1,14 @@
+Test for select generation for conditional returns.
+
+Tests the rewriting from:
+
+             If [ Condition ]
+               /          \
+     false branch        true branch
+     return FalseValue   return TrueValue
+
+to:
+
+     true branch
+     false branch
+     return Select [FalseValue, TrueValue, Condition]
diff --git a/test/663-checker-select-generator/smali/TestCase.smali b/test/663-checker-select-generator/smali/TestCase.smali
new file mode 100644
index 0000000..844a9cf
--- /dev/null
+++ b/test/663-checker-select-generator/smali/TestCase.smali
@@ -0,0 +1,72 @@
+# Copyright (C) 2017 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LTestCase;
+
+.super Ljava/lang/Object;
+
+## CHECK-START: boolean TestCase.testCase(boolean) select_generator (before)
+## CHECK-DAG:     <<Param:z\d+>>           ParameterValue
+## CHECK-DAG:     <<Int0:i\d+>>            IntConstant 0
+## CHECK-DAG:     <<Int1:i\d+>>            IntConstant 1
+## CHECK-DAG:                              If [<<Param>>]
+## CHECK-DAG:                              Return [<<Int0>>]
+## CHECK-DAG:                              Return [<<Int1>>]
+
+## CHECK-START: boolean TestCase.testCase(boolean) select_generator (after)
+## CHECK-DAG:     <<Param:z\d+>>           ParameterValue
+## CHECK-DAG:     <<Int0:i\d+>>            IntConstant 0
+## CHECK-DAG:     <<Int1:i\d+>>            IntConstant 1
+## CHECK-DAG:     <<Select:i\d+>>          Select [<<Int0>>,<<Int1>>,<<Param>>]
+## CHECK-DAG:                              Return [<<Select>>]
+
+.method public static testCase(Z)Z
+    .registers 1
+
+    # The select generation will replace this with a select
+    # instruction and a return.
+    if-eqz v0, :else
+    const v0, 0x1
+    return v0
+
+    :else
+    const v0, 0x0
+    return v0
+.end method
+
+
+## CHECK-START: java.lang.Object TestCase.referenceTypeTestCase(Main$Sub1, Main$Sub2, boolean) select_generator (before)
+## CHECK-DAG:     <<Param0:l\d+>>          ParameterValue
+## CHECK-DAG:     <<Param1:l\d+>>          ParameterValue
+## CHECK-DAG:     <<Param2:z\d+>>          ParameterValue
+## CHECK-DAG:                              If [<<Param2>>]
+## CHECK-DAG:                              Return [<<Param1>>]
+## CHECK-DAG:                              Return [<<Param0>>]
+
+## CHECK-START: java.lang.Object TestCase.referenceTypeTestCase(Main$Sub1, Main$Sub2, boolean) select_generator (after)
+## CHECK-DAG:     <<Param0:l\d+>>          ParameterValue
+## CHECK-DAG:     <<Param1:l\d+>>          ParameterValue
+## CHECK-DAG:     <<Param2:z\d+>>          ParameterValue
+## CHECK-DAG:     <<Select:l\d+>>          Select [<<Param1>>,<<Param0>>,<<Param2>>]
+## CHECK-DAG:                              Return [<<Select>>]
+
+.method public static referenceTypeTestCase(LMain$Sub1;LMain$Sub2;Z)Ljava/lang/Object;
+    .registers 3
+
+    if-eqz v2, :else
+    return-object v0
+
+    :else
+    return-object v1
+.end method
diff --git a/test/663-checker-select-generator/src/Main.java b/test/663-checker-select-generator/src/Main.java
new file mode 100644
index 0000000..c5c7a43
--- /dev/null
+++ b/test/663-checker-select-generator/src/Main.java
@@ -0,0 +1,62 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Method;
+
+public class Main {
+  public static class Super {}
+  public static class Sub1 {}
+  public static class Sub2 {}
+
+  public static void assertTrue(boolean result) {
+    if (!result) {
+      throw new Error("Expected true");
+    }
+  }
+
+  public static void assertFalse(boolean result) {
+    if (result) {
+      throw new Error("Expected false");
+    }
+  }
+
+  public static void assertInstanceOfSub1(Object result) {
+    if (!(result instanceof Sub1)) {
+      throw new Error("Expected instance of Sub1");
+    }
+  }
+
+  public static void assertInstanceOfSub2(Object result) {
+    if (!(result instanceof Sub2)) {
+      throw new Error("Expected instance of Sub2");
+    }
+  }
+
+  public static void main(String[] args) throws Throwable {
+    Class<?> c = Class.forName("TestCase");
+    Method m = c.getMethod("testCase", boolean.class);
+    Method m2 = c.getMethod("referenceTypeTestCase", Sub1.class, Sub2.class, boolean.class);
+
+    try {
+      assertTrue((Boolean) m.invoke(null, true));
+      assertFalse((Boolean) m.invoke(null, false));
+      assertInstanceOfSub1(m2.invoke(null, new Sub1(), new Sub2(), true));
+      assertInstanceOfSub2(m2.invoke(null, new Sub1(), new Sub2(), false));
+    } catch (Exception e) {
+      throw new Error(e);
+    }
+  }
+}
diff --git a/test/664-aget-verifier/aget-verifier.cc b/test/664-aget-verifier/aget-verifier.cc
new file mode 100644
index 0000000..41372ad
--- /dev/null
+++ b/test/664-aget-verifier/aget-verifier.cc
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dex_file.h"
+
+#include "art_method-inl.h"
+#include "jni.h"
+#include "method_reference.h"
+#include "mirror/class-inl.h"
+#include "mirror/executable.h"
+#include "scoped_thread_state_change-inl.h"
+#include "thread.h"
+
+namespace art {
+namespace {
+
+extern "C" JNIEXPORT jboolean JNICALL Java_Main_testCompiled(JNIEnv* env,
+                                                             jclass,
+                                                             jobject method) {
+  CHECK(method != nullptr);
+  ScopedObjectAccess soa(env);
+  ObjPtr<mirror::Executable> exec = soa.Decode<mirror::Executable>(method);
+  ArtMethod* art_method = exec->GetArtMethod();
+  return art_method->HasAnyCompiledCode();
+}
+
+}  // namespace
+}  // namespace art
diff --git a/test/664-aget-verifier/expected.txt b/test/664-aget-verifier/expected.txt
new file mode 100644
index 0000000..50e2e94
--- /dev/null
+++ b/test/664-aget-verifier/expected.txt
@@ -0,0 +1,2 @@
+JNI_OnLoad called
+test method successfully verified/compiled.
diff --git a/test/664-aget-verifier/info.txt b/test/664-aget-verifier/info.txt
new file mode 100644
index 0000000..b59cacb
--- /dev/null
+++ b/test/664-aget-verifier/info.txt
@@ -0,0 +1,6 @@
+Tests how the verifier handles aget on an array that was initially null.
+
+The verifier will flag aget instructions as have_pending_runtime_throw_failure_
+if the array register is potentially null, even if the aget is guarded by null
+checks and never actually null at runtime. This fails compile-time verification,
+preventing an otherwise good method from being compiled.
diff --git a/test/664-aget-verifier/src/Main.java b/test/664-aget-verifier/src/Main.java
new file mode 100644
index 0000000..7a92b17
--- /dev/null
+++ b/test/664-aget-verifier/src/Main.java
@@ -0,0 +1,49 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Method;
+
+public class Main {
+
+  public static void main(String[] args) {
+    System.loadLibrary(args[0]);
+
+    test();
+
+    try {
+      if (testCompiled(Main.class.getDeclaredMethod("test"))) {
+        System.out.println("test method successfully verified/compiled.");
+      } else {
+        System.out.println("test method failed to verify/compile.");
+      }
+    } catch (Exception e) {
+      System.out.println("Got unexpected exception: " + e);
+    }
+  }
+
+  public static void test() {
+    int[] maybe_null_array = null;
+    for (int i = 0; i < 2; i++) {
+      int[] non_null_array = new int[1];
+      if (maybe_null_array != null) {
+        i = maybe_null_array[0] + 1;
+      }
+      maybe_null_array = non_null_array;
+    }
+  }
+
+  public static native boolean testCompiled(Method method);
+}
diff --git a/test/Android.bp b/test/Android.bp
index fab664a..7413ee5 100644
--- a/test/Android.bp
+++ b/test/Android.bp
@@ -411,6 +411,7 @@
         "642-fp-callees/fp_callees.cc",
         "647-jni-get-field-id/get_field_id.cc",
         "656-annotation-lookup-generic-jni/test.cc",
+	"664-aget-verifier/aget-verifier.cc",
         "708-jit-cache-churn/jit.cc"
     ],
     shared_libs: [
diff --git a/test/knownfailures.json b/test/knownfailures.json
index a8191bb..20cfc34 100644
--- a/test/knownfailures.json
+++ b/test/knownfailures.json
@@ -54,11 +54,6 @@
                         "doesn't (and isn't meant to) work with --prebuild."]
     },
     {
-        "tests": ["529-checker-unresolved"],
-        "variant": "no-prebuild",
-        "bug": "http://b/27784033"
-    },
-    {
         "tests": ["117-nopatchoat",
                   "147-stripped-dex-fallback",
                   "608-checker-unresolved-lse"],
@@ -505,6 +500,7 @@
             "641-checker-arraycopy",
             "643-checker-bogus-ic",
             "645-checker-abs-simd",
+            "663-checker-select-generator",
             "706-checker-scheduler"],
         "description": ["Checker tests are not compatible with jvmti."],
         "variant": "jvmti-stress | redefine-stress | trace-stress | field-stress | step-stress"
@@ -720,9 +716,8 @@
         "description": ["Test hits dex2oat watchdog timeout (60sec) on art-asan"]
     },
     {
-        "tests": "662-regression-alias",
-        "variant": "target",
-        "description": ["disable until ARM scheduling/aliasing bug is fixed."],
-        "bug": "b/64018485"
+        "tests": "664-aget-verifier",
+        "description": ["Aget on potentially null array fails verification."],
+        "bug": "b/64683522"
     }
 ]
diff --git a/tools/Android.mk b/tools/Android.mk
index bc2fd8c..9ecf0cd 100644
--- a/tools/Android.mk
+++ b/tools/Android.mk
@@ -20,13 +20,15 @@
 include $(CLEAR_VARS)
 LOCAL_IS_HOST_MODULE := true
 LOCAL_MODULE_CLASS := EXECUTABLES
-LOCAL_MODULE := art
+LOCAL_MODULE := art-script
 LOCAL_SRC_FILES := art
+LOCAL_MODULE_STEM := art
 include $(BUILD_PREBUILT)
 
 # Copy the art shell script to the target's bin directory
 include $(CLEAR_VARS)
 LOCAL_MODULE_CLASS := EXECUTABLES
-LOCAL_MODULE := art
+LOCAL_MODULE := art-script
 LOCAL_SRC_FILES := art
+LOCAL_MODULE_STEM := art
 include $(BUILD_PREBUILT)
diff --git a/tools/dexfuzz/src/dexfuzz/DexFuzz.java b/tools/dexfuzz/src/dexfuzz/DexFuzz.java
index 1e37def..feb5a13 100644
--- a/tools/dexfuzz/src/dexfuzz/DexFuzz.java
+++ b/tools/dexfuzz/src/dexfuzz/DexFuzz.java
@@ -33,9 +33,9 @@
  * Entrypoint class for dexfuzz.
  */
 public class DexFuzz {
-  // Last version update 1.8: Added a new mutation called NewInstanceChanger.
+  // Last version update 1.9: fixed a bug in InvokeChanger.
   private static int majorVersion = 1;
-  private static int minorVersion = 8;
+  private static int minorVersion = 9;
   private static int seedChangeVersion = 0;
 
   /**
diff --git a/tools/dexfuzz/src/dexfuzz/program/mutators/InvokeChanger.java b/tools/dexfuzz/src/dexfuzz/program/mutators/InvokeChanger.java
index 8750fc6..f0ed83a 100644
--- a/tools/dexfuzz/src/dexfuzz/program/mutators/InvokeChanger.java
+++ b/tools/dexfuzz/src/dexfuzz/program/mutators/InvokeChanger.java
@@ -167,7 +167,7 @@
   }
 
   private boolean isRangeInvokeInst(Opcode opcode){
-    return Opcode.isBetween(opcode, Opcode.INVOKE_VIRTUAL, Opcode.INVOKE_INTERFACE);
+    return Opcode.isBetween(opcode, Opcode.INVOKE_VIRTUAL_RANGE, Opcode.INVOKE_INTERFACE_RANGE);
 
   }
 
diff --git a/tools/libcore_failures.txt b/tools/libcore_failures.txt
index c6553f8..ea26b0e 100644
--- a/tools/libcore_failures.txt
+++ b/tools/libcore_failures.txt
@@ -211,13 +211,6 @@
           "libcore.java.lang.ProcessBuilderTest#testRedirect_nullStreams"]
 },
 {
-  description: "Test is timing sensitive",
-  result: EXEC_FAILED,
-  bug: 62528691,
-  modes: [device],
-  names: ["libcore.java.util.TimeZoneTest#testSetDefaultRace"]
-},
-{
   description: "Repeated annotations do not work in javac (OpenJDK8), fixed in OpenJDK9.
                 Blacklisted to support javac/dx build (b/36902714)",
   result: EXEC_FAILED,